xref: /aosp_15_r20/external/clang/test/CodeGen/aarch64-neon-2velem.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
2*67e74705SXin Li 
3*67e74705SXin Li // Test new aarch64 intrinsics and types
4*67e74705SXin Li 
5*67e74705SXin Li #include <arm_neon.h>
6*67e74705SXin Li 
7*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
8*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
9*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
10*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
11*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD]]
// Wrapper under test: returns vmla_lane_s16(a, b, v, 3).
// The FileCheck expectations before this function look for element 3 of %v
// splatted to 4 lanes, multiplied by %b, and the product added to %a.
// NOTE(review): the signature echo and blame tag in front of the code on
// these lines look like code-browser residue -- confirm against upstream.
test_vmla_lane_s16(int16x4_t a,int16x4_t b,int16x4_t v)12*67e74705SXin Li int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
13*67e74705SXin Li   return vmla_lane_s16(a, b, v, 3);
14*67e74705SXin Li }
15*67e74705SXin Li 
16*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
17*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
18*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
19*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
20*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD]]
// Wrapper under test: returns vmlaq_lane_s16(a, b, v, 3).
// The FileCheck expectations before this function look for element 3 of the
// <4 x i16> %v splatted to 8 lanes, multiplied by %b, and added to %a.
test_vmlaq_lane_s16(int16x8_t a,int16x8_t b,int16x4_t v)21*67e74705SXin Li int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
22*67e74705SXin Li   return vmlaq_lane_s16(a, b, v, 3);
23*67e74705SXin Li }
24*67e74705SXin Li 
25*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
26*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
27*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
28*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
29*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD]]
// Wrapper under test: returns vmla_lane_s32(a, b, v, 1).
// The FileCheck expectations before this function look for element 1 of %v
// splatted to 2 lanes, multiplied by %b, and the product added to %a.
test_vmla_lane_s32(int32x2_t a,int32x2_t b,int32x2_t v)30*67e74705SXin Li int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
31*67e74705SXin Li   return vmla_lane_s32(a, b, v, 1);
32*67e74705SXin Li }
33*67e74705SXin Li 
34*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
35*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
36*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
37*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
38*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// Wrapper under test: returns vmlaq_lane_s32(a, b, v, 1).
// The FileCheck expectations before this function look for element 1 of the
// <2 x i32> %v splatted to 4 lanes, multiplied by %b, and added to %a.
test_vmlaq_lane_s32(int32x4_t a,int32x4_t b,int32x2_t v)39*67e74705SXin Li int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
40*67e74705SXin Li   return vmlaq_lane_s32(a, b, v, 1);
41*67e74705SXin Li }
42*67e74705SXin Li 
43*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
44*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
45*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
46*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
47*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD]]
// Wrapper under test: returns vmla_laneq_s16(a, b, v, 7).
// The FileCheck expectations before this function look for element 7 of the
// <8 x i16> %v splatted to 4 lanes, multiplied by %b, and added to %a.
test_vmla_laneq_s16(int16x4_t a,int16x4_t b,int16x8_t v)48*67e74705SXin Li int16x4_t test_vmla_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
49*67e74705SXin Li   return vmla_laneq_s16(a, b, v, 7);
50*67e74705SXin Li }
51*67e74705SXin Li 
52*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
53*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
54*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
55*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
56*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD]]
// Wrapper under test: returns vmlaq_laneq_s16(a, b, v, 7).
// The FileCheck expectations before this function look for element 7 of %v
// splatted to 8 lanes, multiplied by %b, and the product added to %a.
test_vmlaq_laneq_s16(int16x8_t a,int16x8_t b,int16x8_t v)57*67e74705SXin Li int16x8_t test_vmlaq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
58*67e74705SXin Li   return vmlaq_laneq_s16(a, b, v, 7);
59*67e74705SXin Li }
60*67e74705SXin Li 
61*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
62*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
63*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
64*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
65*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD]]
// Wrapper under test: returns vmla_laneq_s32(a, b, v, 3).
// The FileCheck expectations before this function look for element 3 of the
// <4 x i32> %v splatted to 2 lanes, multiplied by %b, and added to %a.
test_vmla_laneq_s32(int32x2_t a,int32x2_t b,int32x4_t v)66*67e74705SXin Li int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
67*67e74705SXin Li   return vmla_laneq_s32(a, b, v, 3);
68*67e74705SXin Li }
69*67e74705SXin Li 
70*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
71*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
72*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
73*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
74*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// Wrapper under test: returns vmlaq_laneq_s32(a, b, v, 3).
// The FileCheck expectations before this function look for element 3 of %v
// splatted to 4 lanes, multiplied by %b, and the product added to %a.
test_vmlaq_laneq_s32(int32x4_t a,int32x4_t b,int32x4_t v)75*67e74705SXin Li int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
76*67e74705SXin Li   return vmlaq_laneq_s32(a, b, v, 3);
77*67e74705SXin Li }
78*67e74705SXin Li 
79*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
80*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
81*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
82*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
83*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB]]
// Wrapper under test: returns vmls_lane_s16(a, b, v, 3).
// The FileCheck expectations before this function look for element 3 of %v
// splatted to 4 lanes, multiplied by %b, and the product subtracted from %a.
test_vmls_lane_s16(int16x4_t a,int16x4_t b,int16x4_t v)84*67e74705SXin Li int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
85*67e74705SXin Li   return vmls_lane_s16(a, b, v, 3);
86*67e74705SXin Li }
87*67e74705SXin Li 
88*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
89*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
90*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
91*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
92*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB]]
// Wrapper under test: returns vmlsq_lane_s16(a, b, v, 3).
// The FileCheck expectations before this function look for element 3 of the
// <4 x i16> %v splatted to 8 lanes, multiplied by %b, and subtracted from %a.
test_vmlsq_lane_s16(int16x8_t a,int16x8_t b,int16x4_t v)93*67e74705SXin Li int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
94*67e74705SXin Li   return vmlsq_lane_s16(a, b, v, 3);
95*67e74705SXin Li }
96*67e74705SXin Li 
97*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
98*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
99*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
100*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
101*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB]]
// Wrapper under test: returns vmls_lane_s32(a, b, v, 1).
// The FileCheck expectations before this function look for element 1 of %v
// splatted to 2 lanes, multiplied by %b, and the product subtracted from %a.
test_vmls_lane_s32(int32x2_t a,int32x2_t b,int32x2_t v)102*67e74705SXin Li int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
103*67e74705SXin Li   return vmls_lane_s32(a, b, v, 1);
104*67e74705SXin Li }
105*67e74705SXin Li 
106*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
107*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
108*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
109*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
110*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
// Wrapper under test: returns vmlsq_lane_s32(a, b, v, 1).
// The FileCheck expectations before this function look for element 1 of the
// <2 x i32> %v splatted to 4 lanes, multiplied by %b, and subtracted from %a.
test_vmlsq_lane_s32(int32x4_t a,int32x4_t b,int32x2_t v)111*67e74705SXin Li int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
112*67e74705SXin Li   return vmlsq_lane_s32(a, b, v, 1);
113*67e74705SXin Li }
114*67e74705SXin Li 
115*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
116*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
117*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
118*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
119*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB]]
// Wrapper under test: returns vmls_laneq_s16(a, b, v, 7).
// The FileCheck expectations before this function look for element 7 of the
// <8 x i16> %v splatted to 4 lanes, multiplied by %b, and subtracted from %a.
test_vmls_laneq_s16(int16x4_t a,int16x4_t b,int16x8_t v)120*67e74705SXin Li int16x4_t test_vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
121*67e74705SXin Li   return vmls_laneq_s16(a, b, v, 7);
122*67e74705SXin Li }
123*67e74705SXin Li 
124*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
125*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
126*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
127*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
128*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB]]
// Wrapper under test: returns vmlsq_laneq_s16(a, b, v, 7).
// The FileCheck expectations before this function look for element 7 of %v
// splatted to 8 lanes, multiplied by %b, and the product subtracted from %a.
test_vmlsq_laneq_s16(int16x8_t a,int16x8_t b,int16x8_t v)129*67e74705SXin Li int16x8_t test_vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
130*67e74705SXin Li   return vmlsq_laneq_s16(a, b, v, 7);
131*67e74705SXin Li }
132*67e74705SXin Li 
133*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
134*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
135*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
136*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
137*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB]]
// Wrapper under test: returns vmls_laneq_s32(a, b, v, 3).
// The FileCheck expectations before this function look for element 3 of the
// <4 x i32> %v splatted to 2 lanes, multiplied by %b, and subtracted from %a.
test_vmls_laneq_s32(int32x2_t a,int32x2_t b,int32x4_t v)138*67e74705SXin Li int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
139*67e74705SXin Li   return vmls_laneq_s32(a, b, v, 3);
140*67e74705SXin Li }
141*67e74705SXin Li 
142*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
143*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
144*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
145*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
146*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
// Wrapper under test: returns vmlsq_laneq_s32(a, b, v, 3).
// The FileCheck expectations before this function look for element 3 of %v
// splatted to 4 lanes, multiplied by %b, and the product subtracted from %a.
test_vmlsq_laneq_s32(int32x4_t a,int32x4_t b,int32x4_t v)147*67e74705SXin Li int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
148*67e74705SXin Li   return vmlsq_laneq_s32(a, b, v, 3);
149*67e74705SXin Li }
150*67e74705SXin Li 
151*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 {
152*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
153*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
154*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL]]
// Wrapper under test: returns vmul_lane_s16(a, v, 3).
// The FileCheck expectations before this function look for element 3 of %v
// splatted to 4 lanes and multiplied by %a.
test_vmul_lane_s16(int16x4_t a,int16x4_t v)155*67e74705SXin Li int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t v) {
156*67e74705SXin Li   return vmul_lane_s16(a, v, 3);
157*67e74705SXin Li }
158*67e74705SXin Li 
159*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 {
160*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
161*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
162*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL]]
// Wrapper under test: returns vmulq_lane_s16(a, v, 3).
// The FileCheck expectations before this function look for element 3 of the
// <4 x i16> %v splatted to 8 lanes and multiplied by %a.
test_vmulq_lane_s16(int16x8_t a,int16x4_t v)163*67e74705SXin Li int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t v) {
164*67e74705SXin Li   return vmulq_lane_s16(a, v, 3);
165*67e74705SXin Li }
166*67e74705SXin Li 
167*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 {
168*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
169*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
170*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL]]
// Wrapper under test: returns vmul_lane_s32(a, v, 1).
// The FileCheck expectations before this function look for element 1 of %v
// splatted to 2 lanes and multiplied by %a.
test_vmul_lane_s32(int32x2_t a,int32x2_t v)171*67e74705SXin Li int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) {
172*67e74705SXin Li   return vmul_lane_s32(a, v, 1);
173*67e74705SXin Li }
174*67e74705SXin Li 
175*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 {
176*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
177*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
178*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL]]
// Wrapper under test: returns vmulq_lane_s32(a, v, 1).
// The FileCheck expectations before this function look for element 1 of the
// <2 x i32> %v splatted to 4 lanes and multiplied by %a.
test_vmulq_lane_s32(int32x4_t a,int32x2_t v)179*67e74705SXin Li int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) {
180*67e74705SXin Li   return vmulq_lane_s32(a, v, 1);
181*67e74705SXin Li }
182*67e74705SXin Li 
183*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) #0 {
184*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
185*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
186*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL]]
// Wrapper under test: returns vmul_lane_u16(a, v, 3).
// The FileCheck expectations before this function look for element 3 of %v
// splatted to 4 lanes and multiplied by %a.
test_vmul_lane_u16(uint16x4_t a,uint16x4_t v)187*67e74705SXin Li uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t v) {
188*67e74705SXin Li   return vmul_lane_u16(a, v, 3);
189*67e74705SXin Li }
190*67e74705SXin Li 
191*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) #0 {
192*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
193*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
194*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL]]
// Wrapper under test: returns vmulq_lane_u16(a, v, 3).
// The FileCheck expectations before this function look for element 3 of the
// <4 x i16> %v splatted to 8 lanes and multiplied by %a.
test_vmulq_lane_u16(uint16x8_t a,uint16x4_t v)195*67e74705SXin Li uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t v) {
196*67e74705SXin Li   return vmulq_lane_u16(a, v, 3);
197*67e74705SXin Li }
198*67e74705SXin Li 
199*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) #0 {
200*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
201*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
202*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL]]
// Wrapper under test: returns vmul_lane_u32(a, v, 1).
// The FileCheck expectations before this function look for element 1 of %v
// splatted to 2 lanes and multiplied by %a.
test_vmul_lane_u32(uint32x2_t a,uint32x2_t v)203*67e74705SXin Li uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t v) {
204*67e74705SXin Li   return vmul_lane_u32(a, v, 1);
205*67e74705SXin Li }
206*67e74705SXin Li 
207*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) #0 {
208*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
209*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
210*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL]]
// Wrapper under test: returns vmulq_lane_u32(a, v, 1).
// The FileCheck expectations before this function look for element 1 of the
// <2 x i32> %v splatted to 4 lanes and multiplied by %a.
test_vmulq_lane_u32(uint32x4_t a,uint32x2_t v)211*67e74705SXin Li uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t v) {
212*67e74705SXin Li   return vmulq_lane_u32(a, v, 1);
213*67e74705SXin Li }
214*67e74705SXin Li 
215*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 {
216*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
217*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
218*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL]]
// Wrapper under test: returns vmul_laneq_s16(a, v, 7).
// The FileCheck expectations before this function look for element 7 of the
// <8 x i16> %v splatted to 4 lanes and multiplied by %a.
test_vmul_laneq_s16(int16x4_t a,int16x8_t v)219*67e74705SXin Li int16x4_t test_vmul_laneq_s16(int16x4_t a, int16x8_t v) {
220*67e74705SXin Li   return vmul_laneq_s16(a, v, 7);
221*67e74705SXin Li }
222*67e74705SXin Li 
223*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 {
224*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
225*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
226*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL]]
// Wrapper under test: returns vmulq_laneq_s16(a, v, 7).
// The FileCheck expectations before this function look for element 7 of %v
// splatted to 8 lanes and multiplied by %a.
test_vmulq_laneq_s16(int16x8_t a,int16x8_t v)227*67e74705SXin Li int16x8_t test_vmulq_laneq_s16(int16x8_t a, int16x8_t v) {
228*67e74705SXin Li   return vmulq_laneq_s16(a, v, 7);
229*67e74705SXin Li }
230*67e74705SXin Li 
231*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 {
232*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
233*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
234*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL]]
// Wrapper under test: returns vmul_laneq_s32(a, v, 3).
// The FileCheck expectations before this function look for element 3 of the
// <4 x i32> %v splatted to 2 lanes and multiplied by %a.
test_vmul_laneq_s32(int32x2_t a,int32x4_t v)235*67e74705SXin Li int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) {
236*67e74705SXin Li   return vmul_laneq_s32(a, v, 3);
237*67e74705SXin Li }
238*67e74705SXin Li 
239*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 {
240*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
241*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
242*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL]]
// Wrapper under test: returns vmulq_laneq_s32(a, v, 3).
// The FileCheck expectations before this function look for element 3 of %v
// splatted to 4 lanes and multiplied by %a.
test_vmulq_laneq_s32(int32x4_t a,int32x4_t v)243*67e74705SXin Li int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) {
244*67e74705SXin Li   return vmulq_laneq_s32(a, v, 3);
245*67e74705SXin Li }
246*67e74705SXin Li 
247*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) #0 {
248*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
249*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
250*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL]]
// Wrapper under test: returns vmul_laneq_u16(a, v, 7).
// The FileCheck expectations before this function look for element 7 of the
// <8 x i16> %v splatted to 4 lanes and multiplied by %a.
test_vmul_laneq_u16(uint16x4_t a,uint16x8_t v)251*67e74705SXin Li uint16x4_t test_vmul_laneq_u16(uint16x4_t a, uint16x8_t v) {
252*67e74705SXin Li   return vmul_laneq_u16(a, v, 7);
253*67e74705SXin Li }
254*67e74705SXin Li 
255*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) #0 {
256*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
257*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
258*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL]]
// Wrapper under test: returns vmulq_laneq_u16(a, v, 7).
// The FileCheck expectations before this function look for element 7 of %v
// splatted to 8 lanes and multiplied by %a.
test_vmulq_laneq_u16(uint16x8_t a,uint16x8_t v)259*67e74705SXin Li uint16x8_t test_vmulq_laneq_u16(uint16x8_t a, uint16x8_t v) {
260*67e74705SXin Li   return vmulq_laneq_u16(a, v, 7);
261*67e74705SXin Li }
262*67e74705SXin Li 
263*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) #0 {
264*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
265*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
266*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL]]
// Wrapper under test: returns vmul_laneq_u32(a, v, 3).
// The FileCheck expectations before this function look for element 3 of the
// <4 x i32> %v splatted to 2 lanes and multiplied by %a.
test_vmul_laneq_u32(uint32x2_t a,uint32x4_t v)267*67e74705SXin Li uint32x2_t test_vmul_laneq_u32(uint32x2_t a, uint32x4_t v) {
268*67e74705SXin Li   return vmul_laneq_u32(a, v, 3);
269*67e74705SXin Li }
270*67e74705SXin Li 
271*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) #0 {
272*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
273*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
274*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL]]
// Wrapper under test: returns vmulq_laneq_u32(a, v, 3).
// The FileCheck expectations before this function look for element 3 of %v
// splatted to 4 lanes and multiplied by %a.
test_vmulq_laneq_u32(uint32x4_t a,uint32x4_t v)275*67e74705SXin Li uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) {
276*67e74705SXin Li   return vmulq_laneq_u32(a, v, 3);
277*67e74705SXin Li }
278*67e74705SXin Li 
279*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
280*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
281*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
282*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
283*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
284*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1>
285*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
286*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
287*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
288*67e74705SXin Li // CHECK:   ret <2 x float> [[FMLA2]]
// Wrapper under test: returns vfma_lane_f32(a, b, v, 1).
// The FileCheck expectations before this function look for bitcasts of the
// operands through <8 x i8>, element 1 of %v splatted to 2 lanes, and a call
// to @llvm.fma.v2f32 with operands (b, splat, a) in that order.
test_vfma_lane_f32(float32x2_t a,float32x2_t b,float32x2_t v)289*67e74705SXin Li float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
290*67e74705SXin Li   return vfma_lane_f32(a, b, v, 1);
291*67e74705SXin Li }
292*67e74705SXin Li 
293*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
294*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
295*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
296*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
297*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
298*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
299*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
300*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
301*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
302*67e74705SXin Li // CHECK:   ret <4 x float> [[FMLA2]]
// Wrapper under test: returns vfmaq_lane_f32(a, b, v, 1).
// The FileCheck expectations before this function look for bitcasts of the
// operands through i8 vectors, element 1 of the <2 x float> %v splatted to
// 4 lanes, and a call to @llvm.fma.v4f32 with operands (b, splat, a).
test_vfmaq_lane_f32(float32x4_t a,float32x4_t b,float32x2_t v)303*67e74705SXin Li float32x4_t test_vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
304*67e74705SXin Li   return vfmaq_lane_f32(a, b, v, 1);
305*67e74705SXin Li }
306*67e74705SXin Li 
307*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
308*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
309*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
310*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
311*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
312*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
313*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
314*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3>
315*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
316*67e74705SXin Li // CHECK:   ret <2 x float> [[TMP6]]
// Wrapper under test: returns vfma_laneq_f32(a, b, v, 3).
// The FileCheck expectations before this function look for bitcasts of the
// operands through i8 vectors, element 3 of the <4 x float> %v splatted to
// 2 lanes, and a call to @llvm.fma.v2f32 with operands (splat, b, a) --
// note the splat comes first here, unlike the _lane_ variants.
test_vfma_laneq_f32(float32x2_t a,float32x2_t b,float32x4_t v)317*67e74705SXin Li float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
318*67e74705SXin Li   return vfma_laneq_f32(a, b, v, 3);
319*67e74705SXin Li }
320*67e74705SXin Li 
321*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
322*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
323*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
324*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
325*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
326*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
327*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
328*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
329*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]])
330*67e74705SXin Li // CHECK:   ret <4 x float> [[TMP6]]
// Wrapper under test: returns vfmaq_laneq_f32(a, b, v, 3).
// The FileCheck expectations before this function look for bitcasts of the
// operands through <16 x i8>, element 3 of %v splatted to 4 lanes, and a
// call to @llvm.fma.v4f32 with operands (splat, b, a) in that order.
test_vfmaq_laneq_f32(float32x4_t a,float32x4_t b,float32x4_t v)331*67e74705SXin Li float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
332*67e74705SXin Li   return vfmaq_laneq_f32(a, b, v, 3);
333*67e74705SXin Li }
334*67e74705SXin Li 
335*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
336*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
337*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
338*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
339*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
340*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
341*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1>
342*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
343*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
344*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
345*67e74705SXin Li // CHECK:   ret <2 x float> [[FMLA2]]
// Wrapper under test: returns vfms_lane_f32(a, b, v, 1).
// The FileCheck expectations before this function look for %b negated via
// an fsub from a -0.0 vector, element 1 of %v splatted to 2 lanes, and a
// call to @llvm.fma.v2f32 with operands (negated b, splat, a).
test_vfms_lane_f32(float32x2_t a,float32x2_t b,float32x2_t v)346*67e74705SXin Li float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
347*67e74705SXin Li   return vfms_lane_f32(a, b, v, 1);
348*67e74705SXin Li }
349*67e74705SXin Li 
350*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
351*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
352*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
353*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
354*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
355*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
356*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
357*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
358*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
359*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
360*67e74705SXin Li // CHECK:   ret <4 x float> [[FMLA2]]
// Wrapper under test: returns vfmsq_lane_f32(a, b, v, 1).
// The FileCheck expectations before this function look for %b negated via
// an fsub from a -0.0 vector, element 1 of the <2 x float> %v splatted to
// 4 lanes, and a call to @llvm.fma.v4f32 with operands (negated b, splat, a).
test_vfmsq_lane_f32(float32x4_t a,float32x4_t b,float32x2_t v)361*67e74705SXin Li float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
362*67e74705SXin Li   return vfmsq_lane_f32(a, b, v, 1);
363*67e74705SXin Li }
364*67e74705SXin Li 
365*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
366*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
367*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
368*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
369*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
370*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
371*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
372*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
373*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3>
374*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
375*67e74705SXin Li // CHECK:   ret <2 x float> [[TMP6]]
// Wrapper under test: returns vfms_laneq_f32(a, b, v, 3).
// The FileCheck expectations before this function look for %b negated via
// an fsub from a -0.0 vector, element 3 of the <4 x float> %v splatted to
// 2 lanes, and a call to @llvm.fma.v2f32 with operands (splat, negated b, a).
test_vfms_laneq_f32(float32x2_t a,float32x2_t b,float32x4_t v)376*67e74705SXin Li float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
377*67e74705SXin Li   return vfms_laneq_f32(a, b, v, 3);
378*67e74705SXin Li }
379*67e74705SXin Li 
380*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
381*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
382*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
383*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
384*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
385*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
386*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
387*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
388*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
389*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]])
390*67e74705SXin Li // CHECK:   ret <4 x float> [[TMP6]]
// Wrapper under test: returns vfmsq_laneq_f32(a, b, v, 3).
// The FileCheck expectations before this function look for %b negated via
// an fsub from a -0.0 vector, element 3 of %v splatted to 4 lanes, and a
// call to @llvm.fma.v4f32 with operands (splat, negated b, a).
test_vfmsq_laneq_f32(float32x4_t a,float32x4_t b,float32x4_t v)391*67e74705SXin Li float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
392*67e74705SXin Li   return vfmsq_laneq_f32(a, b, v, 3);
393*67e74705SXin Li }
394*67e74705SXin Li 
395*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) #0 {
396*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
397*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
398*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
399*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
400*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer
401*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
402*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
403*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]])
404*67e74705SXin Li // CHECK:   ret <2 x double> [[FMLA2]]
test_vfmaq_lane_f64(float64x2_t a,float64x2_t b,float64x1_t v)405*67e74705SXin Li float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
406*67e74705SXin Li   return vfmaq_lane_f64(a, b, v, 0);
407*67e74705SXin Li }
408*67e74705SXin Li 
409*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
410*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
411*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
412*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
413*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
414*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
415*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
416*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1>
417*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]])
418*67e74705SXin Li // CHECK:   ret <2 x double> [[TMP6]]
test_vfmaq_laneq_f64(float64x2_t a,float64x2_t b,float64x2_t v)419*67e74705SXin Li float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
420*67e74705SXin Li   return vfmaq_laneq_f64(a, b, v, 1);
421*67e74705SXin Li }
422*67e74705SXin Li 
423*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) #0 {
424*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
425*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
426*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
427*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
428*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
429*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer
430*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
431*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
432*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]])
433*67e74705SXin Li // CHECK:   ret <2 x double> [[FMLA2]]
test_vfmsq_lane_f64(float64x2_t a,float64x2_t b,float64x1_t v)434*67e74705SXin Li float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
435*67e74705SXin Li   return vfmsq_lane_f64(a, b, v, 0);
436*67e74705SXin Li }
437*67e74705SXin Li 
438*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
439*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
440*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
441*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
442*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
443*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
444*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
445*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
446*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1>
447*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]])
448*67e74705SXin Li // CHECK:   ret <2 x double> [[TMP6]]
test_vfmsq_laneq_f64(float64x2_t a,float64x2_t b,float64x2_t v)449*67e74705SXin Li float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
450*67e74705SXin Li   return vfmsq_laneq_f64(a, b, v, 1);
451*67e74705SXin Li }
452*67e74705SXin Li 
453*67e74705SXin Li // CHECK-LABEL: define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) #0 {
454*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8>
455*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
456*67e74705SXin Li // CHECK:   [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
457*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
458*67e74705SXin Li // CHECK:   ret float [[TMP2]]
test_vfmas_laneq_f32(float32_t a,float32_t b,float32x4_t v)459*67e74705SXin Li float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
460*67e74705SXin Li   return vfmas_laneq_f32(a, b, v, 3);
461*67e74705SXin Li }
462*67e74705SXin Li 
463*67e74705SXin Li // CHECK-LABEL: define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) #0 {
464*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub double -0.000000e+00, %b
465*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %v to <8 x i8>
466*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
467*67e74705SXin Li // CHECK:   [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
468*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a)
469*67e74705SXin Li // CHECK:   ret double [[TMP2]]
test_vfmsd_lane_f64(float64_t a,float64_t b,float64x1_t v)470*67e74705SXin Li float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) {
471*67e74705SXin Li   return vfmsd_lane_f64(a, b, v, 0);
472*67e74705SXin Li }
473*67e74705SXin Li 
474*67e74705SXin Li // CHECK-LABEL: define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) #0 {
475*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub float -0.000000e+00, %b
476*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8>
477*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
478*67e74705SXin Li // CHECK:   [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
479*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
480*67e74705SXin Li // CHECK:   ret float [[TMP2]]
test_vfmss_laneq_f32(float32_t a,float32_t b,float32x4_t v)481*67e74705SXin Li float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
482*67e74705SXin Li   return vfmss_laneq_f32(a, b, v, 3);
483*67e74705SXin Li }
484*67e74705SXin Li 
485*67e74705SXin Li // CHECK-LABEL: define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) #0 {
486*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub double -0.000000e+00, %b
487*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v to <16 x i8>
488*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
489*67e74705SXin Li // CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
490*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a)
491*67e74705SXin Li // CHECK:   ret double [[TMP2]]
test_vfmsd_laneq_f64(float64_t a,float64_t b,float64x2_t v)492*67e74705SXin Li float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) {
493*67e74705SXin Li   return vfmsd_laneq_f64(a, b, v, 1);
494*67e74705SXin Li }
495*67e74705SXin Li 
496*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
497*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
498*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
499*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
500*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
501*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
502*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
503*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
504*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_lane_s16(int32x4_t a,int16x4_t b,int16x4_t v)505*67e74705SXin Li int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
506*67e74705SXin Li   return vmlal_lane_s16(a, b, v, 3);
507*67e74705SXin Li }
508*67e74705SXin Li 
509*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
510*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
511*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
512*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
513*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
514*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
515*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
516*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
517*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_lane_s32(int64x2_t a,int32x2_t b,int32x2_t v)518*67e74705SXin Li int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
519*67e74705SXin Li   return vmlal_lane_s32(a, b, v, 1);
520*67e74705SXin Li }
521*67e74705SXin Li 
522*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
523*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
524*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
525*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
526*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
527*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
528*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
529*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
530*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_laneq_s16(int32x4_t a,int16x4_t b,int16x8_t v)531*67e74705SXin Li int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
532*67e74705SXin Li   return vmlal_laneq_s16(a, b, v, 7);
533*67e74705SXin Li }
534*67e74705SXin Li 
535*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
536*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
537*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
538*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
539*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
540*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
541*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
542*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
543*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_laneq_s32(int64x2_t a,int32x2_t b,int32x4_t v)544*67e74705SXin Li int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
545*67e74705SXin Li   return vmlal_laneq_s32(a, b, v, 3);
546*67e74705SXin Li }
547*67e74705SXin Li 
548*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
549*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
550*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
551*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
552*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
553*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
554*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
555*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
556*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
557*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_high_lane_s16(int32x4_t a,int16x8_t b,int16x4_t v)558*67e74705SXin Li int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
559*67e74705SXin Li   return vmlal_high_lane_s16(a, b, v, 3);
560*67e74705SXin Li }
561*67e74705SXin Li 
562*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
563*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
564*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
565*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
566*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
567*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
568*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
569*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
570*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
571*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_high_lane_s32(int64x2_t a,int32x4_t b,int32x2_t v)572*67e74705SXin Li int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
573*67e74705SXin Li   return vmlal_high_lane_s32(a, b, v, 1);
574*67e74705SXin Li }
575*67e74705SXin Li 
576*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
577*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
578*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
579*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
580*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
581*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
582*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
583*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
584*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
585*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_high_laneq_s16(int32x4_t a,int16x8_t b,int16x8_t v)586*67e74705SXin Li int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
587*67e74705SXin Li   return vmlal_high_laneq_s16(a, b, v, 7);
588*67e74705SXin Li }
589*67e74705SXin Li 
590*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
591*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
592*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
593*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
594*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
595*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
596*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
597*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
598*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
599*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_high_laneq_s32(int64x2_t a,int32x4_t b,int32x4_t v)600*67e74705SXin Li int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
601*67e74705SXin Li   return vmlal_high_laneq_s32(a, b, v, 3);
602*67e74705SXin Li }
603*67e74705SXin Li 
604*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
605*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
606*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
607*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
608*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
609*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
610*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
611*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
612*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_lane_s16(int32x4_t a,int16x4_t b,int16x4_t v)613*67e74705SXin Li int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
614*67e74705SXin Li   return vmlsl_lane_s16(a, b, v, 3);
615*67e74705SXin Li }
616*67e74705SXin Li 
617*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
618*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
619*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
620*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
621*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
622*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
623*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
624*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
625*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_lane_s32(int64x2_t a,int32x2_t b,int32x2_t v)626*67e74705SXin Li int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
627*67e74705SXin Li   return vmlsl_lane_s32(a, b, v, 1);
628*67e74705SXin Li }
629*67e74705SXin Li 
630*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
631*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
632*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
633*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
634*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
635*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
636*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
637*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
638*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_laneq_s16(int32x4_t a,int16x4_t b,int16x8_t v)639*67e74705SXin Li int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
640*67e74705SXin Li   return vmlsl_laneq_s16(a, b, v, 7);
641*67e74705SXin Li }
642*67e74705SXin Li 
643*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
644*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
645*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
646*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
647*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
648*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
649*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
650*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
651*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_laneq_s32(int64x2_t a,int32x2_t b,int32x4_t v)652*67e74705SXin Li int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
653*67e74705SXin Li   return vmlsl_laneq_s32(a, b, v, 3);
654*67e74705SXin Li }
655*67e74705SXin Li 
656*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
657*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
658*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
659*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
660*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
661*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
662*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
663*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
664*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
665*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_high_lane_s16(int32x4_t a,int16x8_t b,int16x4_t v)666*67e74705SXin Li int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
667*67e74705SXin Li   return vmlsl_high_lane_s16(a, b, v, 3);
668*67e74705SXin Li }
669*67e74705SXin Li 
670*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
671*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
672*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
673*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
674*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
675*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
676*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
677*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
678*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
679*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_high_lane_s32(int64x2_t a,int32x4_t b,int32x2_t v)680*67e74705SXin Li int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
681*67e74705SXin Li   return vmlsl_high_lane_s32(a, b, v, 1);
682*67e74705SXin Li }
683*67e74705SXin Li 
684*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
685*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
686*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
687*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
688*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
689*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
690*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
691*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
692*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
693*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_high_laneq_s16(int32x4_t a,int16x8_t b,int16x8_t v)694*67e74705SXin Li int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
695*67e74705SXin Li   return vmlsl_high_laneq_s16(a, b, v, 7);
696*67e74705SXin Li }
697*67e74705SXin Li 
698*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
699*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
700*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
701*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
702*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
703*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
704*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
705*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
706*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
707*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_high_laneq_s32(int64x2_t a,int32x4_t b,int32x4_t v)708*67e74705SXin Li int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
709*67e74705SXin Li   return vmlsl_high_laneq_s32(a, b, v, 3);
710*67e74705SXin Li }
711*67e74705SXin Li 
712*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
713*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
714*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
715*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
716*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
717*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
718*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
719*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
720*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_lane_u16(int32x4_t a,int16x4_t b,int16x4_t v)721*67e74705SXin Li int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) {
722*67e74705SXin Li   return vmlal_lane_u16(a, b, v, 3);
723*67e74705SXin Li }
724*67e74705SXin Li 
725*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
726*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
727*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
728*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
729*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
730*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
731*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
732*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
733*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_lane_u32(int64x2_t a,int32x2_t b,int32x2_t v)734*67e74705SXin Li int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) {
735*67e74705SXin Li   return vmlal_lane_u32(a, b, v, 1);
736*67e74705SXin Li }
737*67e74705SXin Li 
738*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
739*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
740*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
741*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
742*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
743*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
744*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
745*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
746*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_laneq_u16(int32x4_t a,int16x4_t b,int16x8_t v)747*67e74705SXin Li int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) {
748*67e74705SXin Li   return vmlal_laneq_u16(a, b, v, 7);
749*67e74705SXin Li }
750*67e74705SXin Li 
751*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
752*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
753*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
754*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
755*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
756*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
757*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
758*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
759*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_laneq_u32(int64x2_t a,int32x2_t b,int32x4_t v)760*67e74705SXin Li int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) {
761*67e74705SXin Li   return vmlal_laneq_u32(a, b, v, 3);
762*67e74705SXin Li }
763*67e74705SXin Li 
764*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
765*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
766*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
767*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
768*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
769*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
770*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
771*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
772*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
773*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_high_lane_u16(int32x4_t a,int16x8_t b,int16x4_t v)774*67e74705SXin Li int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) {
775*67e74705SXin Li   return vmlal_high_lane_u16(a, b, v, 3);
776*67e74705SXin Li }
777*67e74705SXin Li 
778*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
779*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
780*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
781*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
782*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
783*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
784*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
785*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
786*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
787*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_high_lane_u32(int64x2_t a,int32x4_t b,int32x2_t v)788*67e74705SXin Li int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) {
789*67e74705SXin Li   return vmlal_high_lane_u32(a, b, v, 1);
790*67e74705SXin Li }
791*67e74705SXin Li 
792*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
793*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
794*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
795*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
796*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
797*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
798*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
799*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
800*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
801*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_high_laneq_u16(int32x4_t a,int16x8_t b,int16x8_t v)802*67e74705SXin Li int32x4_t test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) {
803*67e74705SXin Li   return vmlal_high_laneq_u16(a, b, v, 7);
804*67e74705SXin Li }
805*67e74705SXin Li 
806*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
807*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
808*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
809*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
810*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
811*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
812*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
813*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
814*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
815*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_high_laneq_u32(int64x2_t a,int32x4_t b,int32x4_t v)816*67e74705SXin Li int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) {
817*67e74705SXin Li   return vmlal_high_laneq_u32(a, b, v, 3);
818*67e74705SXin Li }
819*67e74705SXin Li 
820*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
821*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
822*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
823*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
824*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
825*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
826*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
827*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
828*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_lane_u16(int32x4_t a,int16x4_t b,int16x4_t v)829*67e74705SXin Li int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) {
830*67e74705SXin Li   return vmlsl_lane_u16(a, b, v, 3);
831*67e74705SXin Li }
832*67e74705SXin Li 
833*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
834*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
835*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
836*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
837*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
838*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
839*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
840*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
841*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_lane_u32(int64x2_t a,int32x2_t b,int32x2_t v)842*67e74705SXin Li int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) {
843*67e74705SXin Li   return vmlsl_lane_u32(a, b, v, 1);
844*67e74705SXin Li }
845*67e74705SXin Li 
846*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
847*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
848*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
849*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
850*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
851*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
852*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
853*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
854*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_laneq_u16(int32x4_t a,int16x4_t b,int16x8_t v)855*67e74705SXin Li int32x4_t test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) {
856*67e74705SXin Li   return vmlsl_laneq_u16(a, b, v, 7);
857*67e74705SXin Li }
858*67e74705SXin Li 
859*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
860*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
861*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
862*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
863*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
864*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
865*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
866*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
867*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_laneq_u32(int64x2_t a,int32x2_t b,int32x4_t v)868*67e74705SXin Li int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) {
869*67e74705SXin Li   return vmlsl_laneq_u32(a, b, v, 3);
870*67e74705SXin Li }
871*67e74705SXin Li 
872*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
873*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
874*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
875*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
876*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
877*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
878*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
879*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
880*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
881*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_high_lane_u16(int32x4_t a,int16x8_t b,int16x4_t v)882*67e74705SXin Li int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) {
883*67e74705SXin Li   return vmlsl_high_lane_u16(a, b, v, 3);
884*67e74705SXin Li }
885*67e74705SXin Li 
886*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
887*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
888*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
889*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
890*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
891*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
892*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
893*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
894*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
895*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_high_lane_u32(int64x2_t a,int32x4_t b,int32x2_t v)896*67e74705SXin Li int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) {
897*67e74705SXin Li   return vmlsl_high_lane_u32(a, b, v, 1);
898*67e74705SXin Li }
899*67e74705SXin Li 
900*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
901*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
902*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
903*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
904*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
905*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
906*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
907*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
908*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
909*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_high_laneq_u16(int32x4_t a,int16x8_t b,int16x8_t v)910*67e74705SXin Li int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) {
911*67e74705SXin Li   return vmlsl_high_laneq_u16(a, b, v, 7);
912*67e74705SXin Li }
913*67e74705SXin Li 
914*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
915*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
916*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
917*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
918*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
919*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
920*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
921*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
922*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
923*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_high_laneq_u32(int64x2_t a,int32x4_t b,int32x4_t v)924*67e74705SXin Li int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) {
925*67e74705SXin Li   return vmlsl_high_laneq_u32(a, b, v, 3);
926*67e74705SXin Li }
927*67e74705SXin Li 
928*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 {
929*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
930*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
931*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
932*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
933*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
934*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
935*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_lane_s16(int16x4_t a,int16x4_t v)936*67e74705SXin Li int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) {
937*67e74705SXin Li   return vmull_lane_s16(a, v, 3);
938*67e74705SXin Li }
939*67e74705SXin Li 
940*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 {
941*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
942*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
943*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
944*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
945*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
946*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
947*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_lane_s32(int32x2_t a,int32x2_t v)948*67e74705SXin Li int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) {
949*67e74705SXin Li   return vmull_lane_s32(a, v, 1);
950*67e74705SXin Li }
951*67e74705SXin Li 
952*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) #0 {
953*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
954*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
955*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
956*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
957*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
958*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
959*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_lane_u16(uint16x4_t a,uint16x4_t v)960*67e74705SXin Li uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) {
961*67e74705SXin Li   return vmull_lane_u16(a, v, 3);
962*67e74705SXin Li }
963*67e74705SXin Li 
964*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) #0 {
965*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
966*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
967*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
968*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
969*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
970*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
971*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_lane_u32(uint32x2_t a,uint32x2_t v)972*67e74705SXin Li uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) {
973*67e74705SXin Li   return vmull_lane_u32(a, v, 1);
974*67e74705SXin Li }
975*67e74705SXin Li 
976*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 {
977*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
978*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
979*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
980*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
981*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
982*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
983*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
984*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_high_lane_s16(int16x8_t a,int16x4_t v)985*67e74705SXin Li int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) {
986*67e74705SXin Li   return vmull_high_lane_s16(a, v, 3);
987*67e74705SXin Li }
988*67e74705SXin Li 
989*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 {
990*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
991*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
992*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
993*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
994*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
995*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
996*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
997*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_high_lane_s32(int32x4_t a,int32x2_t v)998*67e74705SXin Li int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) {
999*67e74705SXin Li   return vmull_high_lane_s32(a, v, 1);
1000*67e74705SXin Li }
1001*67e74705SXin Li 
1002*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) #0 {
1003*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1004*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1005*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
1006*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1007*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1008*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1009*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
1010*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_high_lane_u16(uint16x8_t a,uint16x4_t v)1011*67e74705SXin Li uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) {
1012*67e74705SXin Li   return vmull_high_lane_u16(a, v, 3);
1013*67e74705SXin Li }
1014*67e74705SXin Li 
1015*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) #0 {
1016*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
1017*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
1018*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
1019*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1020*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1021*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1022*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
1023*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_high_lane_u32(uint32x4_t a,uint32x2_t v)1024*67e74705SXin Li uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) {
1025*67e74705SXin Li   return vmull_high_lane_u32(a, v, 1);
1026*67e74705SXin Li }
1027*67e74705SXin Li 
1028*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 {
1029*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1030*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1031*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1032*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1033*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1034*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
1035*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_laneq_s16(int16x4_t a,int16x8_t v)1036*67e74705SXin Li int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) {
1037*67e74705SXin Li   return vmull_laneq_s16(a, v, 7);
1038*67e74705SXin Li }
1039*67e74705SXin Li 
1040*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 {
1041*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
1042*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1043*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1044*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1045*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1046*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
1047*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_laneq_s32(int32x2_t a,int32x4_t v)1048*67e74705SXin Li int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) {
1049*67e74705SXin Li   return vmull_laneq_s32(a, v, 3);
1050*67e74705SXin Li }
1051*67e74705SXin Li 
1052*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) #0 {
1053*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1054*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1055*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1056*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1057*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1058*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
1059*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_laneq_u16(uint16x4_t a,uint16x8_t v)1060*67e74705SXin Li uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) {
1061*67e74705SXin Li   return vmull_laneq_u16(a, v, 7);
1062*67e74705SXin Li }
1063*67e74705SXin Li 
1064*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) #0 {
1065*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
1066*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1067*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1068*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1069*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1070*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
1071*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_laneq_u32(uint32x2_t a,uint32x4_t v)1072*67e74705SXin Li uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) {
1073*67e74705SXin Li   return vmull_laneq_u32(a, v, 3);
1074*67e74705SXin Li }
1075*67e74705SXin Li 
1076*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 {
1077*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1078*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1079*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
1080*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1081*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1082*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1083*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
1084*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_high_laneq_s16(int16x8_t a,int16x8_t v)1085*67e74705SXin Li int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) {
1086*67e74705SXin Li   return vmull_high_laneq_s16(a, v, 7);
1087*67e74705SXin Li }
1088*67e74705SXin Li 
1089*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 {
1090*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
1091*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
1092*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
1093*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1094*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1095*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1096*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
1097*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_high_laneq_s32(int32x4_t a,int32x4_t v)1098*67e74705SXin Li int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) {
1099*67e74705SXin Li   return vmull_high_laneq_s32(a, v, 3);
1100*67e74705SXin Li }
1101*67e74705SXin Li 
1102*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) #0 {
1103*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1104*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1105*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
1106*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1107*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1108*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1109*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
1110*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_high_laneq_u16(uint16x8_t a,uint16x8_t v)1111*67e74705SXin Li uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) {
1112*67e74705SXin Li   return vmull_high_laneq_u16(a, v, 7);
1113*67e74705SXin Li }
1114*67e74705SXin Li 
1115*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) #0 {
1116*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
1117*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
1118*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
1119*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1120*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1121*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1122*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
1123*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
// Test case: returns vmull_high_laneq_u32(a, v, 3), i.e. selects lane 3 of the
// 4-lane vector v. The FileCheck expectations preceding this function require
// lanes 2-3 of a to be extracted, lane 3 of v to be splatted, and both to be
// fed to llvm.aarch64.neon.umull.v2i64.
test_vmull_high_laneq_u32(uint32x4_t a,uint32x4_t v)1124*67e74705SXin Li uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) {
1125*67e74705SXin Li   return vmull_high_laneq_u32(a, v, 3);
1126*67e74705SXin Li }
1127*67e74705SXin Li 
1128*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
1129*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1130*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1131*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1132*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1133*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1134*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1135*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
1136*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1137*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
1138*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// Test case: returns vqdmlal_lane_s16(a, b, v, 3), i.e. selects lane 3 of the
// 4-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted, multiplied with b through
// llvm.aarch64.neon.sqdmull.v4i32, and the product combined with a through
// llvm.aarch64.neon.sqadd.v4i32.
test_vqdmlal_lane_s16(int32x4_t a,int16x4_t b,int16x4_t v)1139*67e74705SXin Li int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
1140*67e74705SXin Li   return vqdmlal_lane_s16(a, b, v, 3);
1141*67e74705SXin Li }
1142*67e74705SXin Li 
1143*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
1144*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
1145*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
1146*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1147*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1148*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1149*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1150*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
1151*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1152*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
1153*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
// Test case: returns vqdmlal_lane_s32(a, b, v, 1), i.e. selects lane 1 of the
// 2-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted, multiplied with b through
// llvm.aarch64.neon.sqdmull.v2i64, and the product combined with a through
// llvm.aarch64.neon.sqadd.v2i64.
test_vqdmlal_lane_s32(int64x2_t a,int32x2_t b,int32x2_t v)1154*67e74705SXin Li int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
1155*67e74705SXin Li   return vqdmlal_lane_s32(a, b, v, 1);
1156*67e74705SXin Li }
1157*67e74705SXin Li 
1158*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
1159*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1160*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1161*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1162*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
1163*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1164*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1165*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1166*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
1167*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1168*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
1169*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// Test case: returns vqdmlal_high_lane_s16(a, b, v, 3), i.e. selects lane 3 of
// the 4-lane vector v. The FileCheck expectations preceding this function
// require lanes 4-7 of b to be extracted, lane 3 of v to be splatted, the two
// multiplied through llvm.aarch64.neon.sqdmull.v4i32, and the product combined
// with a through llvm.aarch64.neon.sqadd.v4i32.
test_vqdmlal_high_lane_s16(int32x4_t a,int16x8_t b,int16x4_t v)1170*67e74705SXin Li int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
1171*67e74705SXin Li   return vqdmlal_high_lane_s16(a, b, v, 3);
1172*67e74705SXin Li }
1173*67e74705SXin Li 
1174*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
1175*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
1176*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
1177*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
1178*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
1179*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1180*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1181*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1182*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
1183*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1184*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
1185*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
// Test case: returns vqdmlal_high_lane_s32(a, b, v, 1), i.e. selects lane 1 of
// the 2-lane vector v. The FileCheck expectations preceding this function
// require lanes 2-3 of b to be extracted, lane 1 of v to be splatted, the two
// multiplied through llvm.aarch64.neon.sqdmull.v2i64, and the product combined
// with a through llvm.aarch64.neon.sqadd.v2i64.
test_vqdmlal_high_lane_s32(int64x2_t a,int32x4_t b,int32x2_t v)1186*67e74705SXin Li int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
1187*67e74705SXin Li   return vqdmlal_high_lane_s32(a, b, v, 1);
1188*67e74705SXin Li }
1189*67e74705SXin Li 
1190*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
1191*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1192*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1193*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1194*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1195*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1196*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1197*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
1198*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1199*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
1200*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
// Test case: returns vqdmlsl_lane_s16(a, b, v, 3), i.e. selects lane 3 of the
// 4-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted, multiplied with b through
// llvm.aarch64.neon.sqdmull.v4i32, and the product combined with a through
// llvm.aarch64.neon.sqsub.v4i32.
test_vqdmlsl_lane_s16(int32x4_t a,int16x4_t b,int16x4_t v)1201*67e74705SXin Li int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
1202*67e74705SXin Li   return vqdmlsl_lane_s16(a, b, v, 3);
1203*67e74705SXin Li }
1204*67e74705SXin Li 
1205*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
1206*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
1207*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
1208*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1209*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1210*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1211*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1212*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
1213*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1214*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
1215*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
// Test case: returns vqdmlsl_lane_s32(a, b, v, 1), i.e. selects lane 1 of the
// 2-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted, multiplied with b through
// llvm.aarch64.neon.sqdmull.v2i64, and the product combined with a through
// llvm.aarch64.neon.sqsub.v2i64.
test_vqdmlsl_lane_s32(int64x2_t a,int32x2_t b,int32x2_t v)1216*67e74705SXin Li int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
1217*67e74705SXin Li   return vqdmlsl_lane_s32(a, b, v, 1);
1218*67e74705SXin Li }
1219*67e74705SXin Li 
1220*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
1221*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1222*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1223*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1224*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
1225*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1226*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1227*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1228*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
1229*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1230*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
1231*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
// Test case: returns vqdmlsl_high_lane_s16(a, b, v, 3), i.e. selects lane 3 of
// the 4-lane vector v. The FileCheck expectations preceding this function
// require lanes 4-7 of b to be extracted, lane 3 of v to be splatted, the two
// multiplied through llvm.aarch64.neon.sqdmull.v4i32, and the product combined
// with a through llvm.aarch64.neon.sqsub.v4i32.
test_vqdmlsl_high_lane_s16(int32x4_t a,int16x8_t b,int16x4_t v)1232*67e74705SXin Li int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
1233*67e74705SXin Li   return vqdmlsl_high_lane_s16(a, b, v, 3);
1234*67e74705SXin Li }
1235*67e74705SXin Li 
1236*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
1237*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
1238*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
1239*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
1240*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
1241*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1242*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1243*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1244*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
1245*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1246*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
1247*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
// Test case: returns vqdmlsl_high_lane_s32(a, b, v, 1), i.e. selects lane 1 of
// the 2-lane vector v. The FileCheck expectations preceding this function
// require lanes 2-3 of b to be extracted, lane 1 of v to be splatted, the two
// multiplied through llvm.aarch64.neon.sqdmull.v2i64, and the product combined
// with a through llvm.aarch64.neon.sqsub.v2i64.
test_vqdmlsl_high_lane_s32(int64x2_t a,int32x4_t b,int32x2_t v)1248*67e74705SXin Li int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
1249*67e74705SXin Li   return vqdmlsl_high_lane_s32(a, b, v, 1);
1250*67e74705SXin Li }
1251*67e74705SXin Li 
1252*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 {
1253*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1254*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1255*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1256*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1257*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1258*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2
1259*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
1260*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
1261*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Test case: returns vqdmull_lane_s16(a, v, 3), i.e. selects lane 3 of the
// 4-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted and multiplied with a through
// llvm.aarch64.neon.sqdmull.v4i32.
test_vqdmull_lane_s16(int16x4_t a,int16x4_t v)1262*67e74705SXin Li int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) {
1263*67e74705SXin Li   return vqdmull_lane_s16(a, v, 3);
1264*67e74705SXin Li }
1265*67e74705SXin Li 
1266*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 {
1267*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
1268*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1269*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1270*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1271*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1272*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
1273*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
1274*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
1275*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Test case: returns vqdmull_lane_s32(a, v, 1), i.e. selects lane 1 of the
// 2-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted and multiplied with a through
// llvm.aarch64.neon.sqdmull.v2i64.
test_vqdmull_lane_s32(int32x2_t a,int32x2_t v)1276*67e74705SXin Li int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) {
1277*67e74705SXin Li   return vqdmull_lane_s32(a, v, 1);
1278*67e74705SXin Li }
1279*67e74705SXin Li 
1280*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 {
1281*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1282*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1283*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1284*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1285*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1286*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2
1287*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
1288*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
1289*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Test case: returns vqdmull_laneq_s16(a, v, 3), i.e. selects lane 3 of the
// 8-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted into a 4-lane vector and multiplied with a through
// llvm.aarch64.neon.sqdmull.v4i32.
test_vqdmull_laneq_s16(int16x4_t a,int16x8_t v)1290*67e74705SXin Li int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) {
1291*67e74705SXin Li   return vqdmull_laneq_s16(a, v, 3);
1292*67e74705SXin Li }
1293*67e74705SXin Li 
1294*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 {
1295*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
1296*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1297*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1298*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1299*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1300*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
1301*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
1302*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
1303*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Test case: returns vqdmull_laneq_s32(a, v, 3), i.e. selects lane 3 of the
// 4-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted into a 2-lane vector and multiplied with a through
// llvm.aarch64.neon.sqdmull.v2i64.
test_vqdmull_laneq_s32(int32x2_t a,int32x4_t v)1304*67e74705SXin Li int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) {
1305*67e74705SXin Li   return vqdmull_laneq_s32(a, v, 3);
1306*67e74705SXin Li }
1307*67e74705SXin Li 
1308*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 {
1309*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1310*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1311*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
1312*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1313*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1314*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1315*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2
1316*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
1317*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
1318*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Test case: returns vqdmull_high_lane_s16(a, v, 3), i.e. selects lane 3 of
// the 4-lane vector v. The FileCheck expectations preceding this function
// require lanes 4-7 of a to be extracted, lane 3 of v to be splatted, and both
// to be fed to llvm.aarch64.neon.sqdmull.v4i32.
test_vqdmull_high_lane_s16(int16x8_t a,int16x4_t v)1319*67e74705SXin Li int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) {
1320*67e74705SXin Li   return vqdmull_high_lane_s16(a, v, 3);
1321*67e74705SXin Li }
1322*67e74705SXin Li 
1323*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 {
1324*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
1325*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
1326*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
1327*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1328*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1329*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1330*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
1331*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
1332*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
1333*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Test case: returns vqdmull_high_lane_s32(a, v, 1), i.e. selects lane 1 of
// the 2-lane vector v. The FileCheck expectations preceding this function
// require lanes 2-3 of a to be extracted, lane 1 of v to be splatted, and both
// to be fed to llvm.aarch64.neon.sqdmull.v2i64.
test_vqdmull_high_lane_s32(int32x4_t a,int32x2_t v)1334*67e74705SXin Li int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) {
1335*67e74705SXin Li   return vqdmull_high_lane_s32(a, v, 1);
1336*67e74705SXin Li }
1337*67e74705SXin Li 
1338*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 {
1339*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1340*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1341*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
1342*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1343*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1344*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1345*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2
1346*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
1347*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
1348*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Test case: returns vqdmull_high_laneq_s16(a, v, 7), i.e. selects lane 7 of
// the 8-lane vector v. The FileCheck expectations preceding this function
// require lanes 4-7 of a to be extracted, lane 7 of v to be splatted, and both
// to be fed to llvm.aarch64.neon.sqdmull.v4i32.
test_vqdmull_high_laneq_s16(int16x8_t a,int16x8_t v)1349*67e74705SXin Li int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) {
1350*67e74705SXin Li   return vqdmull_high_laneq_s16(a, v, 7);
1351*67e74705SXin Li }
1352*67e74705SXin Li 
1353*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 {
1354*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
1355*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
1356*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
1357*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1358*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1359*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1360*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
1361*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
1362*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
1363*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Test case: returns vqdmull_high_laneq_s32(a, v, 3), i.e. selects lane 3 of
// the 4-lane vector v. The FileCheck expectations preceding this function
// require lanes 2-3 of a to be extracted, lane 3 of v to be splatted, and both
// to be fed to llvm.aarch64.neon.sqdmull.v2i64.
test_vqdmull_high_laneq_s32(int32x4_t a,int32x4_t v)1364*67e74705SXin Li int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) {
1365*67e74705SXin Li   return vqdmull_high_laneq_s32(a, v, 3);
1366*67e74705SXin Li }
1367*67e74705SXin Li 
1368*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 {
1369*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1370*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1371*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1372*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1373*67e74705SXin Li // CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1374*67e74705SXin Li // CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2
1375*67e74705SXin Li // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
1376*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
1377*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Test case: returns vqdmulh_lane_s16(a, v, 3), i.e. selects lane 3 of the
// 4-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted and passed with a to
// llvm.aarch64.neon.sqdmulh.v4i16.
test_vqdmulh_lane_s16(int16x4_t a,int16x4_t v)1378*67e74705SXin Li int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) {
1379*67e74705SXin Li   return vqdmulh_lane_s16(a, v, 3);
1380*67e74705SXin Li }
1381*67e74705SXin Li 
1382*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 {
1383*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1384*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1385*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
1386*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1387*67e74705SXin Li // CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1388*67e74705SXin Li // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2
1389*67e74705SXin Li // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
1390*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
1391*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Test case: returns vqdmulhq_lane_s16(a, v, 3), i.e. selects lane 3 of the
// 4-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted across 8 lanes and passed with a to
// llvm.aarch64.neon.sqdmulh.v8i16.
test_vqdmulhq_lane_s16(int16x8_t a,int16x4_t v)1392*67e74705SXin Li int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
1393*67e74705SXin Li   return vqdmulhq_lane_s16(a, v, 3);
1394*67e74705SXin Li }
1395*67e74705SXin Li 
1396*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 {
1397*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
1398*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1399*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1400*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1401*67e74705SXin Li // CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1402*67e74705SXin Li // CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2
1403*67e74705SXin Li // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
1404*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
1405*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Test case: returns vqdmulh_lane_s32(a, v, 1), i.e. selects lane 1 of the
// 2-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted and passed with a to
// llvm.aarch64.neon.sqdmulh.v2i32.
test_vqdmulh_lane_s32(int32x2_t a,int32x2_t v)1406*67e74705SXin Li int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) {
1407*67e74705SXin Li   return vqdmulh_lane_s32(a, v, 1);
1408*67e74705SXin Li }
1409*67e74705SXin Li 
1410*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 {
1411*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1412*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1413*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
1414*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1415*67e74705SXin Li // CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1416*67e74705SXin Li // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2
1417*67e74705SXin Li // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
1418*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
1419*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Test case: returns vqdmulhq_lane_s32(a, v, 1), i.e. selects lane 1 of the
// 2-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted across 4 lanes and passed with a to
// llvm.aarch64.neon.sqdmulh.v4i32.
test_vqdmulhq_lane_s32(int32x4_t a,int32x2_t v)1420*67e74705SXin Li int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) {
1421*67e74705SXin Li   return vqdmulhq_lane_s32(a, v, 1);
1422*67e74705SXin Li }
1423*67e74705SXin Li 
1424*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 {
1425*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1426*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1427*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
1428*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1429*67e74705SXin Li // CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1430*67e74705SXin Li // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2
1431*67e74705SXin Li // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
1432*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
1433*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Test case: returns vqrdmulh_lane_s16(a, v, 3), i.e. selects lane 3 of the
// 4-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted and passed with a to
// llvm.aarch64.neon.sqrdmulh.v4i16.
test_vqrdmulh_lane_s16(int16x4_t a,int16x4_t v)1434*67e74705SXin Li int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) {
1435*67e74705SXin Li   return vqrdmulh_lane_s16(a, v, 3);
1436*67e74705SXin Li }
1437*67e74705SXin Li 
1438*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 {
1439*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1440*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1441*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
1442*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1443*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1444*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2
1445*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
1446*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
1447*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Test case: returns vqrdmulhq_lane_s16(a, v, 3), i.e. selects lane 3 of the
// 4-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted across 8 lanes and passed with a to
// llvm.aarch64.neon.sqrdmulh.v8i16.
test_vqrdmulhq_lane_s16(int16x8_t a,int16x4_t v)1448*67e74705SXin Li int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
1449*67e74705SXin Li   return vqrdmulhq_lane_s16(a, v, 3);
1450*67e74705SXin Li }
1451*67e74705SXin Li 
1452*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 {
1453*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
1454*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1455*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
1456*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1457*67e74705SXin Li // CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1458*67e74705SXin Li // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2
1459*67e74705SXin Li // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
1460*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
1461*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Test case: returns vqrdmulh_lane_s32(a, v, 1), i.e. selects lane 1 of the
// 2-lane vector v. The FileCheck expectations preceding this function require
// that lane to be splatted and passed with a to
// llvm.aarch64.neon.sqrdmulh.v2i32.
test_vqrdmulh_lane_s32(int32x2_t a,int32x2_t v)1462*67e74705SXin Li int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) {
1463*67e74705SXin Li   return vqrdmulh_lane_s32(a, v, 1);
1464*67e74705SXin Li }
1465*67e74705SXin Li 
1466*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 {
1467*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1468*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1469*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
1470*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1471*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1472*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2
1473*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
1474*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
1475*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
test_vqrdmulhq_lane_s32(int32x4_t a,int32x2_t v)1476*67e74705SXin Li int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) {
1477*67e74705SXin Li   return vqrdmulhq_lane_s32(a, v, 1);
1478*67e74705SXin Li }
1479*67e74705SXin Li 
1480*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) #0 {
1481*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
1482*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]]
1483*67e74705SXin Li // CHECK:   ret <2 x float> [[MUL]]
test_vmul_lane_f32(float32x2_t a,float32x2_t v)1484*67e74705SXin Li float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t v) {
1485*67e74705SXin Li   return vmul_lane_f32(a, v, 1);
1486*67e74705SXin Li }
1487*67e74705SXin Li 
1488*67e74705SXin Li 
1489*67e74705SXin Li // CHECK-LABEL: define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) #0 {
1490*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1491*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %v to <8 x i8>
1492*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double
1493*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
1494*67e74705SXin Li // CHECK:   [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
1495*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]]
1496*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double>
1497*67e74705SXin Li // CHECK:   ret <1 x double> [[TMP5]]
test_vmul_lane_f64(float64x1_t a,float64x1_t v)1498*67e74705SXin Li float64x1_t test_vmul_lane_f64(float64x1_t a, float64x1_t v) {
1499*67e74705SXin Li   return vmul_lane_f64(a, v, 0);
1500*67e74705SXin Li }
1501*67e74705SXin Li 
1502*67e74705SXin Li 
1503*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) #0 {
1504*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1505*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]]
1506*67e74705SXin Li // CHECK:   ret <4 x float> [[MUL]]
test_vmulq_lane_f32(float32x4_t a,float32x2_t v)1507*67e74705SXin Li float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t v) {
1508*67e74705SXin Li   return vmulq_lane_f32(a, v, 1);
1509*67e74705SXin Li }
1510*67e74705SXin Li 
1511*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) #0 {
1512*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer
1513*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]]
1514*67e74705SXin Li // CHECK:   ret <2 x double> [[MUL]]
test_vmulq_lane_f64(float64x2_t a,float64x1_t v)1515*67e74705SXin Li float64x2_t test_vmulq_lane_f64(float64x2_t a, float64x1_t v) {
1516*67e74705SXin Li   return vmulq_lane_f64(a, v, 0);
1517*67e74705SXin Li }
1518*67e74705SXin Li 
1519*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) #0 {
1520*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
1521*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]]
1522*67e74705SXin Li // CHECK:   ret <2 x float> [[MUL]]
test_vmul_laneq_f32(float32x2_t a,float32x4_t v)1523*67e74705SXin Li float32x2_t test_vmul_laneq_f32(float32x2_t a, float32x4_t v) {
1524*67e74705SXin Li   return vmul_laneq_f32(a, v, 3);
1525*67e74705SXin Li }
1526*67e74705SXin Li 
1527*67e74705SXin Li // CHECK-LABEL: define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) #0 {
1528*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1529*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v to <16 x i8>
1530*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double
1531*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1532*67e74705SXin Li // CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
1533*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]]
1534*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double>
1535*67e74705SXin Li // CHECK:   ret <1 x double> [[TMP5]]
test_vmul_laneq_f64(float64x1_t a,float64x2_t v)1536*67e74705SXin Li float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) {
1537*67e74705SXin Li   return vmul_laneq_f64(a, v, 1);
1538*67e74705SXin Li }
1539*67e74705SXin Li 
1540*67e74705SXin Li 
1541*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) #0 {
1542*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1543*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]]
1544*67e74705SXin Li // CHECK:   ret <4 x float> [[MUL]]
test_vmulq_laneq_f32(float32x4_t a,float32x4_t v)1545*67e74705SXin Li float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) {
1546*67e74705SXin Li   return vmulq_laneq_f32(a, v, 3);
1547*67e74705SXin Li }
1548*67e74705SXin Li 
1549*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) #0 {
1550*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> <i32 1, i32 1>
1551*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]]
1552*67e74705SXin Li // CHECK:   ret <2 x double> [[MUL]]
test_vmulq_laneq_f64(float64x2_t a,float64x2_t v)1553*67e74705SXin Li float64x2_t test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) {
1554*67e74705SXin Li   return vmulq_laneq_f64(a, v, 1);
1555*67e74705SXin Li }
1556*67e74705SXin Li 
1557*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) #0 {
1558*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
1559*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1560*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8>
1561*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1562*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1563*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2
1564*67e74705SXin Li // CHECK:   ret <2 x float> [[VMULX2_I]]
test_vmulx_lane_f32(float32x2_t a,float32x2_t v)1565*67e74705SXin Li float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) {
1566*67e74705SXin Li   return vmulx_lane_f32(a, v, 1);
1567*67e74705SXin Li }
1568*67e74705SXin Li 
1569*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) #0 {
1570*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1571*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
1572*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8>
1573*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1574*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1575*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2
1576*67e74705SXin Li // CHECK:   ret <4 x float> [[VMULX2_I]]
test_vmulxq_lane_f32(float32x4_t a,float32x2_t v)1577*67e74705SXin Li float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) {
1578*67e74705SXin Li   return vmulxq_lane_f32(a, v, 1);
1579*67e74705SXin Li }
1580*67e74705SXin Li 
1581*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) #0 {
1582*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer
1583*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
1584*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8>
1585*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1586*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1587*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2
1588*67e74705SXin Li // CHECK:   ret <2 x double> [[VMULX2_I]]
test_vmulxq_lane_f64(float64x2_t a,float64x1_t v)1589*67e74705SXin Li float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) {
1590*67e74705SXin Li   return vmulxq_lane_f64(a, v, 0);
1591*67e74705SXin Li }
1592*67e74705SXin Li 
1593*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) #0 {
1594*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
1595*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1596*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8>
1597*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1598*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1599*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2
1600*67e74705SXin Li // CHECK:   ret <2 x float> [[VMULX2_I]]
test_vmulx_laneq_f32(float32x2_t a,float32x4_t v)1601*67e74705SXin Li float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) {
1602*67e74705SXin Li   return vmulx_laneq_f32(a, v, 3);
1603*67e74705SXin Li }
1604*67e74705SXin Li 
1605*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) #0 {
1606*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1607*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
1608*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8>
1609*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1610*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1611*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2
1612*67e74705SXin Li // CHECK:   ret <4 x float> [[VMULX2_I]]
test_vmulxq_laneq_f32(float32x4_t a,float32x4_t v)1613*67e74705SXin Li float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) {
1614*67e74705SXin Li   return vmulxq_laneq_f32(a, v, 3);
1615*67e74705SXin Li }
1616*67e74705SXin Li 
1617*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) #0 {
1618*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> <i32 1, i32 1>
1619*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
1620*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8>
1621*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1622*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1623*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2
1624*67e74705SXin Li // CHECK:   ret <2 x double> [[VMULX2_I]]
test_vmulxq_laneq_f64(float64x2_t a,float64x2_t v)1625*67e74705SXin Li float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) {
1626*67e74705SXin Li   return vmulxq_laneq_f64(a, v, 1);
1627*67e74705SXin Li }
1628*67e74705SXin Li 
1629*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
1630*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
1631*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
1632*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
1633*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD]]
test_vmla_lane_s16_0(int16x4_t a,int16x4_t b,int16x4_t v)1634*67e74705SXin Li int16x4_t test_vmla_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) {
1635*67e74705SXin Li   return vmla_lane_s16(a, b, v, 0);
1636*67e74705SXin Li }
1637*67e74705SXin Li 
1638*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
1639*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
1640*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
1641*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
1642*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD]]
test_vmlaq_lane_s16_0(int16x8_t a,int16x8_t b,int16x4_t v)1643*67e74705SXin Li int16x8_t test_vmlaq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) {
1644*67e74705SXin Li   return vmlaq_lane_s16(a, b, v, 0);
1645*67e74705SXin Li }
1646*67e74705SXin Li 
1647*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
1648*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
1649*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
1650*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
1651*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD]]
test_vmla_lane_s32_0(int32x2_t a,int32x2_t b,int32x2_t v)1652*67e74705SXin Li int32x2_t test_vmla_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) {
1653*67e74705SXin Li   return vmla_lane_s32(a, b, v, 0);
1654*67e74705SXin Li }
1655*67e74705SXin Li 
1656*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
1657*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
1658*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
1659*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
1660*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlaq_lane_s32_0(int32x4_t a,int32x4_t b,int32x2_t v)1661*67e74705SXin Li int32x4_t test_vmlaq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) {
1662*67e74705SXin Li   return vmlaq_lane_s32(a, b, v, 0);
1663*67e74705SXin Li }
1664*67e74705SXin Li 
1665*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
1666*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
1667*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
1668*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
1669*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD]]
test_vmla_laneq_s16_0(int16x4_t a,int16x4_t b,int16x8_t v)1670*67e74705SXin Li int16x4_t test_vmla_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) {
1671*67e74705SXin Li   return vmla_laneq_s16(a, b, v, 0);
1672*67e74705SXin Li }
1673*67e74705SXin Li 
1674*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
1675*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
1676*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
1677*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
1678*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD]]
test_vmlaq_laneq_s16_0(int16x8_t a,int16x8_t b,int16x8_t v)1679*67e74705SXin Li int16x8_t test_vmlaq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) {
1680*67e74705SXin Li   return vmlaq_laneq_s16(a, b, v, 0);
1681*67e74705SXin Li }
1682*67e74705SXin Li 
1683*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
1684*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
1685*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
1686*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
1687*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD]]
test_vmla_laneq_s32_0(int32x2_t a,int32x2_t b,int32x4_t v)1688*67e74705SXin Li int32x2_t test_vmla_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) {
1689*67e74705SXin Li   return vmla_laneq_s32(a, b, v, 0);
1690*67e74705SXin Li }
1691*67e74705SXin Li 
1692*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
1693*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
1694*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
1695*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
1696*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlaq_laneq_s32_0(int32x4_t a,int32x4_t b,int32x4_t v)1697*67e74705SXin Li int32x4_t test_vmlaq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) {
1698*67e74705SXin Li   return vmlaq_laneq_s32(a, b, v, 0);
1699*67e74705SXin Li }
1700*67e74705SXin Li 
1701*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
1702*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
1703*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
1704*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
1705*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB]]
test_vmls_lane_s16_0(int16x4_t a,int16x4_t b,int16x4_t v)1706*67e74705SXin Li int16x4_t test_vmls_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) {
1707*67e74705SXin Li   return vmls_lane_s16(a, b, v, 0);
1708*67e74705SXin Li }
1709*67e74705SXin Li 
1710*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
1711*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
1712*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
1713*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
1714*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB]]
test_vmlsq_lane_s16_0(int16x8_t a,int16x8_t b,int16x4_t v)1715*67e74705SXin Li int16x8_t test_vmlsq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) {
1716*67e74705SXin Li   return vmlsq_lane_s16(a, b, v, 0);
1717*67e74705SXin Li }
1718*67e74705SXin Li 
1719*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
1720*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
1721*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
1722*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
1723*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB]]
test_vmls_lane_s32_0(int32x2_t a,int32x2_t b,int32x2_t v)1724*67e74705SXin Li int32x2_t test_vmls_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) {
1725*67e74705SXin Li   return vmls_lane_s32(a, b, v, 0);
1726*67e74705SXin Li }
1727*67e74705SXin Li 
1728*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
1729*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
1730*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
1731*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
1732*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsq_lane_s32_0(int32x4_t a,int32x4_t b,int32x2_t v)1733*67e74705SXin Li int32x4_t test_vmlsq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) {
1734*67e74705SXin Li   return vmlsq_lane_s32(a, b, v, 0);
1735*67e74705SXin Li }
1736*67e74705SXin Li 
1737*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
1738*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
1739*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
1740*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
1741*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB]]
test_vmls_laneq_s16_0(int16x4_t a,int16x4_t b,int16x8_t v)1742*67e74705SXin Li int16x4_t test_vmls_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) {
1743*67e74705SXin Li   return vmls_laneq_s16(a, b, v, 0);
1744*67e74705SXin Li }
1745*67e74705SXin Li 
1746*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
1747*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
1748*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
1749*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
1750*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB]]
test_vmlsq_laneq_s16_0(int16x8_t a,int16x8_t b,int16x8_t v)1751*67e74705SXin Li int16x8_t test_vmlsq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) {
1752*67e74705SXin Li   return vmlsq_laneq_s16(a, b, v, 0);
1753*67e74705SXin Li }
1754*67e74705SXin Li 
1755*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
1756*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
1757*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
1758*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
1759*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB]]
test_vmls_laneq_s32_0(int32x2_t a,int32x2_t b,int32x4_t v)1760*67e74705SXin Li int32x2_t test_vmls_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) {
1761*67e74705SXin Li   return vmls_laneq_s32(a, b, v, 0);
1762*67e74705SXin Li }
1763*67e74705SXin Li 
1764*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
1765*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
1766*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
1767*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
1768*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsq_laneq_s32_0(int32x4_t a,int32x4_t b,int32x4_t v)1769*67e74705SXin Li int32x4_t test_vmlsq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) {
1770*67e74705SXin Li   return vmlsq_laneq_s32(a, b, v, 0);
1771*67e74705SXin Li }
1772*67e74705SXin Li 
1773*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 {
1774*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
1775*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
1776*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL]]
test_vmul_lane_s16_0(int16x4_t a,int16x4_t v)1777*67e74705SXin Li int16x4_t test_vmul_lane_s16_0(int16x4_t a, int16x4_t v) {
1778*67e74705SXin Li   return vmul_lane_s16(a, v, 0);
1779*67e74705SXin Li }
1780*67e74705SXin Li 
1781*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 {
1782*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
1783*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
1784*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL]]
test_vmulq_lane_s16_0(int16x8_t a,int16x4_t v)1785*67e74705SXin Li int16x8_t test_vmulq_lane_s16_0(int16x8_t a, int16x4_t v) {
1786*67e74705SXin Li   return vmulq_lane_s16(a, v, 0);
1787*67e74705SXin Li }
1788*67e74705SXin Li 
1789*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 {
1790*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
1791*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
1792*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL]]
test_vmul_lane_s32_0(int32x2_t a,int32x2_t v)1793*67e74705SXin Li int32x2_t test_vmul_lane_s32_0(int32x2_t a, int32x2_t v) {
1794*67e74705SXin Li   return vmul_lane_s32(a, v, 0);
1795*67e74705SXin Li }
1796*67e74705SXin Li 
1797*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 {
1798*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
1799*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
1800*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL]]
test_vmulq_lane_s32_0(int32x4_t a,int32x2_t v)1801*67e74705SXin Li int32x4_t test_vmulq_lane_s32_0(int32x4_t a, int32x2_t v) {
1802*67e74705SXin Li   return vmulq_lane_s32(a, v, 0);
1803*67e74705SXin Li }
1804*67e74705SXin Li 
1805*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) #0 {
1806*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
1807*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
1808*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL]]
test_vmul_lane_u16_0(uint16x4_t a,uint16x4_t v)1809*67e74705SXin Li uint16x4_t test_vmul_lane_u16_0(uint16x4_t a, uint16x4_t v) {
1810*67e74705SXin Li   return vmul_lane_u16(a, v, 0);
1811*67e74705SXin Li }
1812*67e74705SXin Li 
1813*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) #0 {
1814*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
1815*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
1816*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL]]
test_vmulq_lane_u16_0(uint16x8_t a,uint16x4_t v)1817*67e74705SXin Li uint16x8_t test_vmulq_lane_u16_0(uint16x8_t a, uint16x4_t v) {
1818*67e74705SXin Li   return vmulq_lane_u16(a, v, 0);
1819*67e74705SXin Li }
1820*67e74705SXin Li 
1821*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) #0 {
1822*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
1823*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
1824*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL]]
test_vmul_lane_u32_0(uint32x2_t a,uint32x2_t v)1825*67e74705SXin Li uint32x2_t test_vmul_lane_u32_0(uint32x2_t a, uint32x2_t v) {
1826*67e74705SXin Li   return vmul_lane_u32(a, v, 0);
1827*67e74705SXin Li }
1828*67e74705SXin Li 
1829*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) #0 {
1830*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
1831*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
1832*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL]]
test_vmulq_lane_u32_0(uint32x4_t a,uint32x2_t v)1833*67e74705SXin Li uint32x4_t test_vmulq_lane_u32_0(uint32x4_t a, uint32x2_t v) {
1834*67e74705SXin Li   return vmulq_lane_u32(a, v, 0);
1835*67e74705SXin Li }
1836*67e74705SXin Li 
1837*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 {
1838*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
1839*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
1840*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL]]
test_vmul_laneq_s16_0(int16x4_t a,int16x8_t v)1841*67e74705SXin Li int16x4_t test_vmul_laneq_s16_0(int16x4_t a, int16x8_t v) {
1842*67e74705SXin Li   return vmul_laneq_s16(a, v, 0);
1843*67e74705SXin Li }
1844*67e74705SXin Li 
1845*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 {
1846*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
1847*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
1848*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL]]
test_vmulq_laneq_s16_0(int16x8_t a,int16x8_t v)1849*67e74705SXin Li int16x8_t test_vmulq_laneq_s16_0(int16x8_t a, int16x8_t v) {
1850*67e74705SXin Li   return vmulq_laneq_s16(a, v, 0);
1851*67e74705SXin Li }
1852*67e74705SXin Li 
1853*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 {
1854*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
1855*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
1856*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL]]
test_vmul_laneq_s32_0(int32x2_t a,int32x4_t v)1857*67e74705SXin Li int32x2_t test_vmul_laneq_s32_0(int32x2_t a, int32x4_t v) {
1858*67e74705SXin Li   return vmul_laneq_s32(a, v, 0);
1859*67e74705SXin Li }
1860*67e74705SXin Li 
1861*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 {
1862*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
1863*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
1864*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL]]
test_vmulq_laneq_s32_0(int32x4_t a,int32x4_t v)1865*67e74705SXin Li int32x4_t test_vmulq_laneq_s32_0(int32x4_t a, int32x4_t v) {
1866*67e74705SXin Li   return vmulq_laneq_s32(a, v, 0);
1867*67e74705SXin Li }
1868*67e74705SXin Li 
1869*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) #0 {
1870*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
1871*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
1872*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL]]
test_vmul_laneq_u16_0(uint16x4_t a,uint16x8_t v)1873*67e74705SXin Li uint16x4_t test_vmul_laneq_u16_0(uint16x4_t a, uint16x8_t v) {
1874*67e74705SXin Li   return vmul_laneq_u16(a, v, 0);
1875*67e74705SXin Li }
1876*67e74705SXin Li 
1877*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) #0 {
1878*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
1879*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
1880*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL]]
// Lane-0 case for vmulq_laneq_u16: both operands are 8 x i16. The expected IR
// above splats element 0 of %v across eight lanes and multiplies with %a.
test_vmulq_laneq_u16_0(uint16x8_t a,uint16x8_t v)1881*67e74705SXin Li uint16x8_t test_vmulq_laneq_u16_0(uint16x8_t a, uint16x8_t v) {
1882*67e74705SXin Li   return vmulq_laneq_u16(a, v, 0);
1883*67e74705SXin Li }
1884*67e74705SXin Li 
1885*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) #0 {
1886*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
1887*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
1888*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL]]
// Lane-0 case for vmul_laneq_u32. The expected IR above has the same shape as
// the signed s32 variant: a zero-mask shufflevector of %v to 2 x i32, then
// `mul` with %a.
test_vmul_laneq_u32_0(uint32x2_t a,uint32x4_t v)1889*67e74705SXin Li uint32x2_t test_vmul_laneq_u32_0(uint32x2_t a, uint32x4_t v) {
1890*67e74705SXin Li   return vmul_laneq_u32(a, v, 0);
1891*67e74705SXin Li }
1892*67e74705SXin Li 
1893*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) #0 {
1894*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
1895*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
1896*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL]]
// Lane-0 case for vmulq_laneq_u32: both operands are 4 x i32. The expected IR
// above is a zero-mask shufflevector of %v followed by `mul` with %a.
test_vmulq_laneq_u32_0(uint32x4_t a,uint32x4_t v)1897*67e74705SXin Li uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) {
1898*67e74705SXin Li   return vmulq_laneq_u32(a, v, 0);
1899*67e74705SXin Li }
1900*67e74705SXin Li 
1901*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
1902*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1903*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
1904*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
1905*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1906*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
1907*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1908*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1909*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
1910*67e74705SXin Li // CHECK:   ret <2 x float> [[FMLA2]]
// Lane-0 case for vfma_lane_f32. Per the expected IR above, the operands are
// round-tripped through i8-vector bitcasts, element 0 of %v is splatted, and
// the result is llvm.fma.v2f32(b, splat, a), with the accumulator %a last.
test_vfma_lane_f32_0(float32x2_t a,float32x2_t b,float32x2_t v)1911*67e74705SXin Li float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
1912*67e74705SXin Li   return vfma_lane_f32(a, b, v, 0);
1913*67e74705SXin Li }
1914*67e74705SXin Li 
1915*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
1916*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
1917*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
1918*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
1919*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1920*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer
1921*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1922*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1923*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
1924*67e74705SXin Li // CHECK:   ret <4 x float> [[FMLA2]]
// Lane-0 case for vfmaq_lane_f32: `a` and `b` are 4 x float while `v` is
// 2 x float. The expected IR above widens element 0 of %v to four lanes and
// calls llvm.fma.v4f32(b, splat, a).
test_vfmaq_lane_f32_0(float32x4_t a,float32x4_t b,float32x2_t v)1925*67e74705SXin Li float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
1926*67e74705SXin Li   return vfmaq_lane_f32(a, b, v, 0);
1927*67e74705SXin Li }
1928*67e74705SXin Li 
1929*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
1930*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1931*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
1932*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
1933*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1934*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1935*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
1936*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer
1937*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
1938*67e74705SXin Li // CHECK:   ret <2 x float> [[TMP6]]
// Lane-0 case for vfma_laneq_f32: `v` is the 4 x float vector. Unlike the
// _lane_ form, the expected IR above passes the splatted lane as the first
// fma operand: llvm.fma.v2f32(splat, b, a).
test_vfma_laneq_f32_0(float32x2_t a,float32x2_t b,float32x4_t v)1939*67e74705SXin Li float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
1940*67e74705SXin Li   return vfma_laneq_f32(a, b, v, 0);
1941*67e74705SXin Li }
1942*67e74705SXin Li 
1943*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
1944*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
1945*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
1946*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
1947*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1948*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1949*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
1950*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer
1951*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]])
1952*67e74705SXin Li // CHECK:   ret <4 x float> [[TMP6]]
// Lane-0 case for vfmaq_laneq_f32: all three operands are 4 x float. The
// expected IR above splats element 0 of %v and calls
// llvm.fma.v4f32(splat, b, a).
test_vfmaq_laneq_f32_0(float32x4_t a,float32x4_t b,float32x4_t v)1953*67e74705SXin Li float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
1954*67e74705SXin Li   return vfmaq_laneq_f32(a, b, v, 0);
1955*67e74705SXin Li }
1956*67e74705SXin Li 
1957*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
1958*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
1959*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1960*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
1961*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
1962*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1963*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
1964*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1965*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1966*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
1967*67e74705SXin Li // CHECK:   ret <2 x float> [[FMLA2]]
// Lane-0 case for vfms_lane_f32. The expected IR above negates %b first
// (fsub from a -0.0 vector) and then follows the vfma_lane shape:
// llvm.fma.v2f32(-b, splat of v[0], a).
test_vfms_lane_f32_0(float32x2_t a,float32x2_t b,float32x2_t v)1968*67e74705SXin Li float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
1969*67e74705SXin Li   return vfms_lane_f32(a, b, v, 0);
1970*67e74705SXin Li }
1971*67e74705SXin Li 
1972*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
1973*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
1974*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
1975*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
1976*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
1977*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
1978*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer
1979*67e74705SXin Li // CHECK:   [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1980*67e74705SXin Li // CHECK:   [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1981*67e74705SXin Li // CHECK:   [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
1982*67e74705SXin Li // CHECK:   ret <4 x float> [[FMLA2]]
// Lane-0 case for vfmsq_lane_f32: `v` is 2 x float, the other operands are
// 4 x float. The expected IR above negates %b via fsub from -0.0, splats
// element 0 of %v to four lanes, and calls llvm.fma.v4f32(-b, splat, a).
test_vfmsq_lane_f32_0(float32x4_t a,float32x4_t b,float32x2_t v)1983*67e74705SXin Li float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
1984*67e74705SXin Li   return vfmsq_lane_f32(a, b, v, 0);
1985*67e74705SXin Li }
1986*67e74705SXin Li 
1987*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
1988*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
1989*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
1990*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
1991*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
1992*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1993*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1994*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
1995*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer
1996*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
1997*67e74705SXin Li // CHECK:   ret <2 x float> [[TMP6]]
// Lane-0 case for vfms_laneq_f32: `v` is the 4 x float vector. The expected
// IR above negates %b (fsub from -0.0) and passes the splatted lane first:
// llvm.fma.v2f32(splat, -b, a).
test_vfms_laneq_f32_0(float32x2_t a,float32x2_t b,float32x4_t v)1998*67e74705SXin Li float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
1999*67e74705SXin Li   return vfms_laneq_f32(a, b, v, 0);
2000*67e74705SXin Li }
2001*67e74705SXin Li 
2002*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
2003*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
2004*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
2005*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
2006*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
2007*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
2008*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
2009*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
2010*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer
2011*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]])
2012*67e74705SXin Li // CHECK:   ret <4 x float> [[TMP6]]
// Lane-0 case for vfmsq_laneq_f32: all operands are 4 x float. The expected
// IR above negates %b (fsub from -0.0), splats element 0 of %v, and calls
// llvm.fma.v4f32(splat, -b, a).
test_vfmsq_laneq_f32_0(float32x4_t a,float32x4_t b,float32x4_t v)2013*67e74705SXin Li float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
2014*67e74705SXin Li   return vfmsq_laneq_f32(a, b, v, 0);
2015*67e74705SXin Li }
2016*67e74705SXin Li 
2017*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
2018*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
2019*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
2020*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
2021*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
2022*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
2023*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
2024*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer
2025*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]])
2026*67e74705SXin Li // CHECK:   ret <2 x double> [[TMP6]]
// Lane-0 case for the double-precision vfmaq_laneq_f64: all operands are
// 2 x double. The expected IR above splats element 0 of %v and calls
// llvm.fma.v2f64(splat, b, a).
test_vfmaq_laneq_f64_0(float64x2_t a,float64x2_t b,float64x2_t v)2027*67e74705SXin Li float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) {
2028*67e74705SXin Li   return vfmaq_laneq_f64(a, b, v, 0);
2029*67e74705SXin Li }
2030*67e74705SXin Li 
2031*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
2032*67e74705SXin Li // CHECK:   [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
2033*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
2034*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
2035*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
2036*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
2037*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
2038*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
2039*67e74705SXin Li // CHECK:   [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer
2040*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]])
2041*67e74705SXin Li // CHECK:   ret <2 x double> [[TMP6]]
// Lane-0 case for the double-precision vfmsq_laneq_f64. The expected IR above
// negates %b (fsub from a -0.0 vector), splats element 0 of %v, and calls
// llvm.fma.v2f64(splat, -b, a).
test_vfmsq_laneq_f64_0(float64x2_t a,float64x2_t b,float64x2_t v)2042*67e74705SXin Li float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) {
2043*67e74705SXin Li   return vfmsq_laneq_f64(a, b, v, 0);
2044*67e74705SXin Li }
2045*67e74705SXin Li 
2046*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
2047*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2048*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2049*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2050*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2051*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2052*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2053*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
2054*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// Lane-0 case for the widening multiply-accumulate vmlal_lane_s16. The
// expected IR above splats element 0 of %v, multiplies it with %b through
// @llvm.aarch64.neon.smull.v4i32, and adds the 4 x i32 product to %a.
test_vmlal_lane_s16_0(int32x4_t a,int16x4_t b,int16x4_t v)2055*67e74705SXin Li int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
2056*67e74705SXin Li   return vmlal_lane_s16(a, b, v, 0);
2057*67e74705SXin Li }
2058*67e74705SXin Li 
2059*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
2060*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2061*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2062*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2063*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2064*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2065*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2066*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
2067*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
// Lane-0 case for vmlal_lane_s32. The expected IR above splats element 0 of
// %v, widens via @llvm.aarch64.neon.smull.v2i64 with %b, and adds the
// 2 x i64 product to %a.
test_vmlal_lane_s32_0(int64x2_t a,int32x2_t b,int32x2_t v)2068*67e74705SXin Li int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
2069*67e74705SXin Li   return vmlal_lane_s32(a, b, v, 0);
2070*67e74705SXin Li }
2071*67e74705SXin Li 
2072*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
2073*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2074*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2075*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2076*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2077*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2078*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2079*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
2080*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// Lane-0 case for vmlal_laneq_s16: `v` is the 8 x i16 vector. The expected IR
// above narrows it to a 4-lane splat of element 0, calls smull.v4i32 with %b,
// and adds the result to %a.
test_vmlal_laneq_s16_0(int32x4_t a,int16x4_t b,int16x8_t v)2081*67e74705SXin Li int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
2082*67e74705SXin Li   return vmlal_laneq_s16(a, b, v, 0);
2083*67e74705SXin Li }
2084*67e74705SXin Li 
2085*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
2086*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2087*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2088*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2089*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2090*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2091*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2092*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
2093*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
// Lane-0 case for vmlal_laneq_s32: `v` is the 4 x i32 vector. The expected IR
// above narrows it to a 2-lane splat of element 0, calls smull.v2i64 with %b,
// and adds the result to %a.
test_vmlal_laneq_s32_0(int64x2_t a,int32x2_t b,int32x4_t v)2094*67e74705SXin Li int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
2095*67e74705SXin Li   return vmlal_laneq_s32(a, b, v, 0);
2096*67e74705SXin Li }
2097*67e74705SXin Li 
2098*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
2099*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2100*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2101*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2102*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2103*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2104*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2105*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2106*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
2107*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// Lane-0 case for vmlal_high_lane_s16. The expected IR above first extracts
// elements 4..7 of %b, then splats element 0 of %v, multiplies the two via
// smull.v4i32, and adds the product to %a.
test_vmlal_high_lane_s16_0(int32x4_t a,int16x8_t b,int16x4_t v)2108*67e74705SXin Li int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
2109*67e74705SXin Li   return vmlal_high_lane_s16(a, b, v, 0);
2110*67e74705SXin Li }
2111*67e74705SXin Li 
2112*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
2113*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2114*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2115*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2116*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2117*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2118*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2119*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2120*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
2121*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
// Lane-0 case for vmlal_high_lane_s32. The expected IR above extracts
// elements 2..3 of %b, splats element 0 of %v, multiplies via smull.v2i64,
// and adds the product to %a.
test_vmlal_high_lane_s32_0(int64x2_t a,int32x4_t b,int32x2_t v)2122*67e74705SXin Li int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
2123*67e74705SXin Li   return vmlal_high_lane_s32(a, b, v, 0);
2124*67e74705SXin Li }
2125*67e74705SXin Li 
2126*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
2127*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2128*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2129*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2130*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2131*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2132*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2133*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2134*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
2135*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// Lane-0 case for vmlal_high_laneq_s16: `b` and `v` are both 8 x i16. The
// expected IR above takes elements 4..7 of %b, a 4-lane splat of element 0
// of %v, an smull.v4i32 of the two, and an add with %a.
test_vmlal_high_laneq_s16_0(int32x4_t a,int16x8_t b,int16x8_t v)2136*67e74705SXin Li int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
2137*67e74705SXin Li   return vmlal_high_laneq_s16(a, b, v, 0);
2138*67e74705SXin Li }
2139*67e74705SXin Li 
2140*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
2141*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2142*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2143*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2144*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2145*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2146*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2147*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2148*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
2149*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
// Lane-0 case for vmlal_high_laneq_s32: `b` and `v` are both 4 x i32. The
// expected IR above takes elements 2..3 of %b, a 2-lane splat of element 0
// of %v, an smull.v2i64 of the two, and an add with %a.
test_vmlal_high_laneq_s32_0(int64x2_t a,int32x4_t b,int32x4_t v)2150*67e74705SXin Li int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
2151*67e74705SXin Li   return vmlal_high_laneq_s32(a, b, v, 0);
2152*67e74705SXin Li }
2153*67e74705SXin Li 
2154*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
2155*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2156*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2157*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2158*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2159*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2160*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2161*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
2162*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
// Lane-0 case for the widening multiply-subtract vmlsl_lane_s16. The expected
// IR above matches the vmlal_lane_s16 shape (lane-0 splat of %v, smull.v4i32
// with %b), except the product is subtracted from %a instead of added.
test_vmlsl_lane_s16_0(int32x4_t a,int16x4_t b,int16x4_t v)2163*67e74705SXin Li int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
2164*67e74705SXin Li   return vmlsl_lane_s16(a, b, v, 0);
2165*67e74705SXin Li }
2166*67e74705SXin Li 
2167*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
2168*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2169*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2170*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2171*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2172*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2173*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2174*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
2175*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
// Lane-0 case for vmlsl_lane_s32. The expected IR above splats element 0 of
// %v, calls smull.v2i64 with %b, and subtracts the 2 x i64 product from %a.
test_vmlsl_lane_s32_0(int64x2_t a,int32x2_t b,int32x2_t v)2176*67e74705SXin Li int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
2177*67e74705SXin Li   return vmlsl_lane_s32(a, b, v, 0);
2178*67e74705SXin Li }
2179*67e74705SXin Li 
2180*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
2181*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2182*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2183*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2184*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2185*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2186*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2187*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
2188*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
// Lane-0 case for vmlsl_laneq_s16: `v` is the 8 x i16 vector. The expected IR
// above narrows it to a 4-lane splat of element 0, calls smull.v4i32 with %b,
// and subtracts the result from %a.
test_vmlsl_laneq_s16_0(int32x4_t a,int16x4_t b,int16x8_t v)2189*67e74705SXin Li int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
2190*67e74705SXin Li   return vmlsl_laneq_s16(a, b, v, 0);
2191*67e74705SXin Li }
2192*67e74705SXin Li 
2193*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
2194*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2195*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2196*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2197*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2198*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2199*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2200*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
2201*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
// Lane-0 case for vmlsl_laneq_s32: `v` is the 4 x i32 vector. The expected IR
// above narrows it to a 2-lane splat of element 0, calls smull.v2i64 with %b,
// and subtracts the result from %a.
test_vmlsl_laneq_s32_0(int64x2_t a,int32x2_t b,int32x4_t v)2202*67e74705SXin Li int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
2203*67e74705SXin Li   return vmlsl_laneq_s32(a, b, v, 0);
2204*67e74705SXin Li }
2205*67e74705SXin Li 
2206*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
2207*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2208*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2209*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2210*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2211*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2212*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2213*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2214*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
2215*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
// Lane-0 case for vmlsl_high_lane_s16. The expected IR above extracts
// elements 4..7 of %b, splats element 0 of %v, multiplies via smull.v4i32,
// and subtracts the product from %a.
test_vmlsl_high_lane_s16_0(int32x4_t a,int16x8_t b,int16x4_t v)2216*67e74705SXin Li int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
2217*67e74705SXin Li   return vmlsl_high_lane_s16(a, b, v, 0);
2218*67e74705SXin Li }
2219*67e74705SXin Li 
2220*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
2221*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2222*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2223*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2224*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2225*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2226*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2227*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2228*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
2229*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
// Lane-0 case for vmlsl_high_lane_s32. The expected IR above extracts
// elements 2..3 of %b, splats element 0 of %v, multiplies via smull.v2i64,
// and subtracts the product from %a.
test_vmlsl_high_lane_s32_0(int64x2_t a,int32x4_t b,int32x2_t v)2230*67e74705SXin Li int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
2231*67e74705SXin Li   return vmlsl_high_lane_s32(a, b, v, 0);
2232*67e74705SXin Li }
2233*67e74705SXin Li 
2234*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
2235*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2236*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2237*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2238*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2239*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2240*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2241*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2242*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
2243*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_high_laneq_s16_0(int32x4_t a,int16x8_t b,int16x8_t v)2244*67e74705SXin Li int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
2245*67e74705SXin Li   return vmlsl_high_laneq_s16(a, b, v, 0);
2246*67e74705SXin Li }
2247*67e74705SXin Li 
2248*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
2249*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2250*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2251*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2252*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2253*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2254*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2255*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2256*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
2257*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_high_laneq_s32_0(int64x2_t a,int32x4_t b,int32x4_t v)2258*67e74705SXin Li int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
2259*67e74705SXin Li   return vmlsl_high_laneq_s32(a, b, v, 0);
2260*67e74705SXin Li }
2261*67e74705SXin Li 
2262*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
2263*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2264*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2265*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2266*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2267*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2268*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2269*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
2270*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_lane_u16_0(int32x4_t a,int16x4_t b,int16x4_t v)2271*67e74705SXin Li int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
2272*67e74705SXin Li   return vmlal_lane_u16(a, b, v, 0);
2273*67e74705SXin Li }
2274*67e74705SXin Li 
2275*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
2276*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2277*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2278*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2279*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2280*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2281*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2282*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
2283*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_lane_u32_0(int64x2_t a,int32x2_t b,int32x2_t v)2284*67e74705SXin Li int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
2285*67e74705SXin Li   return vmlal_lane_u32(a, b, v, 0);
2286*67e74705SXin Li }
2287*67e74705SXin Li 
2288*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
2289*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2290*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2291*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2292*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2293*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2294*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2295*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
2296*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_laneq_u16_0(int32x4_t a,int16x4_t b,int16x8_t v)2297*67e74705SXin Li int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
2298*67e74705SXin Li   return vmlal_laneq_u16(a, b, v, 0);
2299*67e74705SXin Li }
2300*67e74705SXin Li 
2301*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
2302*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2303*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2304*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2305*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2306*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2307*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2308*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
2309*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_laneq_u32_0(int64x2_t a,int32x2_t b,int32x4_t v)2310*67e74705SXin Li int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
2311*67e74705SXin Li   return vmlal_laneq_u32(a, b, v, 0);
2312*67e74705SXin Li }
2313*67e74705SXin Li 
2314*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
2315*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2316*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2317*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2318*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2319*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2320*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2321*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2322*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
2323*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_high_lane_u16_0(int32x4_t a,int16x8_t b,int16x4_t v)2324*67e74705SXin Li int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
2325*67e74705SXin Li   return vmlal_high_lane_u16(a, b, v, 0);
2326*67e74705SXin Li }
2327*67e74705SXin Li 
2328*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
2329*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2330*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2331*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2332*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2333*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2334*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2335*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2336*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
2337*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_high_lane_u32_0(int64x2_t a,int32x4_t b,int32x2_t v)2338*67e74705SXin Li int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
2339*67e74705SXin Li   return vmlal_high_lane_u32(a, b, v, 0);
2340*67e74705SXin Li }
2341*67e74705SXin Li 
2342*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
2343*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2344*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2345*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2346*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2347*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2348*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2349*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2350*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
2351*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
test_vmlal_high_laneq_u16_0(int32x4_t a,int16x8_t b,int16x8_t v)2352*67e74705SXin Li int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
2353*67e74705SXin Li   return vmlal_high_laneq_u16(a, b, v, 0);
2354*67e74705SXin Li }
2355*67e74705SXin Li 
2356*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
2357*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2358*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2359*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2360*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2361*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2362*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2363*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2364*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
2365*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD]]
test_vmlal_high_laneq_u32_0(int64x2_t a,int32x4_t b,int32x4_t v)2366*67e74705SXin Li int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
2367*67e74705SXin Li   return vmlal_high_laneq_u32(a, b, v, 0);
2368*67e74705SXin Li }
2369*67e74705SXin Li 
2370*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
2371*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2372*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2373*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2374*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2375*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2376*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2377*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
2378*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_lane_u16_0(int32x4_t a,int16x4_t b,int16x4_t v)2379*67e74705SXin Li int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
2380*67e74705SXin Li   return vmlsl_lane_u16(a, b, v, 0);
2381*67e74705SXin Li }
2382*67e74705SXin Li 
2383*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
2384*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2385*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2386*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2387*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2388*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2389*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2390*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
2391*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_lane_u32_0(int64x2_t a,int32x2_t b,int32x2_t v)2392*67e74705SXin Li int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
2393*67e74705SXin Li   return vmlsl_lane_u32(a, b, v, 0);
2394*67e74705SXin Li }
2395*67e74705SXin Li 
2396*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
2397*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2398*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2399*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2400*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2401*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2402*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2403*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
2404*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_laneq_u16_0(int32x4_t a,int16x4_t b,int16x8_t v)2405*67e74705SXin Li int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
2406*67e74705SXin Li   return vmlsl_laneq_u16(a, b, v, 0);
2407*67e74705SXin Li }
2408*67e74705SXin Li 
2409*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
2410*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2411*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2412*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2413*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2414*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2415*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2416*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
2417*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_laneq_u32_0(int64x2_t a,int32x2_t b,int32x4_t v)2418*67e74705SXin Li int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
2419*67e74705SXin Li   return vmlsl_laneq_u32(a, b, v, 0);
2420*67e74705SXin Li }
2421*67e74705SXin Li 
2422*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
2423*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2424*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2425*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2426*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2427*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2428*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2429*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2430*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
2431*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_high_lane_u16_0(int32x4_t a,int16x8_t b,int16x4_t v)2432*67e74705SXin Li int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
2433*67e74705SXin Li   return vmlsl_high_lane_u16(a, b, v, 0);
2434*67e74705SXin Li }
2435*67e74705SXin Li 
2436*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
2437*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2438*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2439*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2440*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2441*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2442*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2443*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2444*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
2445*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_high_lane_u32_0(int64x2_t a,int32x4_t b,int32x2_t v)2446*67e74705SXin Li int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
2447*67e74705SXin Li   return vmlsl_high_lane_u32(a, b, v, 0);
2448*67e74705SXin Li }
2449*67e74705SXin Li 
2450*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
2451*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2452*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2453*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2454*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2455*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2456*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2457*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2458*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
2459*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
test_vmlsl_high_laneq_u16_0(int32x4_t a,int16x8_t b,int16x8_t v)2460*67e74705SXin Li int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
2461*67e74705SXin Li   return vmlsl_high_laneq_u16(a, b, v, 0);
2462*67e74705SXin Li }
2463*67e74705SXin Li 
2464*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
2465*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2466*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2467*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2468*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2469*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2470*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2471*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2472*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
2473*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB]]
test_vmlsl_high_laneq_u32_0(int64x2_t a,int32x4_t b,int32x4_t v)2474*67e74705SXin Li int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
2475*67e74705SXin Li   return vmlsl_high_laneq_u32(a, b, v, 0);
2476*67e74705SXin Li }
2477*67e74705SXin Li 
2478*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 {
2479*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2480*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2481*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2482*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2483*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2484*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2485*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_lane_s16_0(int16x4_t a,int16x4_t v)2486*67e74705SXin Li int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) {
2487*67e74705SXin Li   return vmull_lane_s16(a, v, 0);
2488*67e74705SXin Li }
2489*67e74705SXin Li 
2490*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 {
2491*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2492*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2493*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2494*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2495*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2496*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2497*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_lane_s32_0(int32x2_t a,int32x2_t v)2498*67e74705SXin Li int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) {
2499*67e74705SXin Li   return vmull_lane_s32(a, v, 0);
2500*67e74705SXin Li }
2501*67e74705SXin Li 
2502*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) #0 {
2503*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2504*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2505*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2506*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2507*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2508*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2509*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_lane_u16_0(uint16x4_t a,uint16x4_t v)2510*67e74705SXin Li uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) {
2511*67e74705SXin Li   return vmull_lane_u16(a, v, 0);
2512*67e74705SXin Li }
2513*67e74705SXin Li 
2514*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) #0 {
2515*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2516*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2517*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2518*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2519*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2520*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2521*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_lane_u32_0(uint32x2_t a,uint32x2_t v)2522*67e74705SXin Li uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) {
2523*67e74705SXin Li   return vmull_lane_u32(a, v, 0);
2524*67e74705SXin Li }
2525*67e74705SXin Li 
2526*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 {
2527*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2528*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2529*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2530*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2531*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2532*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2533*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2534*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_high_lane_s16_0(int16x8_t a,int16x4_t v)2535*67e74705SXin Li int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) {
2536*67e74705SXin Li   return vmull_high_lane_s16(a, v, 0);
2537*67e74705SXin Li }
2538*67e74705SXin Li 
2539*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 {
2540*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
2541*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2542*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2543*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2544*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2545*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2546*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2547*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_high_lane_s32_0(int32x4_t a,int32x2_t v)2548*67e74705SXin Li int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) {
2549*67e74705SXin Li   return vmull_high_lane_s32(a, v, 0);
2550*67e74705SXin Li }
2551*67e74705SXin Li 
2552*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) #0 {
2553*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2554*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2555*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2556*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2557*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2558*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2559*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2560*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_high_lane_u16_0(uint16x8_t a,uint16x4_t v)2561*67e74705SXin Li uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) {
2562*67e74705SXin Li   return vmull_high_lane_u16(a, v, 0);
2563*67e74705SXin Li }
2564*67e74705SXin Li 
2565*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) #0 {
2566*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
2567*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2568*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2569*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2570*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2571*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2572*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2573*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
test_vmull_high_lane_u32_0(uint32x4_t a,uint32x2_t v)2574*67e74705SXin Li uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) {
2575*67e74705SXin Li   return vmull_high_lane_u32(a, v, 0);
2576*67e74705SXin Li }
2577*67e74705SXin Li 
2578*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 {
2579*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2580*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2581*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2582*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2583*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2584*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2585*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
test_vmull_laneq_s16_0(int16x4_t a,int16x8_t v)2586*67e74705SXin Li int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) {
2587*67e74705SXin Li   return vmull_laneq_s16(a, v, 0);
2588*67e74705SXin Li }
2589*67e74705SXin Li 
2590*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 {
2591*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2592*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2593*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2594*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2595*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2596*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2597*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
// Lane-0 codegen test for vmull_laneq_s32: forwards a and v to the intrinsic
// with constant lane index 0. The FileCheck expectations above look for a
// zeroinitializer shuffle mask on %v feeding @llvm.aarch64.neon.smull.v2i64.
test_vmull_laneq_s32_0(int32x2_t a,int32x4_t v)2598*67e74705SXin Li int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) {
2599*67e74705SXin Li   return vmull_laneq_s32(a, v, 0);
2600*67e74705SXin Li }
2601*67e74705SXin Li 
2602*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) #0 {
2603*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2604*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2605*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2606*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2607*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2608*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2609*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
// Lane-0 codegen test for vmull_laneq_u16: forwards a and v to the intrinsic
// with constant lane index 0. The FileCheck expectations above look for a
// zeroinitializer shuffle mask on %v feeding @llvm.aarch64.neon.umull.v4i32.
test_vmull_laneq_u16_0(uint16x4_t a,uint16x8_t v)2610*67e74705SXin Li uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) {
2611*67e74705SXin Li   return vmull_laneq_u16(a, v, 0);
2612*67e74705SXin Li }
2613*67e74705SXin Li 
2614*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) #0 {
2615*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2616*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2617*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2618*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2619*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2620*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2621*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
// Lane-0 codegen test for vmull_laneq_u32: forwards a and v to the intrinsic
// with constant lane index 0. The FileCheck expectations above look for a
// zeroinitializer shuffle mask on %v feeding @llvm.aarch64.neon.umull.v2i64.
test_vmull_laneq_u32_0(uint32x2_t a,uint32x4_t v)2622*67e74705SXin Li uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) {
2623*67e74705SXin Li   return vmull_laneq_u32(a, v, 0);
2624*67e74705SXin Li }
2625*67e74705SXin Li 
2626*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 {
2627*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2628*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2629*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2630*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2631*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2632*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2633*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2634*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
// Lane-0 codegen test for vmull_high_laneq_s16: forwards a and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 4..7 of %a, a zeroinitializer shuffle mask
// on %v, and a call to @llvm.aarch64.neon.smull.v4i32.
test_vmull_high_laneq_s16_0(int16x8_t a,int16x8_t v)2635*67e74705SXin Li int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) {
2636*67e74705SXin Li   return vmull_high_laneq_s16(a, v, 0);
2637*67e74705SXin Li }
2638*67e74705SXin Li 
2639*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 {
2640*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
2641*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2642*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2643*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2644*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2645*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2646*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2647*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
// Lane-0 codegen test for vmull_high_laneq_s32: forwards a and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 2..3 of %a, a zeroinitializer shuffle mask
// on %v, and a call to @llvm.aarch64.neon.smull.v2i64.
test_vmull_high_laneq_s32_0(int32x4_t a,int32x4_t v)2648*67e74705SXin Li int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) {
2649*67e74705SXin Li   return vmull_high_laneq_s32(a, v, 0);
2650*67e74705SXin Li }
2651*67e74705SXin Li 
2652*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) #0 {
2653*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2654*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2655*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2656*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2657*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2658*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2659*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
2660*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL2_I]]
// Lane-0 codegen test for vmull_high_laneq_u16: forwards a and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 4..7 of %a, a zeroinitializer shuffle mask
// on %v, and a call to @llvm.aarch64.neon.umull.v4i32.
test_vmull_high_laneq_u16_0(uint16x8_t a,uint16x8_t v)2661*67e74705SXin Li uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) {
2662*67e74705SXin Li   return vmull_high_laneq_u16(a, v, 0);
2663*67e74705SXin Li }
2664*67e74705SXin Li 
2665*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) #0 {
2666*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
2667*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2668*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2669*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2670*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2671*67e74705SXin Li // CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2672*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
2673*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL2_I]]
// Lane-0 codegen test for vmull_high_laneq_u32: forwards a and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 2..3 of %a, a zeroinitializer shuffle mask
// on %v, and a call to @llvm.aarch64.neon.umull.v2i64.
test_vmull_high_laneq_u32_0(uint32x4_t a,uint32x4_t v)2674*67e74705SXin Li uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) {
2675*67e74705SXin Li   return vmull_high_laneq_u32(a, v, 0);
2676*67e74705SXin Li }
2677*67e74705SXin Li 
2678*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
2679*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2680*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2681*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2682*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2683*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2684*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
2685*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
2686*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
2687*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
2688*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// Lane-0 codegen test for vqdmlal_lane_s16: forwards a, b and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a zeroinitializer shuffle mask on %v, a call to
// @llvm.aarch64.neon.sqdmull.v4i32 on %b and the shuffle, and then
// @llvm.aarch64.neon.sqadd.v4i32 combining %a with that product.
test_vqdmlal_lane_s16_0(int32x4_t a,int16x4_t b,int16x4_t v)2689*67e74705SXin Li int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
2690*67e74705SXin Li   return vqdmlal_lane_s16(a, b, v, 0);
2691*67e74705SXin Li }
2692*67e74705SXin Li 
2693*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
2694*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2695*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
2696*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2697*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2698*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2699*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
2700*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
2701*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
2702*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
2703*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
// Lane-0 codegen test for vqdmlal_lane_s32: forwards a, b and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a zeroinitializer shuffle mask on %v, a call to
// @llvm.aarch64.neon.sqdmull.v2i64 on %b and the shuffle, and then
// @llvm.aarch64.neon.sqadd.v2i64 combining %a with that product.
test_vqdmlal_lane_s32_0(int64x2_t a,int32x2_t b,int32x2_t v)2704*67e74705SXin Li int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
2705*67e74705SXin Li   return vqdmlal_lane_s32(a, b, v, 0);
2706*67e74705SXin Li }
2707*67e74705SXin Li 
2708*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
2709*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2710*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2711*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2712*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2713*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2714*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2715*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
2716*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
2717*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
2718*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
2719*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// Lane-0 codegen test for vqdmlal_high_lane_s16: forwards a, b and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 4..7 of %b, a zeroinitializer shuffle mask
// on %v, a call to @llvm.aarch64.neon.sqdmull.v4i32, and then
// @llvm.aarch64.neon.sqadd.v4i32 combining %a with that product.
test_vqdmlal_high_lane_s16_0(int32x4_t a,int16x8_t b,int16x4_t v)2720*67e74705SXin Li int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
2721*67e74705SXin Li   return vqdmlal_high_lane_s16(a, b, v, 0);
2722*67e74705SXin Li }
2723*67e74705SXin Li 
2724*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
2725*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2726*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2727*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
2728*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2729*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2730*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2731*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
2732*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
2733*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
2734*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
2735*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
// Lane-0 codegen test for vqdmlal_high_lane_s32: forwards a, b and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 2..3 of %b, a zeroinitializer shuffle mask
// on %v, a call to @llvm.aarch64.neon.sqdmull.v2i64, and then
// @llvm.aarch64.neon.sqadd.v2i64 combining %a with that product.
test_vqdmlal_high_lane_s32_0(int64x2_t a,int32x4_t b,int32x2_t v)2736*67e74705SXin Li int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
2737*67e74705SXin Li   return vqdmlal_high_lane_s32(a, b, v, 0);
2738*67e74705SXin Li }
2739*67e74705SXin Li 
2740*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
2741*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2742*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2743*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2744*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2745*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2746*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
2747*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
2748*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
2749*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
2750*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
// Lane-0 codegen test for vqdmlsl_lane_s16: forwards a, b and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a zeroinitializer shuffle mask on %v, a call to
// @llvm.aarch64.neon.sqdmull.v4i32 on %b and the shuffle, and then
// @llvm.aarch64.neon.sqsub.v4i32 taking %a and that product.
test_vqdmlsl_lane_s16_0(int32x4_t a,int16x4_t b,int16x4_t v)2751*67e74705SXin Li int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
2752*67e74705SXin Li   return vqdmlsl_lane_s16(a, b, v, 0);
2753*67e74705SXin Li }
2754*67e74705SXin Li 
2755*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
2756*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2757*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
2758*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
2759*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2760*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2761*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
2762*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
2763*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
2764*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
2765*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
// Lane-0 codegen test for vqdmlsl_lane_s32: forwards a, b and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a zeroinitializer shuffle mask on %v, a call to
// @llvm.aarch64.neon.sqdmull.v2i64 on %b and the shuffle, and then
// @llvm.aarch64.neon.sqsub.v2i64 taking %a and that product.
test_vqdmlsl_lane_s32_0(int64x2_t a,int32x2_t b,int32x2_t v)2766*67e74705SXin Li int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
2767*67e74705SXin Li   return vqdmlsl_lane_s32(a, b, v, 0);
2768*67e74705SXin Li }
2769*67e74705SXin Li 
2770*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
2771*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2772*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2773*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2774*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2775*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2776*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2777*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
2778*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
2779*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
2780*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
2781*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
// Lane-0 codegen test for vqdmlsl_high_lane_s16: forwards a, b and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 4..7 of %b, a zeroinitializer shuffle mask
// on %v, a call to @llvm.aarch64.neon.sqdmull.v4i32, and then
// @llvm.aarch64.neon.sqsub.v4i32 taking %a and that product.
test_vqdmlsl_high_lane_s16_0(int32x4_t a,int16x8_t b,int16x4_t v)2782*67e74705SXin Li int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
2783*67e74705SXin Li   return vqdmlsl_high_lane_s16(a, b, v, 0);
2784*67e74705SXin Li }
2785*67e74705SXin Li 
2786*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
2787*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
2788*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2789*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
2790*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2791*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2792*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2793*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
2794*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
2795*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
2796*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
2797*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
// Lane-0 codegen test for vqdmlsl_high_lane_s32: forwards a, b and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 2..3 of %b, a zeroinitializer shuffle mask
// on %v, a call to @llvm.aarch64.neon.sqdmull.v2i64, and then
// @llvm.aarch64.neon.sqsub.v2i64 taking %a and that product.
test_vqdmlsl_high_lane_s32_0(int64x2_t a,int32x4_t b,int32x2_t v)2798*67e74705SXin Li int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
2799*67e74705SXin Li   return vqdmlsl_high_lane_s32(a, b, v, 0);
2800*67e74705SXin Li }
2801*67e74705SXin Li 
2802*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 {
2803*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2804*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2805*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2806*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2807*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2808*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2
2809*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
2810*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
2811*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Lane-0 codegen test for vqdmull_lane_s16: forwards a and v to the intrinsic
// with constant lane index 0. The FileCheck expectations above look for a
// zeroinitializer shuffle mask on %v feeding @llvm.aarch64.neon.sqdmull.v4i32.
test_vqdmull_lane_s16_0(int16x4_t a,int16x4_t v)2812*67e74705SXin Li int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) {
2813*67e74705SXin Li   return vqdmull_lane_s16(a, v, 0);
2814*67e74705SXin Li }
2815*67e74705SXin Li 
2816*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 {
2817*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2818*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2819*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2820*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2821*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2822*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
2823*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
2824*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
2825*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Lane-0 codegen test for vqdmull_lane_s32: forwards a and v to the intrinsic
// with constant lane index 0. The FileCheck expectations above look for a
// zeroinitializer shuffle mask on %v feeding @llvm.aarch64.neon.sqdmull.v2i64.
test_vqdmull_lane_s32_0(int32x2_t a,int32x2_t v)2826*67e74705SXin Li int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) {
2827*67e74705SXin Li   return vqdmull_lane_s32(a, v, 0);
2828*67e74705SXin Li }
2829*67e74705SXin Li 
2830*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 {
2831*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2832*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2833*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2834*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2835*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2836*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2
2837*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
2838*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
2839*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Lane-0 codegen test for vqdmull_laneq_s16: forwards a and the 8-element v
// to the intrinsic with constant lane index 0. The FileCheck expectations
// above look for a zeroinitializer shuffle mask narrowing %v to 4 elements
// and a call to @llvm.aarch64.neon.sqdmull.v4i32.
test_vqdmull_laneq_s16_0(int16x4_t a,int16x8_t v)2840*67e74705SXin Li int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) {
2841*67e74705SXin Li   return vqdmull_laneq_s16(a, v, 0);
2842*67e74705SXin Li }
2843*67e74705SXin Li 
2844*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 {
2845*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2846*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2847*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2848*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2849*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2850*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
2851*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
2852*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
2853*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Lane-0 codegen test for vqdmull_laneq_s32: forwards a and the 4-element v
// to the intrinsic with constant lane index 0. The FileCheck expectations
// above look for a zeroinitializer shuffle mask narrowing %v to 2 elements
// and a call to @llvm.aarch64.neon.sqdmull.v2i64.
test_vqdmull_laneq_s32_0(int32x2_t a,int32x4_t v)2854*67e74705SXin Li int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) {
2855*67e74705SXin Li   return vqdmull_laneq_s32(a, v, 0);
2856*67e74705SXin Li }
2857*67e74705SXin Li 
2858*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 {
2859*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2860*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2861*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2862*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2863*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2864*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2865*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2
2866*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
2867*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
2868*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Lane-0 codegen test for vqdmull_high_lane_s16: forwards a and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 4..7 of %a, a zeroinitializer shuffle mask
// on %v, and a call to @llvm.aarch64.neon.sqdmull.v4i32.
test_vqdmull_high_lane_s16_0(int16x8_t a,int16x4_t v)2869*67e74705SXin Li int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) {
2870*67e74705SXin Li   return vqdmull_high_lane_s16(a, v, 0);
2871*67e74705SXin Li }
2872*67e74705SXin Li 
2873*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 {
2874*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
2875*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2876*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2877*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2878*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2879*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2880*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
2881*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
2882*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
2883*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Lane-0 codegen test for vqdmull_high_lane_s32: forwards a and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 2..3 of %a, a zeroinitializer shuffle mask
// on %v, and a call to @llvm.aarch64.neon.sqdmull.v2i64.
test_vqdmull_high_lane_s32_0(int32x4_t a,int32x2_t v)2884*67e74705SXin Li int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) {
2885*67e74705SXin Li   return vqdmull_high_lane_s32(a, v, 0);
2886*67e74705SXin Li }
2887*67e74705SXin Li 
2888*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 {
2889*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2890*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
2891*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
2892*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2893*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2894*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2895*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2
2896*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
2897*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
2898*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Lane-0 codegen test for vqdmull_high_laneq_s16: forwards a and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 4..7 of %a, a zeroinitializer shuffle mask
// on %v, and a call to @llvm.aarch64.neon.sqdmull.v4i32.
test_vqdmull_high_laneq_s16_0(int16x8_t a,int16x8_t v)2899*67e74705SXin Li int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) {
2900*67e74705SXin Li   return vqdmull_high_laneq_s16(a, v, 0);
2901*67e74705SXin Li }
2902*67e74705SXin Li 
2903*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 {
2904*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
2905*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
2906*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
2907*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2908*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2909*67e74705SXin Li // CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2910*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
2911*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
2912*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
2913*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Lane-0 codegen test for vqdmull_high_laneq_s32: forwards a and v to the
// intrinsic with constant lane index 0. The FileCheck expectations above look
// for a shuffle selecting elements 2..3 of %a, a zeroinitializer shuffle mask
// on %v, and a call to @llvm.aarch64.neon.sqdmull.v2i64.
test_vqdmull_high_laneq_s32_0(int32x4_t a,int32x4_t v)2914*67e74705SXin Li int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) {
2915*67e74705SXin Li   return vqdmull_high_laneq_s32(a, v, 0);
2916*67e74705SXin Li }
2917*67e74705SXin Li 
2918*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 {
2919*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2920*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2921*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2922*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2923*67e74705SXin Li // CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2924*67e74705SXin Li // CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2
2925*67e74705SXin Li // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
2926*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
2927*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Lane-0 codegen test for vqdmulh_lane_s16: forwards a and v to the intrinsic
// with constant lane index 0. The FileCheck expectations above look for a
// zeroinitializer shuffle mask on %v feeding @llvm.aarch64.neon.sqdmulh.v4i16.
test_vqdmulh_lane_s16_0(int16x4_t a,int16x4_t v)2928*67e74705SXin Li int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) {
2929*67e74705SXin Li   return vqdmulh_lane_s16(a, v, 0);
2930*67e74705SXin Li }
2931*67e74705SXin Li 
2932*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 {
2933*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
2934*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2935*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
2936*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
2937*67e74705SXin Li // CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2938*67e74705SXin Li // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2
2939*67e74705SXin Li // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
2940*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
2941*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Lane-0 codegen test for vqdmulhq_lane_s16: forwards a and the 4-element v
// to the intrinsic with constant lane index 0. The FileCheck expectations
// above look for a zeroinitializer shuffle mask widening %v to 8 elements
// and a call to @llvm.aarch64.neon.sqdmulh.v8i16.
test_vqdmulhq_lane_s16_0(int16x8_t a,int16x4_t v)2942*67e74705SXin Li int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) {
2943*67e74705SXin Li   return vqdmulhq_lane_s16(a, v, 0);
2944*67e74705SXin Li }
2945*67e74705SXin Li 
2946*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 {
2947*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
2948*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
2949*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
2950*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
2951*67e74705SXin Li // CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
2952*67e74705SXin Li // CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2
2953*67e74705SXin Li // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
2954*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
2955*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Lane-index-0 case: calls vqdmulh_lane_s32(a, v, 0). Expected IR (directives
// above): %v is shuffled with a zeroinitializer mask into a <2 x i32>, which
// is passed together with %a to @llvm.aarch64.neon.sqdmulh.v2i32.
test_vqdmulh_lane_s32_0(int32x2_t a,int32x2_t v)2956*67e74705SXin Li int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) {
2957*67e74705SXin Li   return vqdmulh_lane_s32(a, v, 0);
2958*67e74705SXin Li }
2959*67e74705SXin Li 
2960*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 {
2961*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
2962*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2963*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
2964*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
2965*67e74705SXin Li // CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
2966*67e74705SXin Li // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2
2967*67e74705SXin Li // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
2968*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
2969*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Lane-index-0 case: calls vqdmulhq_lane_s32(a, v, 0). Expected IR (directives
// above): %v is shuffled with a zeroinitializer mask into a <4 x i32>, which
// is passed together with %a to @llvm.aarch64.neon.sqdmulh.v4i32.
test_vqdmulhq_lane_s32_0(int32x4_t a,int32x2_t v)2970*67e74705SXin Li int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) {
2971*67e74705SXin Li   return vqdmulhq_lane_s32(a, v, 0);
2972*67e74705SXin Li }
2973*67e74705SXin Li 
2974*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 {
2975*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
2976*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2977*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
2978*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
2979*67e74705SXin Li // CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
2980*67e74705SXin Li // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2
2981*67e74705SXin Li // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
2982*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
2983*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Lane-index-0 case: calls vqrdmulh_lane_s16(a, v, 0). Expected IR (directives
// above): %v is shuffled with a zeroinitializer mask into a <4 x i16>, which
// is passed together with %a to @llvm.aarch64.neon.sqrdmulh.v4i16.
test_vqrdmulh_lane_s16_0(int16x4_t a,int16x4_t v)2984*67e74705SXin Li int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) {
2985*67e74705SXin Li   return vqrdmulh_lane_s16(a, v, 0);
2986*67e74705SXin Li }
2987*67e74705SXin Li 
2988*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 {
2989*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
2990*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2991*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
2992*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
2993*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
2994*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2
2995*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
2996*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
2997*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Lane-index-0 case: calls vqrdmulhq_lane_s16(a, v, 0). Expected IR (directives
// above): %v is shuffled with a zeroinitializer mask into an <8 x i16>, which
// is passed together with %a to @llvm.aarch64.neon.sqrdmulh.v8i16.
test_vqrdmulhq_lane_s16_0(int16x8_t a,int16x4_t v)2998*67e74705SXin Li int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) {
2999*67e74705SXin Li   return vqrdmulhq_lane_s16(a, v, 0);
3000*67e74705SXin Li }
3001*67e74705SXin Li 
3002*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 {
3003*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
3004*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3005*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
3006*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3007*67e74705SXin Li // CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3008*67e74705SXin Li // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2
3009*67e74705SXin Li // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
3010*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
3011*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Lane-index-0 case: calls vqrdmulh_lane_s32(a, v, 0). Expected IR (directives
// above): %v is shuffled with a zeroinitializer mask into a <2 x i32>, which
// is passed together with %a to @llvm.aarch64.neon.sqrdmulh.v2i32.
test_vqrdmulh_lane_s32_0(int32x2_t a,int32x2_t v)3012*67e74705SXin Li int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) {
3013*67e74705SXin Li   return vqrdmulh_lane_s32(a, v, 0);
3014*67e74705SXin Li }
3015*67e74705SXin Li 
3016*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 {
3017*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
3018*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3019*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
3020*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3021*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3022*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2
3023*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
3024*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
3025*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Lane-index-0 case: calls vqrdmulhq_lane_s32(a, v, 0). Expected IR (directives
// above): %v is shuffled with a zeroinitializer mask into a <4 x i32>, which
// is passed together with %a to @llvm.aarch64.neon.sqrdmulh.v4i32.
test_vqrdmulhq_lane_s32_0(int32x4_t a,int32x2_t v)3026*67e74705SXin Li int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) {
3027*67e74705SXin Li   return vqrdmulhq_lane_s32(a, v, 0);
3028*67e74705SXin Li }
3029*67e74705SXin Li 
3030*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) #0 {
3031*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
3032*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]]
3033*67e74705SXin Li // CHECK:   ret <2 x float> [[MUL]]
// Lane-index-0 case: calls vmul_lane_f32(a, v, 0). Expected IR (directives
// above): a zeroinitializer-mask shufflevector of %v yielding <2 x float>,
// followed by a plain fmul with %a (no intrinsic call).
test_vmul_lane_f32_0(float32x2_t a,float32x2_t v)3034*67e74705SXin Li float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) {
3035*67e74705SXin Li   return vmul_lane_f32(a, v, 0);
3036*67e74705SXin Li }
3037*67e74705SXin Li 
3038*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) #0 {
3039*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
3040*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]]
3041*67e74705SXin Li // CHECK:   ret <4 x float> [[MUL]]
// Lane-index-0 case: calls vmulq_lane_f32(a, v, 0). Expected IR (directives
// above): a zeroinitializer-mask shufflevector widening the <2 x float> %v to
// <4 x float>, followed by a plain fmul with %a.
test_vmulq_lane_f32_0(float32x4_t a,float32x2_t v)3042*67e74705SXin Li float32x4_t test_vmulq_lane_f32_0(float32x4_t a, float32x2_t v) {
3043*67e74705SXin Li   return vmulq_lane_f32(a, v, 0);
3044*67e74705SXin Li }
3045*67e74705SXin Li 
3046*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) #0 {
3047*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
3048*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]]
3049*67e74705SXin Li // CHECK:   ret <2 x float> [[MUL]]
// Lane-index-0 case: calls vmul_laneq_f32(a, v, 0). Expected IR (directives
// above): a zeroinitializer-mask shufflevector narrowing the <4 x float> %v to
// <2 x float>, followed by a plain fmul with %a.
test_vmul_laneq_f32_0(float32x2_t a,float32x4_t v)3050*67e74705SXin Li float32x2_t test_vmul_laneq_f32_0(float32x2_t a, float32x4_t v) {
3051*67e74705SXin Li   return vmul_laneq_f32(a, v, 0);
3052*67e74705SXin Li }
3053*67e74705SXin Li 
3054*67e74705SXin Li // CHECK-LABEL: define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) #0 {
3055*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
3056*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v to <16 x i8>
3057*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double
3058*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
3059*67e74705SXin Li // CHECK:   [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
3060*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]]
3061*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double>
3062*67e74705SXin Li // CHECK:   ret <1 x double> [[TMP5]]
// Lane-index-0 case: calls vmul_laneq_f64(a, v, 0). Unlike the vector cases
// nearby, the directives above expect scalar IR: %a is bitcast to double,
// element 0 is extracted from %v, the two are multiplied with a scalar fmul,
// and the product is bitcast back to <1 x double>.
test_vmul_laneq_f64_0(float64x1_t a,float64x2_t v)3063*67e74705SXin Li float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) {
3064*67e74705SXin Li   return vmul_laneq_f64(a, v, 0);
3065*67e74705SXin Li }
3066*67e74705SXin Li 
3067*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) #0 {
3068*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
3069*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]]
3070*67e74705SXin Li // CHECK:   ret <4 x float> [[MUL]]
// Lane-index-0 case: calls vmulq_laneq_f32(a, v, 0). Expected IR (directives
// above): a zeroinitializer-mask shufflevector of %v yielding <4 x float>,
// followed by a plain fmul with %a.
test_vmulq_laneq_f32_0(float32x4_t a,float32x4_t v)3071*67e74705SXin Li float32x4_t test_vmulq_laneq_f32_0(float32x4_t a, float32x4_t v) {
3072*67e74705SXin Li   return vmulq_laneq_f32(a, v, 0);
3073*67e74705SXin Li }
3074*67e74705SXin Li 
3075*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) #0 {
3076*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> zeroinitializer
3077*67e74705SXin Li // CHECK:   [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]]
3078*67e74705SXin Li // CHECK:   ret <2 x double> [[MUL]]
// Lane-index-0 case: calls vmulq_laneq_f64(a, v, 0). Expected IR (directives
// above): a zeroinitializer-mask shufflevector of %v yielding <2 x double>,
// followed by a plain fmul with %a.
test_vmulq_laneq_f64_0(float64x2_t a,float64x2_t v)3079*67e74705SXin Li float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) {
3080*67e74705SXin Li   return vmulq_laneq_f64(a, v, 0);
3081*67e74705SXin Li }
3082*67e74705SXin Li 
3083*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) #0 {
3084*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
3085*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3086*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8>
3087*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
3088*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
3089*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2
3090*67e74705SXin Li // CHECK:   ret <2 x float> [[VMULX2_I]]
// Lane-index-0 case: calls vmulx_lane_f32(a, v, 0). Expected IR (directives
// above): %v is shuffled with a zeroinitializer mask into a <2 x float>, which
// is passed together with %a to @llvm.aarch64.neon.fmulx.v2f32.
test_vmulx_lane_f32_0(float32x2_t a,float32x2_t v)3091*67e74705SXin Li float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) {
3092*67e74705SXin Li   return vmulx_lane_f32(a, v, 0);
3093*67e74705SXin Li }
3094*67e74705SXin Li 
3095*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) #0 {
3096*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
3097*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3098*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8>
3099*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
3100*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
3101*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2
3102*67e74705SXin Li // CHECK:   ret <4 x float> [[VMULX2_I]]
// Lane-index-0 case: calls vmulxq_lane_f32(a, v, 0). Expected IR (directives
// above): the <2 x float> %v is shuffled with a zeroinitializer mask into a
// <4 x float>, which is passed together with %a to
// @llvm.aarch64.neon.fmulx.v4f32.
test_vmulxq_lane_f32_0(float32x4_t a,float32x2_t v)3103*67e74705SXin Li float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) {
3104*67e74705SXin Li   return vmulxq_lane_f32(a, v, 0);
3105*67e74705SXin Li }
3106*67e74705SXin Li 
3107*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) #0 {
3108*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer
3109*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
3110*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8>
3111*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
3112*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
3113*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2
3114*67e74705SXin Li // CHECK:   ret <2 x double> [[VMULX2_I]]
// Lane-index-0 case: calls vmulxq_lane_f64(a, v, 0). Expected IR (directives
// above): the <1 x double> %v is shuffled with a zeroinitializer mask into a
// <2 x double>, which is passed together with %a to
// @llvm.aarch64.neon.fmulx.v2f64.
test_vmulxq_lane_f64_0(float64x2_t a,float64x1_t v)3115*67e74705SXin Li float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) {
3116*67e74705SXin Li   return vmulxq_lane_f64(a, v, 0);
3117*67e74705SXin Li }
3118*67e74705SXin Li 
3119*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) #0 {
3120*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
3121*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3122*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8>
3123*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
3124*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
3125*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2
3126*67e74705SXin Li // CHECK:   ret <2 x float> [[VMULX2_I]]
// Lane-index-0 case: calls vmulx_laneq_f32(a, v, 0). Expected IR (directives
// above): the <4 x float> %v is shuffled with a zeroinitializer mask into a
// <2 x float>, which is passed together with %a to
// @llvm.aarch64.neon.fmulx.v2f32.
test_vmulx_laneq_f32_0(float32x2_t a,float32x4_t v)3127*67e74705SXin Li float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) {
3128*67e74705SXin Li   return vmulx_laneq_f32(a, v, 0);
3129*67e74705SXin Li }
3130*67e74705SXin Li 
3131*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) #0 {
3132*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
3133*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3134*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8>
3135*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
3136*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
3137*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2
3138*67e74705SXin Li // CHECK:   ret <4 x float> [[VMULX2_I]]
// Lane-index-0 case: calls vmulxq_laneq_f32(a, v, 0). Expected IR (directives
// above): %v is shuffled with a zeroinitializer mask into a <4 x float>, which
// is passed together with %a to @llvm.aarch64.neon.fmulx.v4f32.
test_vmulxq_laneq_f32_0(float32x4_t a,float32x4_t v)3139*67e74705SXin Li float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) {
3140*67e74705SXin Li   return vmulxq_laneq_f32(a, v, 0);
3141*67e74705SXin Li }
3142*67e74705SXin Li 
3143*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) #0 {
3144*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> zeroinitializer
3145*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
3146*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8>
3147*67e74705SXin Li // CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
3148*67e74705SXin Li // CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
3149*67e74705SXin Li // CHECK:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2
3150*67e74705SXin Li // CHECK:   ret <2 x double> [[VMULX2_I]]
// Lane-index-0 case: calls vmulxq_laneq_f64(a, v, 0). Expected IR (directives
// above): %v is shuffled with a zeroinitializer mask into a <2 x double>,
// which is passed together with %a to @llvm.aarch64.neon.fmulx.v2f64.
test_vmulxq_laneq_f64_0(float64x2_t a,float64x2_t v)3151*67e74705SXin Li float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) {
3152*67e74705SXin Li   return vmulxq_laneq_f64(a, v, 0);
3153*67e74705SXin Li }
3154*67e74705SXin Li 
3155*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) #0 {
3156*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3157*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
3158*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3159*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %b, i32 1
3160*67e74705SXin Li // CHECK:   [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2
3161*67e74705SXin Li // CHECK:   [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3
3162*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8>
3163*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3164*67e74705SXin Li // CHECK:   [[VMULL4_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3165*67e74705SXin Li // CHECK:   [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL4_I_I]]) #2
3166*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL5_I_I]]
// Calls vmull_high_n_s16(a, b). Expected IR (directives above): elements 4..7
// of %a are selected by shufflevector, the scalar %b is inserted into all four
// lanes of a <4 x i16>, and both go to @llvm.aarch64.neon.smull.v4i32.
test_vmull_high_n_s16(int16x8_t a,int16_t b)3167*67e74705SXin Li int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) {
3168*67e74705SXin Li   return vmull_high_n_s16(a, b);
3169*67e74705SXin Li }
3170*67e74705SXin Li 
3171*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) #0 {
3172*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
3173*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
3174*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3175*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1
3176*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8>
3177*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3178*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3179*67e74705SXin Li // CHECK:   [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL2_I_I]]) #2
3180*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL3_I_I]]
// Calls vmull_high_n_s32(a, b). Expected IR (directives above): elements 2..3
// of %a are selected by shufflevector, the scalar %b is inserted into both
// lanes of a <2 x i32>, and both go to @llvm.aarch64.neon.smull.v2i64.
test_vmull_high_n_s32(int32x4_t a,int32_t b)3181*67e74705SXin Li int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) {
3182*67e74705SXin Li   return vmull_high_n_s32(a, b);
3183*67e74705SXin Li }
3184*67e74705SXin Li 
3185*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) #0 {
3186*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3187*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
3188*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3189*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %b, i32 1
3190*67e74705SXin Li // CHECK:   [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2
3191*67e74705SXin Li // CHECK:   [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3
3192*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8>
3193*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3194*67e74705SXin Li // CHECK:   [[VMULL4_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3195*67e74705SXin Li // CHECK:   [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL4_I_I]]) #2
3196*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL5_I_I]]
// Calls vmull_high_n_u16(a, b). Expected IR (directives above): elements 4..7
// of %a are selected by shufflevector, the scalar %b is inserted into all four
// lanes of a <4 x i16>, and both go to @llvm.aarch64.neon.umull.v4i32.
test_vmull_high_n_u16(uint16x8_t a,uint16_t b)3197*67e74705SXin Li uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) {
3198*67e74705SXin Li   return vmull_high_n_u16(a, b);
3199*67e74705SXin Li }
3200*67e74705SXin Li 
3201*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) #0 {
3202*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
3203*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
3204*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3205*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1
3206*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8>
3207*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3208*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3209*67e74705SXin Li // CHECK:   [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL2_I_I]]) #2
3210*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL3_I_I]]
// Calls vmull_high_n_u32(a, b). Expected IR (directives above): elements 2..3
// of %a are selected by shufflevector, the scalar %b is inserted into both
// lanes of a <2 x i32>, and both go to @llvm.aarch64.neon.umull.v2i64.
test_vmull_high_n_u32(uint32x4_t a,uint32_t b)3211*67e74705SXin Li uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) {
3212*67e74705SXin Li   return vmull_high_n_u32(a, b);
3213*67e74705SXin Li }
3214*67e74705SXin Li 
3215*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) #0 {
3216*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3217*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
3218*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3219*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %b, i32 1
3220*67e74705SXin Li // CHECK:   [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2
3221*67e74705SXin Li // CHECK:   [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3
3222*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8>
3223*67e74705SXin Li // CHECK:   [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3224*67e74705SXin Li // CHECK:   [[VQDMULL_V4_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3225*67e74705SXin Li // CHECK:   [[VQDMULL_V5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V4_I_I]]) #2
3226*67e74705SXin Li // CHECK:   [[VQDMULL_V6_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I_I]] to <16 x i8>
3227*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I_I]] to <4 x i32>
3228*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Calls vqdmull_high_n_s16(a, b). Expected IR (directives above): elements
// 4..7 of %a are selected by shufflevector, the scalar %b is inserted into all
// four lanes of a <4 x i16>, and both go to @llvm.aarch64.neon.sqdmull.v4i32;
// the result round-trips through <16 x i8> bitcasts before being returned.
test_vqdmull_high_n_s16(int16x8_t a,int16_t b)3229*67e74705SXin Li int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) {
3230*67e74705SXin Li   return vqdmull_high_n_s16(a, b);
3231*67e74705SXin Li }
3232*67e74705SXin Li 
3233*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) #0 {
3234*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
3235*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
3236*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3237*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1
3238*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8>
3239*67e74705SXin Li // CHECK:   [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3240*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3241*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V2_I_I]]) #2
3242*67e74705SXin Li // CHECK:   [[VQDMULL_V4_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I_I]] to <16 x i8>
3243*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I_I]] to <2 x i64>
3244*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Calls vqdmull_high_n_s32(a, b). Expected IR (directives above): elements
// 2..3 of %a are selected by shufflevector, the scalar %b is inserted into
// both lanes of a <2 x i32>, and both go to @llvm.aarch64.neon.sqdmull.v2i64;
// the result round-trips through <16 x i8> bitcasts before being returned.
test_vqdmull_high_n_s32(int32x4_t a,int32_t b)3245*67e74705SXin Li int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) {
3246*67e74705SXin Li   return vqdmull_high_n_s32(a, b);
3247*67e74705SXin Li }
3248*67e74705SXin Li 
3249*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
3250*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3251*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3252*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1
3253*67e74705SXin Li // CHECK:   [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2
3254*67e74705SXin Li // CHECK:   [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3
3255*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
3256*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8>
3257*67e74705SXin Li // CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3258*67e74705SXin Li // CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3259*67e74705SXin Li // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2
3260*67e74705SXin Li // CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
3261*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD_I_I]]
// Calls vmlal_high_n_s16(a, b, c). Expected IR (directives above): elements
// 4..7 of %b are selected by shufflevector, the scalar %c is inserted into all
// four lanes of a <4 x i16>, both go to @llvm.aarch64.neon.smull.v4i32, and
// the product is added to %a with a plain add.
test_vmlal_high_n_s16(int32x4_t a,int16x8_t b,int16_t c)3262*67e74705SXin Li int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
3263*67e74705SXin Li   return vmlal_high_n_s16(a, b, c);
3264*67e74705SXin Li }
3265*67e74705SXin Li 
3266*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
3267*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
3268*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
3269*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1
3270*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
3271*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8>
3272*67e74705SXin Li // CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3273*67e74705SXin Li // CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3274*67e74705SXin Li // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2
3275*67e74705SXin Li // CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
3276*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD_I_I]]
// Calls vmlal_high_n_s32(a, b, c). Expected IR (directives above): elements
// 2..3 of %b are selected by shufflevector, the scalar %c is inserted into
// both lanes of a <2 x i32>, both go to @llvm.aarch64.neon.smull.v2i64, and
// the product is added to %a with a plain add.
test_vmlal_high_n_s32(int64x2_t a,int32x4_t b,int32_t c)3277*67e74705SXin Li int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
3278*67e74705SXin Li   return vmlal_high_n_s32(a, b, c);
3279*67e74705SXin Li }
3280*67e74705SXin Li 
3281*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
3282*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3283*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3284*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1
3285*67e74705SXin Li // CHECK:   [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2
3286*67e74705SXin Li // CHECK:   [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3
3287*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
3288*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8>
3289*67e74705SXin Li // CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3290*67e74705SXin Li // CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3291*67e74705SXin Li // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2
3292*67e74705SXin Li // CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
3293*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD_I_I]]
// Calls vmlal_high_n_u16(a, b, c). Expected IR (directives above): elements
// 4..7 of %b are selected by shufflevector, the scalar %c is inserted into all
// four lanes of a <4 x i16>, both go to @llvm.aarch64.neon.umull.v4i32, and
// the product is added to %a with a plain add.
test_vmlal_high_n_u16(uint32x4_t a,uint16x8_t b,uint16_t c)3294*67e74705SXin Li uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
3295*67e74705SXin Li   return vmlal_high_n_u16(a, b, c);
3296*67e74705SXin Li }
3297*67e74705SXin Li 
3298*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
3299*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
3300*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
3301*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1
3302*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
3303*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8>
3304*67e74705SXin Li // CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3305*67e74705SXin Li // CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3306*67e74705SXin Li // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2
3307*67e74705SXin Li // CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
3308*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD_I_I]]
// Test wrapper: forwards (a, b, c) to vmlal_high_n_u32 and returns the result.
// The expected-IR directives above require lanes 2-3 of %b to be selected,
// %c to be inserted into both lanes of a <2 x i32>, the two to be multiplied
// by llvm.aarch64.neon.umull.v2i64, and the product to be added to %a.
test_vmlal_high_n_u32(uint64x2_t a,uint32x4_t b,uint32_t c)3309*67e74705SXin Li uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
3310*67e74705SXin Li   return vmlal_high_n_u32(a, b, c);
3311*67e74705SXin Li }
3312*67e74705SXin Li 
3313*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
3314*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3315*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3316*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
3317*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3318*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1
3319*67e74705SXin Li // CHECK:   [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2
3320*67e74705SXin Li // CHECK:   [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3
3321*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8>
3322*67e74705SXin Li // CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3323*67e74705SXin Li // CHECK:   [[VQDMLAL4_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
3324*67e74705SXin Li // CHECK:   [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL4_I_I]]) #2
3325*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3326*67e74705SXin Li // CHECK:   [[VQDMLAL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL5_I_I]]) #2
3327*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V6_I_I]]
// Test wrapper: forwards (a, b, c) to vqdmlal_high_n_s16 and returns the result.
// The expected-IR directives above require lanes 4-7 of %b to be selected,
// %c to be inserted into all four lanes of a <4 x i16>, the two to be passed
// to llvm.aarch64.neon.sqdmull.v4i32, and that product to be combined with %a
// through llvm.aarch64.neon.sqadd.v4i32.
test_vqdmlal_high_n_s16(int32x4_t a,int16x8_t b,int16_t c)3328*67e74705SXin Li int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
3329*67e74705SXin Li   return vqdmlal_high_n_s16(a, b, c);
3330*67e74705SXin Li }
3331*67e74705SXin Li 
3332*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
3333*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
3334*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3335*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
3336*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
3337*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1
3338*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8>
3339*67e74705SXin Li // CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3340*67e74705SXin Li // CHECK:   [[VQDMLAL2_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
3341*67e74705SXin Li // CHECK:   [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL2_I_I]]) #2
3342*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3343*67e74705SXin Li // CHECK:   [[VQDMLAL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL3_I_I]]) #2
3344*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V4_I_I]]
// Test wrapper: forwards (a, b, c) to vqdmlal_high_n_s32 and returns the result.
// The expected-IR directives above require lanes 2-3 of %b to be selected,
// %c to be inserted into both lanes of a <2 x i32>, the two to be passed to
// llvm.aarch64.neon.sqdmull.v2i64, and that product to be combined with %a
// through llvm.aarch64.neon.sqadd.v2i64.
test_vqdmlal_high_n_s32(int64x2_t a,int32x4_t b,int32_t c)3345*67e74705SXin Li int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
3346*67e74705SXin Li   return vqdmlal_high_n_s32(a, b, c);
3347*67e74705SXin Li }
3348*67e74705SXin Li 
3349*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
3350*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3351*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3352*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1
3353*67e74705SXin Li // CHECK:   [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2
3354*67e74705SXin Li // CHECK:   [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3
3355*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
3356*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8>
3357*67e74705SXin Li // CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3358*67e74705SXin Li // CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3359*67e74705SXin Li // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2
3360*67e74705SXin Li // CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
3361*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB_I_I]]
// Test wrapper: forwards (a, b, c) to vmlsl_high_n_s16 and returns the result.
// The expected-IR directives above require lanes 4-7 of %b to be selected,
// %c to be inserted into all four lanes of a <4 x i16>, the two to be
// multiplied by llvm.aarch64.neon.smull.v4i32, and the product to be
// subtracted from %a with a plain `sub`.
test_vmlsl_high_n_s16(int32x4_t a,int16x8_t b,int16_t c)3362*67e74705SXin Li int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
3363*67e74705SXin Li   return vmlsl_high_n_s16(a, b, c);
3364*67e74705SXin Li }
3365*67e74705SXin Li 
3366*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
3367*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
3368*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
3369*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1
3370*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
3371*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8>
3372*67e74705SXin Li // CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3373*67e74705SXin Li // CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3374*67e74705SXin Li // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2
3375*67e74705SXin Li // CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
3376*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB_I_I]]
// Test wrapper: forwards (a, b, c) to vmlsl_high_n_s32 and returns the result.
// The expected-IR directives above require lanes 2-3 of %b to be selected,
// %c to be inserted into both lanes of a <2 x i32>, the two to be multiplied
// by llvm.aarch64.neon.smull.v2i64, and the product to be subtracted from %a
// with a plain `sub`.
test_vmlsl_high_n_s32(int64x2_t a,int32x4_t b,int32_t c)3377*67e74705SXin Li int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
3378*67e74705SXin Li   return vmlsl_high_n_s32(a, b, c);
3379*67e74705SXin Li }
3380*67e74705SXin Li 
3381*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
3382*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3383*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3384*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1
3385*67e74705SXin Li // CHECK:   [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2
3386*67e74705SXin Li // CHECK:   [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3
3387*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
3388*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8>
3389*67e74705SXin Li // CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3390*67e74705SXin Li // CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3391*67e74705SXin Li // CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2
3392*67e74705SXin Li // CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
3393*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB_I_I]]
// Test wrapper: forwards (a, b, c) to vmlsl_high_n_u16 and returns the result.
// The expected-IR directives above require lanes 4-7 of %b to be selected,
// %c to be inserted into all four lanes of a <4 x i16>, the two to be
// multiplied by llvm.aarch64.neon.umull.v4i32, and the product to be
// subtracted from %a with a plain `sub`.
test_vmlsl_high_n_u16(uint32x4_t a,uint16x8_t b,uint16_t c)3394*67e74705SXin Li uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
3395*67e74705SXin Li   return vmlsl_high_n_u16(a, b, c);
3396*67e74705SXin Li }
3397*67e74705SXin Li 
3398*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
3399*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
3400*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
3401*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1
3402*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
3403*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8>
3404*67e74705SXin Li // CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3405*67e74705SXin Li // CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3406*67e74705SXin Li // CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2
3407*67e74705SXin Li // CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
3408*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB_I_I]]
// Test wrapper: forwards (a, b, c) to vmlsl_high_n_u32 and returns the result.
// The expected-IR directives above require lanes 2-3 of %b to be selected,
// %c to be inserted into both lanes of a <2 x i32>, the two to be multiplied
// by llvm.aarch64.neon.umull.v2i64, and the product to be subtracted from %a
// with a plain `sub`.
test_vmlsl_high_n_u32(uint64x2_t a,uint32x4_t b,uint32_t c)3409*67e74705SXin Li uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
3410*67e74705SXin Li   return vmlsl_high_n_u32(a, b, c);
3411*67e74705SXin Li }
3412*67e74705SXin Li 
3413*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
3414*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3415*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3416*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
3417*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3418*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1
3419*67e74705SXin Li // CHECK:   [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2
3420*67e74705SXin Li // CHECK:   [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3
3421*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8>
3422*67e74705SXin Li // CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3423*67e74705SXin Li // CHECK:   [[VQDMLAL4_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
3424*67e74705SXin Li // CHECK:   [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL4_I_I]]) #2
3425*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3426*67e74705SXin Li // CHECK:   [[VQDMLSL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL5_I_I]]) #2
3427*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V6_I_I]]
// Test wrapper: forwards (a, b, c) to vqdmlsl_high_n_s16 and returns the result.
// The expected-IR directives above require lanes 4-7 of %b to be selected,
// %c to be inserted into all four lanes of a <4 x i16>, the two to be passed
// to llvm.aarch64.neon.sqdmull.v4i32, and that product to be combined with %a
// through llvm.aarch64.neon.sqsub.v4i32.
test_vqdmlsl_high_n_s16(int32x4_t a,int16x8_t b,int16_t c)3428*67e74705SXin Li int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
3429*67e74705SXin Li   return vqdmlsl_high_n_s16(a, b, c);
3430*67e74705SXin Li }
3431*67e74705SXin Li 
3432*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
3433*67e74705SXin Li // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
3434*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3435*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
3436*67e74705SXin Li // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
3437*67e74705SXin Li // CHECK:   [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1
3438*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8>
3439*67e74705SXin Li // CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3440*67e74705SXin Li // CHECK:   [[VQDMLAL2_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
3441*67e74705SXin Li // CHECK:   [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL2_I_I]]) #2
3442*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3443*67e74705SXin Li // CHECK:   [[VQDMLSL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL3_I_I]]) #2
3444*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V4_I_I]]
// Test wrapper: forwards (a, b, c) to vqdmlsl_high_n_s32 and returns the result.
// The expected-IR directives above require lanes 2-3 of %b to be selected,
// %c to be inserted into both lanes of a <2 x i32>, the two to be passed to
// llvm.aarch64.neon.sqdmull.v2i64, and that product to be combined with %a
// through llvm.aarch64.neon.sqsub.v2i64.
test_vqdmlsl_high_n_s32(int64x2_t a,int32x4_t b,int32_t c)3445*67e74705SXin Li int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
3446*67e74705SXin Li   return vqdmlsl_high_n_s32(a, b, c);
3447*67e74705SXin Li }
3448*67e74705SXin Li 
3449*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 {
3450*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0
3451*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
3452*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
3453*67e74705SXin Li // CHECK:   ret <2 x float> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmul_n_f32 and returns the result.
// The expected-IR directives above require %b to be inserted into both lanes
// of a <2 x float> and that vector to be multiplied with %a by `fmul`.
test_vmul_n_f32(float32x2_t a,float32_t b)3454*67e74705SXin Li float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
3455*67e74705SXin Li   return vmul_n_f32(a, b);
3456*67e74705SXin Li }
3457*67e74705SXin Li 
3458*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 {
3459*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0
3460*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
3461*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
3462*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
3463*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
3464*67e74705SXin Li // CHECK:   ret <4 x float> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmulq_n_f32 and returns the result.
// The expected-IR directives above require %b to be inserted into all four
// lanes of a <4 x float> and that vector to be multiplied with %a by `fmul`.
test_vmulq_n_f32(float32x4_t a,float32_t b)3465*67e74705SXin Li float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
3466*67e74705SXin Li   return vmulq_n_f32(a, b);
3467*67e74705SXin Li }
3468*67e74705SXin Li 
3469*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 {
3470*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %b, i32 0
3471*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %b, i32 1
3472*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %a, [[VECINIT1_I]]
3473*67e74705SXin Li // CHECK:   ret <2 x double> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmulq_n_f64 and returns the result.
// The expected-IR directives above require %b to be inserted into both lanes
// of a <2 x double> and that vector to be multiplied with %a by `fmul`.
test_vmulq_n_f64(float64x2_t a,float64_t b)3474*67e74705SXin Li float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) {
3475*67e74705SXin Li   return vmulq_n_f64(a, b);
3476*67e74705SXin Li }
3477*67e74705SXin Li 
3478*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
3479*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0
3480*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1
3481*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3482*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
3483*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8>
3484*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
3485*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
3486*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
3487*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2
3488*67e74705SXin Li // CHECK:   ret <2 x float> [[TMP6]]
// Test wrapper: forwards (a, b, n) to vfma_n_f32 and returns the result.
// The expected-IR directives above require %n to be inserted into both lanes
// of a <2 x float> and a single call to llvm.fma.v2f32 whose operands, after
// the round-trip bitcasts, are (%b, splat of %n, %a) in that order.
test_vfma_n_f32(float32x2_t a,float32x2_t b,float32_t n)3489*67e74705SXin Li float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
3490*67e74705SXin Li   return vfma_n_f32(a, b, n);
3491*67e74705SXin Li }
3492*67e74705SXin Li 
3493*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
3494*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
3495*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
3496*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2
3497*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %n, i32 3
3498*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3499*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
3500*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8>
3501*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
3502*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
3503*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
3504*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2
3505*67e74705SXin Li // CHECK:   ret <4 x float> [[TMP6]]
// Test wrapper: forwards (a, b, n) to vfmaq_n_f32 and returns the result.
// The expected-IR directives above require %n to be inserted into all four
// lanes of a <4 x float> and a single call to llvm.fma.v4f32 whose operands,
// after the round-trip bitcasts, are (%b, splat of %n, %a) in that order.
test_vfmaq_n_f32(float32x4_t a,float32x4_t b,float32_t n)3506*67e74705SXin Li float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
3507*67e74705SXin Li   return vfmaq_n_f32(a, b, n);
3508*67e74705SXin Li }
3509*67e74705SXin Li 
3510*67e74705SXin Li // CHECK-LABEL: define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
3511*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
3512*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0
3513*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1
3514*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3515*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
3516*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8>
3517*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
3518*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
3519*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
3520*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2
3521*67e74705SXin Li // CHECK:   ret <2 x float> [[TMP6]]
// Test wrapper: forwards (a, b, n) to vfms_n_f32 and returns the result.
// The expected-IR directives above require %b to be negated first (an `fsub`
// from a <2 x float> of -0.0), %n to be inserted into both lanes of a
// <2 x float>, and a call to llvm.fma.v2f32 whose operands, after the
// round-trip bitcasts, are (negated %b, splat of %n, %a) in that order.
test_vfms_n_f32(float32x2_t a,float32x2_t b,float32_t n)3522*67e74705SXin Li float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
3523*67e74705SXin Li   return vfms_n_f32(a, b, n);
3524*67e74705SXin Li }
3525*67e74705SXin Li 
3526*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
3527*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
3528*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
3529*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
3530*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2
3531*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %n, i32 3
3532*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3533*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
3534*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8>
3535*67e74705SXin Li // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
3536*67e74705SXin Li // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
3537*67e74705SXin Li // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
3538*67e74705SXin Li // CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2
3539*67e74705SXin Li // CHECK:   ret <4 x float> [[TMP6]]
// Test wrapper: forwards (a, b, n) to vfmsq_n_f32 and returns the result.
// The expected-IR directives above require %b to be negated first (an `fsub`
// from a <4 x float> of -0.0), %n to be inserted into all four lanes of a
// <4 x float>, and a call to llvm.fma.v4f32 whose operands, after the
// round-trip bitcasts, are (negated %b, splat of %n, %a) in that order.
test_vfmsq_n_f32(float32x4_t a,float32x4_t b,float32_t n)3540*67e74705SXin Li float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
3541*67e74705SXin Li   return vfmsq_n_f32(a, b, n);
3542*67e74705SXin Li }
3543*67e74705SXin Li 
3544*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_n_s16(<4 x i16> %a, i16 %b) #0 {
3545*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3546*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
3547*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
3548*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
3549*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
3550*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmul_n_s16 and returns the result.
// The expected-IR directives above require %b to be inserted into all four
// lanes of a <4 x i16> and that vector to be multiplied with %a by `mul`.
test_vmul_n_s16(int16x4_t a,int16_t b)3551*67e74705SXin Li int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
3552*67e74705SXin Li   return vmul_n_s16(a, b);
3553*67e74705SXin Li }
3554*67e74705SXin Li 
3555*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_n_s16(<8 x i16> %a, i16 %b) #0 {
3556*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
3557*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
3558*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
3559*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
3560*67e74705SXin Li // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
3561*67e74705SXin Li // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
3562*67e74705SXin Li // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
3563*67e74705SXin Li // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
3564*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
3565*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmulq_n_s16 and returns the result.
// The expected-IR directives above require %b to be inserted into all eight
// lanes of a <8 x i16> and that vector to be multiplied with %a by `mul`.
test_vmulq_n_s16(int16x8_t a,int16_t b)3566*67e74705SXin Li int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
3567*67e74705SXin Li   return vmulq_n_s16(a, b);
3568*67e74705SXin Li }
3569*67e74705SXin Li 
3570*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_n_s32(<2 x i32> %a, i32 %b) #0 {
3571*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3572*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
3573*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
3574*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmul_n_s32 and returns the result.
// The expected-IR directives above require %b to be inserted into both lanes
// of a <2 x i32> and that vector to be multiplied with %a by `mul`.
test_vmul_n_s32(int32x2_t a,int32_t b)3575*67e74705SXin Li int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
3576*67e74705SXin Li   return vmul_n_s32(a, b);
3577*67e74705SXin Li }
3578*67e74705SXin Li 
3579*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_n_s32(<4 x i32> %a, i32 %b) #0 {
3580*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
3581*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
3582*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
3583*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
3584*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
3585*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmulq_n_s32 and returns the result.
// The expected-IR directives above require %b to be inserted into all four
// lanes of a <4 x i32> and that vector to be multiplied with %a by `mul`.
test_vmulq_n_s32(int32x4_t a,int32_t b)3586*67e74705SXin Li int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
3587*67e74705SXin Li   return vmulq_n_s32(a, b);
3588*67e74705SXin Li }
3589*67e74705SXin Li 
3590*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmul_n_u16(<4 x i16> %a, i16 %b) #0 {
3591*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3592*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
3593*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
3594*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
3595*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
3596*67e74705SXin Li // CHECK:   ret <4 x i16> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmul_n_u16 and returns the result.
// The expected-IR directives above require %b to be inserted into all four
// lanes of a <4 x i16> and that vector to be multiplied with %a by `mul`
// (the same instruction sequence the signed 16-bit variant is checked for).
test_vmul_n_u16(uint16x4_t a,uint16_t b)3597*67e74705SXin Li uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
3598*67e74705SXin Li   return vmul_n_u16(a, b);
3599*67e74705SXin Li }
3600*67e74705SXin Li 
3601*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmulq_n_u16(<8 x i16> %a, i16 %b) #0 {
3602*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
3603*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
3604*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
3605*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
3606*67e74705SXin Li // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
3607*67e74705SXin Li // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
3608*67e74705SXin Li // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
3609*67e74705SXin Li // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
3610*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
3611*67e74705SXin Li // CHECK:   ret <8 x i16> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmulq_n_u16 and returns the result.
// The expected-IR directives above require %b to be inserted into all eight
// lanes of a <8 x i16> and that vector to be multiplied with %a by `mul`.
test_vmulq_n_u16(uint16x8_t a,uint16_t b)3612*67e74705SXin Li uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
3613*67e74705SXin Li   return vmulq_n_u16(a, b);
3614*67e74705SXin Li }
3615*67e74705SXin Li 
3616*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmul_n_u32(<2 x i32> %a, i32 %b) #0 {
3617*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3618*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
3619*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
3620*67e74705SXin Li // CHECK:   ret <2 x i32> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmul_n_u32 and returns the result.
// The expected-IR directives above require %b to be inserted into both lanes
// of a <2 x i32> and that vector to be multiplied with %a by `mul`.
test_vmul_n_u32(uint32x2_t a,uint32_t b)3621*67e74705SXin Li uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
3622*67e74705SXin Li   return vmul_n_u32(a, b);
3623*67e74705SXin Li }
3624*67e74705SXin Li 
3625*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmulq_n_u32(<4 x i32> %a, i32 %b) #0 {
3626*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
3627*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
3628*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
3629*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
3630*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
3631*67e74705SXin Li // CHECK:   ret <4 x i32> [[MUL_I]]
// Test wrapper: forwards (a, b) to vmulq_n_u32 and returns the result.
// The expected-IR directives above require %b to be inserted into all four
// lanes of a <4 x i32> and that vector to be multiplied with %a by `mul`.
test_vmulq_n_u32(uint32x4_t a,uint32_t b)3632*67e74705SXin Li uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
3633*67e74705SXin Li   return vmulq_n_u32(a, b);
3634*67e74705SXin Li }
3635*67e74705SXin Li 
3636*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_n_s16(<4 x i16> %a, i16 %b) #0 {
3637*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3638*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3639*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
3640*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
3641*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
3642*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
3643*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3644*67e74705SXin Li // CHECK:   [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3645*67e74705SXin Li // CHECK:   [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #2
3646*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL5_I]]
// Test wrapper: forwards (a, b) to vmull_n_s16 and returns the result.
// The expected-IR directives above require %b to be inserted into all four
// lanes of a <4 x i16> and a call to llvm.aarch64.neon.smull.v4i32 on %a and
// that vector, whose <4 x i32> result is returned directly.
test_vmull_n_s16(int16x4_t a,int16_t b)3647*67e74705SXin Li int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
3648*67e74705SXin Li   return vmull_n_s16(a, b);
3649*67e74705SXin Li }
3650*67e74705SXin Li 
3651*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_n_s32(<2 x i32> %a, i32 %b) #0 {
3652*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3653*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3654*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
3655*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
3656*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3657*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3658*67e74705SXin Li // CHECK:   [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #2
3659*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL3_I]]
// Test wrapper: forwards (a, b) to vmull_n_s32 and returns the result.
// The expected-IR directives above require %b to be inserted into both lanes
// of a <2 x i32> and a call to llvm.aarch64.neon.smull.v2i64 on %a and that
// vector, whose <2 x i64> result is returned directly.
test_vmull_n_s32(int32x2_t a,int32_t b)3660*67e74705SXin Li int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
3661*67e74705SXin Li   return vmull_n_s32(a, b);
3662*67e74705SXin Li }
3663*67e74705SXin Li 
3664*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmull_n_u16(<4 x i16> %a, i16 %b) #0 {
3665*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3666*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3667*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
3668*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
3669*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
3670*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
3671*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3672*67e74705SXin Li // CHECK:   [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3673*67e74705SXin Li // CHECK:   [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #2
3674*67e74705SXin Li // CHECK:   ret <4 x i32> [[VMULL5_I]]
// Codegen test wrapper: returns vmull_n_u16(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into all four
// lanes of a <4 x i16> and passed with a to @llvm.aarch64.neon.umull.v4i32.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmull_n_u16(uint16x4_t a,uint16_t b)3675*67e74705SXin Li uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
3676*67e74705SXin Li   return vmull_n_u16(a, b);
3677*67e74705SXin Li }
3678*67e74705SXin Li 
3679*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmull_n_u32(<2 x i32> %a, i32 %b) #0 {
3680*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3681*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3682*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
3683*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
3684*67e74705SXin Li // CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3685*67e74705SXin Li // CHECK:   [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3686*67e74705SXin Li // CHECK:   [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #2
3687*67e74705SXin Li // CHECK:   ret <2 x i64> [[VMULL3_I]]
// Codegen test wrapper: returns vmull_n_u32(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into both lanes
// of a <2 x i32> and passed with a to @llvm.aarch64.neon.umull.v2i64.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmull_n_u32(uint32x2_t a,uint32_t b)3688*67e74705SXin Li uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
3689*67e74705SXin Li   return vmull_n_u32(a, b);
3690*67e74705SXin Li }
3691*67e74705SXin Li 
3692*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmull_n_s16(<4 x i16> %a, i16 %b) #0 {
3693*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3694*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3695*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
3696*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
3697*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
3698*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
3699*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3700*67e74705SXin Li // CHECK:   [[VQDMULL_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3701*67e74705SXin Li // CHECK:   [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V4_I]]) #2
3702*67e74705SXin Li // CHECK:   [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
3703*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I]] to <4 x i32>
3704*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Codegen test wrapper: returns vqdmull_n_s16(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into all four
// lanes of a <4 x i16> and passed with a to @llvm.aarch64.neon.sqdmull.v4i32.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqdmull_n_s16(int16x4_t a,int16_t b)3705*67e74705SXin Li int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
3706*67e74705SXin Li   return vqdmull_n_s16(a, b);
3707*67e74705SXin Li }
3708*67e74705SXin Li 
3709*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmull_n_s32(<2 x i32> %a, i32 %b) #0 {
3710*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3711*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3712*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
3713*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
3714*67e74705SXin Li // CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3715*67e74705SXin Li // CHECK:   [[VQDMULL_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3716*67e74705SXin Li // CHECK:   [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V2_I]]) #2
3717*67e74705SXin Li // CHECK:   [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
3718*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I]] to <2 x i64>
3719*67e74705SXin Li // CHECK:   ret <2 x i64> [[TMP2]]
// Codegen test wrapper: returns vqdmull_n_s32(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into both lanes
// of a <2 x i32> and passed with a to @llvm.aarch64.neon.sqdmull.v2i64.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqdmull_n_s32(int32x2_t a,int32_t b)3720*67e74705SXin Li int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
3721*67e74705SXin Li   return vqdmull_n_s32(a, b);
3722*67e74705SXin Li }
3723*67e74705SXin Li 
3724*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqdmulh_n_s16(<4 x i16> %a, i16 %b) #0 {
3725*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3726*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3727*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
3728*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
3729*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
3730*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
3731*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3732*67e74705SXin Li // CHECK:   [[VQDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3733*67e74705SXin Li // CHECK:   [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V4_I]]) #2
3734*67e74705SXin Li // CHECK:   [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
3735*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V6_I]] to <4 x i16>
3736*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Codegen test wrapper: returns vqdmulh_n_s16(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into all four
// lanes of a <4 x i16> and passed with a to @llvm.aarch64.neon.sqdmulh.v4i16.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqdmulh_n_s16(int16x4_t a,int16_t b)3737*67e74705SXin Li int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
3738*67e74705SXin Li   return vqdmulh_n_s16(a, b);
3739*67e74705SXin Li }
3740*67e74705SXin Li 
3741*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqdmulhq_n_s16(<8 x i16> %a, i16 %b) #0 {
3742*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3743*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
3744*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
3745*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
3746*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
3747*67e74705SXin Li // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
3748*67e74705SXin Li // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
3749*67e74705SXin Li // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
3750*67e74705SXin Li // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
3751*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
3752*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3753*67e74705SXin Li // CHECK:   [[VQDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3754*67e74705SXin Li // CHECK:   [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V8_I]]) #2
3755*67e74705SXin Li // CHECK:   [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
3756*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V10_I]] to <8 x i16>
3757*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Codegen test wrapper: returns vqdmulhq_n_s16(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into all eight
// lanes of a <8 x i16> and passed with a to @llvm.aarch64.neon.sqdmulh.v8i16.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqdmulhq_n_s16(int16x8_t a,int16_t b)3758*67e74705SXin Li int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
3759*67e74705SXin Li   return vqdmulhq_n_s16(a, b);
3760*67e74705SXin Li }
3761*67e74705SXin Li 
3762*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqdmulh_n_s32(<2 x i32> %a, i32 %b) #0 {
3763*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3764*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3765*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
3766*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
3767*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3768*67e74705SXin Li // CHECK:   [[VQDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3769*67e74705SXin Li // CHECK:   [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V2_I]]) #2
3770*67e74705SXin Li // CHECK:   [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
3771*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V4_I]] to <2 x i32>
3772*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Codegen test wrapper: returns vqdmulh_n_s32(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into both lanes
// of a <2 x i32> and passed with a to @llvm.aarch64.neon.sqdmulh.v2i32.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqdmulh_n_s32(int32x2_t a,int32_t b)3773*67e74705SXin Li int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
3774*67e74705SXin Li   return vqdmulh_n_s32(a, b);
3775*67e74705SXin Li }
3776*67e74705SXin Li 
3777*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 {
3778*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3779*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
3780*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
3781*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
3782*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
3783*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
3784*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3785*67e74705SXin Li // CHECK:   [[VQDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3786*67e74705SXin Li // CHECK:   [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V4_I]]) #2
3787*67e74705SXin Li // CHECK:   [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
3788*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V6_I]] to <4 x i32>
3789*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Codegen test wrapper: returns vqdmulhq_n_s32(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into all four
// lanes of a <4 x i32> and passed with a to @llvm.aarch64.neon.sqdmulh.v4i32.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqdmulhq_n_s32(int32x4_t a,int32_t b)3790*67e74705SXin Li int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
3791*67e74705SXin Li   return vqdmulhq_n_s32(a, b);
3792*67e74705SXin Li }
3793*67e74705SXin Li 
3794*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqrdmulh_n_s16(<4 x i16> %a, i16 %b) #0 {
3795*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3796*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
3797*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
3798*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
3799*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
3800*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
3801*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3802*67e74705SXin Li // CHECK:   [[VQRDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3803*67e74705SXin Li // CHECK:   [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V4_I]]) #2
3804*67e74705SXin Li // CHECK:   [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
3805*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V6_I]] to <4 x i16>
3806*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Codegen test wrapper: returns vqrdmulh_n_s16(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into all four
// lanes of a <4 x i16> and passed with a to @llvm.aarch64.neon.sqrdmulh.v4i16.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqrdmulh_n_s16(int16x4_t a,int16_t b)3807*67e74705SXin Li int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
3808*67e74705SXin Li   return vqrdmulh_n_s16(a, b);
3809*67e74705SXin Li }
3810*67e74705SXin Li 
3811*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_n_s16(<8 x i16> %a, i16 %b) #0 {
3812*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3813*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
3814*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
3815*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
3816*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
3817*67e74705SXin Li // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
3818*67e74705SXin Li // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
3819*67e74705SXin Li // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
3820*67e74705SXin Li // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
3821*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
3822*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3823*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3824*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V8_I]]) #2
3825*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
3826*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V10_I]] to <8 x i16>
3827*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Codegen test wrapper: returns vqrdmulhq_n_s16(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into all eight
// lanes of a <8 x i16> and passed with a to @llvm.aarch64.neon.sqrdmulh.v8i16.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqrdmulhq_n_s16(int16x8_t a,int16_t b)3828*67e74705SXin Li int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
3829*67e74705SXin Li   return vqrdmulhq_n_s16(a, b);
3830*67e74705SXin Li }
3831*67e74705SXin Li 
3832*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqrdmulh_n_s32(<2 x i32> %a, i32 %b) #0 {
3833*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3834*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
3835*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
3836*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
3837*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3838*67e74705SXin Li // CHECK:   [[VQRDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3839*67e74705SXin Li // CHECK:   [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V2_I]]) #2
3840*67e74705SXin Li // CHECK:   [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
3841*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V4_I]] to <2 x i32>
3842*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Codegen test wrapper: returns vqrdmulh_n_s32(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into both lanes
// of a <2 x i32> and passed with a to @llvm.aarch64.neon.sqrdmulh.v2i32.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqrdmulh_n_s32(int32x2_t a,int32_t b)3843*67e74705SXin Li int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
3844*67e74705SXin Li   return vqrdmulh_n_s32(a, b);
3845*67e74705SXin Li }
3846*67e74705SXin Li 
3847*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 {
3848*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3849*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
3850*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
3851*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
3852*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
3853*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
3854*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3855*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3856*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V4_I]]) #2
3857*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
3858*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V6_I]] to <4 x i32>
3859*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Codegen test wrapper: returns vqrdmulhq_n_s32(a, b) unchanged.
// The FileCheck expectations above require b to be inserted into all four
// lanes of a <4 x i32> and passed with a to @llvm.aarch64.neon.sqrdmulh.v4i32.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vqrdmulhq_n_s32(int32x4_t a,int32_t b)3860*67e74705SXin Li int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
3861*67e74705SXin Li   return vqrdmulhq_n_s32(a, b);
3862*67e74705SXin Li }
3863*67e74705SXin Li 
3864*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_n_s16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
3865*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3866*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
3867*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
3868*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
3869*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
3870*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
3871*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD_I]]
// Codegen test wrapper: returns vmla_n_s16(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into all four
// lanes of a <4 x i16>, then a plain `mul` with b and `add` with a (no
// intrinsic call).
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmla_n_s16(int16x4_t a,int16x4_t b,int16_t c)3872*67e74705SXin Li int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
3873*67e74705SXin Li   return vmla_n_s16(a, b, c);
3874*67e74705SXin Li }
3875*67e74705SXin Li 
3876*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
3877*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
3878*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
3879*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
3880*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
3881*67e74705SXin Li // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
3882*67e74705SXin Li // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
3883*67e74705SXin Li // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
3884*67e74705SXin Li // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
3885*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
3886*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
3887*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD_I]]
// Codegen test wrapper: returns vmlaq_n_s16(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into all eight
// lanes of a <8 x i16>, then a plain `mul` with b and `add` with a (no
// intrinsic call).
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmlaq_n_s16(int16x8_t a,int16x8_t b,int16_t c)3888*67e74705SXin Li int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
3889*67e74705SXin Li   return vmlaq_n_s16(a, b, c);
3890*67e74705SXin Li }
3891*67e74705SXin Li 
3892*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
3893*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
3894*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
3895*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
3896*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
3897*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD_I]]
// Codegen test wrapper: returns vmla_n_s32(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into both lanes
// of a <2 x i32>, then a plain `mul` with b and `add` with a (no intrinsic
// call).
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmla_n_s32(int32x2_t a,int32x2_t b,int32_t c)3898*67e74705SXin Li int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
3899*67e74705SXin Li   return vmla_n_s32(a, b, c);
3900*67e74705SXin Li }
3901*67e74705SXin Li 
3902*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
3903*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
3904*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
3905*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
3906*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
3907*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
3908*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
3909*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD_I]]
// Codegen test wrapper: returns vmlaq_n_s32(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into all four
// lanes of a <4 x i32>, then a plain `mul` with b and `add` with a (no
// intrinsic call).
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmlaq_n_s32(int32x4_t a,int32x4_t b,int32_t c)3910*67e74705SXin Li int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
3911*67e74705SXin Li   return vmlaq_n_s32(a, b, c);
3912*67e74705SXin Li }
3913*67e74705SXin Li 
3914*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_n_u16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
3915*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3916*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
3917*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
3918*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
3919*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
3920*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
3921*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD_I]]
// Codegen test wrapper: returns vmla_n_u16(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into all four
// lanes of a <4 x i16>, then a plain `mul` with b and `add` with a -- the
// same IR shape as expected for the signed s16 variant in this file.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmla_n_u16(uint16x4_t a,uint16x4_t b,uint16_t c)3922*67e74705SXin Li uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
3923*67e74705SXin Li   return vmla_n_u16(a, b, c);
3924*67e74705SXin Li }
3925*67e74705SXin Li 
3926*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
3927*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
3928*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
3929*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
3930*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
3931*67e74705SXin Li // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
3932*67e74705SXin Li // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
3933*67e74705SXin Li // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
3934*67e74705SXin Li // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
3935*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
3936*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
3937*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD_I]]
// Codegen test wrapper: returns vmlaq_n_u16(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into all eight
// lanes of a <8 x i16>, then a plain `mul` with b and `add` with a (no
// intrinsic call).
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmlaq_n_u16(uint16x8_t a,uint16x8_t b,uint16_t c)3938*67e74705SXin Li uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
3939*67e74705SXin Li   return vmlaq_n_u16(a, b, c);
3940*67e74705SXin Li }
3941*67e74705SXin Li 
3942*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
3943*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
3944*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
3945*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
3946*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
3947*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD_I]]
// Codegen test wrapper: returns vmla_n_u32(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into both lanes
// of a <2 x i32>, then a plain `mul` with b and `add` with a (no intrinsic
// call).
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmla_n_u32(uint32x2_t a,uint32x2_t b,uint32_t c)3948*67e74705SXin Li uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
3949*67e74705SXin Li   return vmla_n_u32(a, b, c);
3950*67e74705SXin Li }
3951*67e74705SXin Li 
3952*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
3953*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
3954*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
3955*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
3956*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
3957*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
3958*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
3959*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD_I]]
// Codegen test wrapper: returns vmlaq_n_u32(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into all four
// lanes of a <4 x i32>, then a plain `mul` with b and `add` with a (no
// intrinsic call).
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmlaq_n_u32(uint32x4_t a,uint32x4_t b,uint32_t c)3960*67e74705SXin Li uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
3961*67e74705SXin Li   return vmlaq_n_u32(a, b, c);
3962*67e74705SXin Li }
3963*67e74705SXin Li 
3964*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
3965*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3966*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
3967*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
3968*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
3969*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3970*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
3971*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3972*67e74705SXin Li // CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3973*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2
3974*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
3975*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD_I]]
// Codegen test wrapper: returns vmlal_n_s16(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into all four
// lanes of a <4 x i16>, a call to @llvm.aarch64.neon.smull.v4i32 on b and
// that vector, and a plain `add` of the product to a.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmlal_n_s16(int32x4_t a,int16x4_t b,int16_t c)3976*67e74705SXin Li int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
3977*67e74705SXin Li   return vmlal_n_s16(a, b, c);
3978*67e74705SXin Li }
3979*67e74705SXin Li 
3980*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
3981*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
3982*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
3983*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3984*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
3985*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3986*67e74705SXin Li // CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3987*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2
3988*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
3989*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD_I]]
// Codegen test wrapper: returns vmlal_n_s32(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into both lanes
// of a <2 x i32>, a call to @llvm.aarch64.neon.smull.v2i64 on b and that
// vector, and a plain `add` of the product to a.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmlal_n_s32(int64x2_t a,int32x2_t b,int32_t c)3990*67e74705SXin Li int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
3991*67e74705SXin Li   return vmlal_n_s32(a, b, c);
3992*67e74705SXin Li }
3993*67e74705SXin Li 
3994*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlal_n_u16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
3995*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
3996*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
3997*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
3998*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
3999*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4000*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
4001*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4002*67e74705SXin Li // CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4003*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2
4004*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
4005*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD_I]]
// Codegen test wrapper: returns vmlal_n_u16(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into all four
// lanes of a <4 x i16>, a call to @llvm.aarch64.neon.umull.v4i32 on b and
// that vector, and a plain `add` of the product to a.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmlal_n_u16(uint32x4_t a,uint16x4_t b,uint16_t c)4006*67e74705SXin Li uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
4007*67e74705SXin Li   return vmlal_n_u16(a, b, c);
4008*67e74705SXin Li }
4009*67e74705SXin Li 
4010*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlal_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
4011*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
4012*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
4013*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4014*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
4015*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4016*67e74705SXin Li // CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4017*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2
4018*67e74705SXin Li // CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
4019*67e74705SXin Li // CHECK:   ret <2 x i64> [[ADD_I]]
// Codegen test wrapper: returns vmlal_n_u32(a, b, c) unchanged.
// The FileCheck expectations above require c to be inserted into both lanes
// of a <2 x i32>, a call to @llvm.aarch64.neon.umull.v2i64 on b and that
// vector, and a plain `add` of the product to a.
// NOTE(review): the text before the return type (and the numeric prefixes on
// the following lines) looks like code-browser residue, not C -- confirm
// against the upstream file.
test_vmlal_n_u32(uint64x2_t a,uint32x2_t b,uint32_t c)4020*67e74705SXin Li uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
4021*67e74705SXin Li   return vmlal_n_u32(a, b, c);
4022*67e74705SXin Li }
4023*67e74705SXin Li 
4024*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
4025*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4026*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4027*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
4028*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
4029*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
4030*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
4031*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
4032*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4033*67e74705SXin Li // CHECK:   [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4034*67e74705SXin Li // CHECK:   [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #2
4035*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4036*67e74705SXin Li // CHECK:   [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #2
4037*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V6_I]]
// Wraps vqdmlal_n_s16(a, b, c). Expected IR (see above): c is splatted into a
// <4 x i16>, b and the splat feed @llvm.aarch64.neon.sqdmull.v4i32, and the
// result is accumulated into a with @llvm.aarch64.neon.sqadd.v4i32.
test_vqdmlal_n_s16(int32x4_t a,int16x4_t b,int16_t c)4038*67e74705SXin Li int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
4039*67e74705SXin Li   return vqdmlal_n_s16(a, b, c);
4040*67e74705SXin Li }
4041*67e74705SXin Li 
4042*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
4043*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4044*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4045*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
4046*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
4047*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
4048*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4049*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4050*67e74705SXin Li // CHECK:   [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #2
4051*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4052*67e74705SXin Li // CHECK:   [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #2
4053*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V4_I]]
// Wraps vqdmlal_n_s32(a, b, c). Expected IR (see above): c is splatted into a
// <2 x i32>, b and the splat feed @llvm.aarch64.neon.sqdmull.v2i64, and the
// result is accumulated into a with @llvm.aarch64.neon.sqadd.v2i64.
test_vqdmlal_n_s32(int64x2_t a,int32x2_t b,int32_t c)4054*67e74705SXin Li int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
4055*67e74705SXin Li   return vqdmlal_n_s32(a, b, c);
4056*67e74705SXin Li }
4057*67e74705SXin Li 
4058*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_n_s16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
4059*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
4060*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
4061*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
4062*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
4063*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
4064*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
4065*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB_I]]
// Wraps vmls_n_s16(a, b, c). Expected IR (see above): c is splatted into a
// <4 x i16> via four insertelement ops, then a plain mul with b and a sub
// from a; no target intrinsic call is expected.
test_vmls_n_s16(int16x4_t a,int16x4_t b,int16_t c)4066*67e74705SXin Li int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
4067*67e74705SXin Li   return vmls_n_s16(a, b, c);
4068*67e74705SXin Li }
4069*67e74705SXin Li 
4070*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
4071*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
4072*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
4073*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
4074*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
4075*67e74705SXin Li // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
4076*67e74705SXin Li // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
4077*67e74705SXin Li // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
4078*67e74705SXin Li // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
4079*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
4080*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
4081*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB_I]]
// Wraps vmlsq_n_s16(a, b, c). Expected IR (see above): c is splatted into an
// <8 x i16> via eight insertelement ops, then mul with b and sub from a.
test_vmlsq_n_s16(int16x8_t a,int16x8_t b,int16_t c)4082*67e74705SXin Li int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
4083*67e74705SXin Li   return vmlsq_n_s16(a, b, c);
4084*67e74705SXin Li }
4085*67e74705SXin Li 
4086*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
4087*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
4088*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
4089*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
4090*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
4091*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB_I]]
// Wraps vmls_n_s32(a, b, c). Expected IR (see above): c is splatted into a
// <2 x i32> via two insertelement ops, then mul with b and sub from a.
test_vmls_n_s32(int32x2_t a,int32x2_t b,int32_t c)4092*67e74705SXin Li int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
4093*67e74705SXin Li   return vmls_n_s32(a, b, c);
4094*67e74705SXin Li }
4095*67e74705SXin Li 
4096*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
4097*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
4098*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
4099*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
4100*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
4101*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
4102*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
4103*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB_I]]
// Wraps vmlsq_n_s32(a, b, c). Expected IR (see above): c is splatted into a
// <4 x i32> via four insertelement ops, then mul with b and sub from a.
test_vmlsq_n_s32(int32x4_t a,int32x4_t b,int32_t c)4104*67e74705SXin Li int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
4105*67e74705SXin Li   return vmlsq_n_s32(a, b, c);
4106*67e74705SXin Li }
4107*67e74705SXin Li 
4108*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_n_u16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
4109*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
4110*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
4111*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
4112*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
4113*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
4114*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
4115*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB_I]]
// Unsigned counterpart: wraps vmls_n_u16(a, b, c). Expected IR (see above):
// c is splatted into a <4 x i16> via four insertelement ops, then mul with b
// and sub from a.
test_vmls_n_u16(uint16x4_t a,uint16x4_t b,uint16_t c)4116*67e74705SXin Li uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
4117*67e74705SXin Li   return vmls_n_u16(a, b, c);
4118*67e74705SXin Li }
4119*67e74705SXin Li 
4120*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
4121*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
4122*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
4123*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
4124*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
4125*67e74705SXin Li // CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
4126*67e74705SXin Li // CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
4127*67e74705SXin Li // CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
4128*67e74705SXin Li // CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
4129*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
4130*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
4131*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB_I]]
// Wraps vmlsq_n_u16(a, b, c). Expected IR (see above): c is splatted into an
// <8 x i16> via eight insertelement ops, then mul with b and sub from a.
test_vmlsq_n_u16(uint16x8_t a,uint16x8_t b,uint16_t c)4132*67e74705SXin Li uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
4133*67e74705SXin Li   return vmlsq_n_u16(a, b, c);
4134*67e74705SXin Li }
4135*67e74705SXin Li 
4136*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
4137*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
4138*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
4139*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
4140*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
4141*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB_I]]
// Wraps vmls_n_u32(a, b, c). Expected IR (see above): c is splatted into a
// <2 x i32> via two insertelement ops, then mul with b and sub from a.
test_vmls_n_u32(uint32x2_t a,uint32x2_t b,uint32_t c)4142*67e74705SXin Li uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
4143*67e74705SXin Li   return vmls_n_u32(a, b, c);
4144*67e74705SXin Li }
4145*67e74705SXin Li 
4146*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
4147*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
4148*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
4149*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
4150*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
4151*67e74705SXin Li // CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
4152*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
4153*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB_I]]
// Wraps vmlsq_n_u32(a, b, c). Expected IR (see above): c is splatted into a
// <4 x i32> via four insertelement ops, then mul with b and sub from a.
test_vmlsq_n_u32(uint32x4_t a,uint32x4_t b,uint32_t c)4154*67e74705SXin Li uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
4155*67e74705SXin Li   return vmlsq_n_u32(a, b, c);
4156*67e74705SXin Li }
4157*67e74705SXin Li 
4158*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
4159*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
4160*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
4161*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
4162*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
4163*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4164*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
4165*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4166*67e74705SXin Li // CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4167*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2
4168*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
4169*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB_I]]
// Wraps vmlsl_n_s16(a, b, c). Expected IR (see above): c is splatted into a
// <4 x i16>, b and the splat feed @llvm.aarch64.neon.smull.v4i32, and the
// widened product is subtracted from a with a plain sub.
test_vmlsl_n_s16(int32x4_t a,int16x4_t b,int16_t c)4170*67e74705SXin Li int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
4171*67e74705SXin Li   return vmlsl_n_s16(a, b, c);
4172*67e74705SXin Li }
4173*67e74705SXin Li 
4174*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
4175*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
4176*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
4177*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4178*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
4179*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4180*67e74705SXin Li // CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4181*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2
4182*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
4183*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB_I]]
// Wraps vmlsl_n_s32(a, b, c). Expected IR (see above): c is splatted into a
// <2 x i32>, b and the splat feed @llvm.aarch64.neon.smull.v2i64, and the
// widened product is subtracted from a with a plain sub.
test_vmlsl_n_s32(int64x2_t a,int32x2_t b,int32_t c)4184*67e74705SXin Li int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
4185*67e74705SXin Li   return vmlsl_n_s32(a, b, c);
4186*67e74705SXin Li }
4187*67e74705SXin Li 
4188*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsl_n_u16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
4189*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
4190*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
4191*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
4192*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
4193*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4194*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
4195*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4196*67e74705SXin Li // CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4197*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2
4198*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
4199*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB_I]]
// Wraps vmlsl_n_u16(a, b, c). Expected IR (see above): c is splatted into a
// <4 x i16>, b and the splat feed @llvm.aarch64.neon.umull.v4i32 (unsigned
// widening multiply), and the product is subtracted from a with a plain sub.
test_vmlsl_n_u16(uint32x4_t a,uint16x4_t b,uint16_t c)4200*67e74705SXin Li uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
4201*67e74705SXin Li   return vmlsl_n_u16(a, b, c);
4202*67e74705SXin Li }
4203*67e74705SXin Li 
4204*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vmlsl_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
4205*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
4206*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
4207*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4208*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
4209*67e74705SXin Li // CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4210*67e74705SXin Li // CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4211*67e74705SXin Li // CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2
4212*67e74705SXin Li // CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
4213*67e74705SXin Li // CHECK:   ret <2 x i64> [[SUB_I]]
// Wraps vmlsl_n_u32(a, b, c). Expected IR (see above): c is splatted into a
// <2 x i32>, b and the splat feed @llvm.aarch64.neon.umull.v2i64, and the
// product is subtracted from a with a plain sub.
test_vmlsl_n_u32(uint64x2_t a,uint32x2_t b,uint32_t c)4214*67e74705SXin Li uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
4215*67e74705SXin Li   return vmlsl_n_u32(a, b, c);
4216*67e74705SXin Li }
4217*67e74705SXin Li 
4218*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
4219*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4220*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4221*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
4222*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
4223*67e74705SXin Li // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
4224*67e74705SXin Li // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
4225*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
4226*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4227*67e74705SXin Li // CHECK:   [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4228*67e74705SXin Li // CHECK:   [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #2
4229*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4230*67e74705SXin Li // CHECK:   [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #2
4231*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V6_I]]
// Wraps vqdmlsl_n_s16(a, b, c). Expected IR (see above): c is splatted into a
// <4 x i16>, b and the splat feed @llvm.aarch64.neon.sqdmull.v4i32, and the
// result is subtracted from a with @llvm.aarch64.neon.sqsub.v4i32.
test_vqdmlsl_n_s16(int32x4_t a,int16x4_t b,int16_t c)4232*67e74705SXin Li int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
4233*67e74705SXin Li   return vqdmlsl_n_s16(a, b, c);
4234*67e74705SXin Li }
4235*67e74705SXin Li 
4236*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
4237*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4238*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4239*67e74705SXin Li // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
4240*67e74705SXin Li // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
4241*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
4242*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4243*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4244*67e74705SXin Li // CHECK:   [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #2
4245*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4246*67e74705SXin Li // CHECK:   [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #2
4247*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V4_I]]
// Wraps vqdmlsl_n_s32(a, b, c). Expected IR (see above): c is splatted into a
// <2 x i32>, b and the splat feed @llvm.aarch64.neon.sqdmull.v2i64, and the
// result is subtracted from a with @llvm.aarch64.neon.sqsub.v2i64.
test_vqdmlsl_n_s32(int64x2_t a,int32x2_t b,int32_t c)4248*67e74705SXin Li int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
4249*67e74705SXin Li   return vqdmlsl_n_s32(a, b, c);
4250*67e74705SXin Li }
4251*67e74705SXin Li 
4252*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
4253*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
4254*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
4255*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
4256*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD]]
// Lane-0 variant: wraps vmla_lane_u16(a, b, v, 0). Expected IR (see above):
// a shufflevector of v with an all-zero (zeroinitializer) mask producing a
// <4 x i16>, then mul with b and add to a.
test_vmla_lane_u16_0(uint16x4_t a,uint16x4_t b,uint16x4_t v)4257*67e74705SXin Li uint16x4_t test_vmla_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) {
4258*67e74705SXin Li   return vmla_lane_u16(a, b, v, 0);
4259*67e74705SXin Li }
4260*67e74705SXin Li 
4261*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
4262*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
4263*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
4264*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
4265*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD]]
// Lane-0 variant: wraps vmlaq_lane_u16(a, b, v, 0). Expected IR (see above):
// a shufflevector of the <4 x i16> v with an all-zero mask widening it to
// <8 x i16>, then mul with b and add to a.
test_vmlaq_lane_u16_0(uint16x8_t a,uint16x8_t b,uint16x4_t v)4266*67e74705SXin Li uint16x8_t test_vmlaq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) {
4267*67e74705SXin Li   return vmlaq_lane_u16(a, b, v, 0);
4268*67e74705SXin Li }
4269*67e74705SXin Li 
4270*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
4271*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
4272*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
4273*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
4274*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD]]
// Lane-0 variant: wraps vmla_lane_u32(a, b, v, 0). Expected IR (see above):
// a shufflevector of v with an all-zero mask producing a <2 x i32>, then mul
// with b and add to a.
test_vmla_lane_u32_0(uint32x2_t a,uint32x2_t b,uint32x2_t v)4275*67e74705SXin Li uint32x2_t test_vmla_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) {
4276*67e74705SXin Li   return vmla_lane_u32(a, b, v, 0);
4277*67e74705SXin Li }
4278*67e74705SXin Li 
4279*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
4280*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
4281*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
4282*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
4283*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// Lane-0 variant: wraps vmlaq_lane_u32(a, b, v, 0). Expected IR (see above):
// a shufflevector of the <2 x i32> v with an all-zero mask widening it to
// <4 x i32>, then mul with b and add to a.
test_vmlaq_lane_u32_0(uint32x4_t a,uint32x4_t b,uint32x2_t v)4284*67e74705SXin Li uint32x4_t test_vmlaq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) {
4285*67e74705SXin Li   return vmlaq_lane_u32(a, b, v, 0);
4286*67e74705SXin Li }
4287*67e74705SXin Li 
4288*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_laneq_u16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
4289*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
4290*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
4291*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
4292*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD]]
// Lane-0 variant: wraps vmla_laneq_u16(a, b, v, 0) where v is a 128-bit
// vector. Expected IR (see above): a shufflevector of the <8 x i16> v with an
// all-zero mask narrowing it to <4 x i16>, then mul with b and add to a.
test_vmla_laneq_u16_0(uint16x4_t a,uint16x4_t b,uint16x8_t v)4293*67e74705SXin Li uint16x4_t test_vmla_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) {
4294*67e74705SXin Li   return vmla_laneq_u16(a, b, v, 0);
4295*67e74705SXin Li }
4296*67e74705SXin Li 
4297*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_u16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
4298*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
4299*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
4300*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
4301*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD]]
// Lane-0 variant: wraps vmlaq_laneq_u16(a, b, v, 0). Expected IR (see above):
// a shufflevector of the <8 x i16> v with an all-zero mask, then mul with b
// and add to a, all at <8 x i16>.
test_vmlaq_laneq_u16_0(uint16x8_t a,uint16x8_t b,uint16x8_t v)4302*67e74705SXin Li uint16x8_t test_vmlaq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) {
4303*67e74705SXin Li   return vmlaq_laneq_u16(a, b, v, 0);
4304*67e74705SXin Li }
4305*67e74705SXin Li 
4306*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_laneq_u32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
4307*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
4308*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
4309*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
4310*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD]]
// Lane-0 variant: wraps vmla_laneq_u32(a, b, v, 0). Expected IR (see above):
// a shufflevector of the <4 x i32> v with an all-zero mask narrowing it to
// <2 x i32>, then mul with b and add to a.
test_vmla_laneq_u32_0(uint32x2_t a,uint32x2_t b,uint32x4_t v)4311*67e74705SXin Li uint32x2_t test_vmla_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) {
4312*67e74705SXin Li   return vmla_laneq_u32(a, b, v, 0);
4313*67e74705SXin Li }
4314*67e74705SXin Li 
4315*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_u32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
4316*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
4317*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
4318*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
4319*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// Lane-0 variant: wraps vmlaq_laneq_u32(a, b, v, 0). Expected IR (see above):
// a shufflevector of the <4 x i32> v with an all-zero mask, then mul with b
// and add to a, all at <4 x i32>.
test_vmlaq_laneq_u32_0(uint32x4_t a,uint32x4_t b,uint32x4_t v)4320*67e74705SXin Li uint32x4_t test_vmlaq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
4321*67e74705SXin Li   return vmlaq_laneq_u32(a, b, v, 0);
4322*67e74705SXin Li }
4323*67e74705SXin Li 
4324*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
4325*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
4326*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4327*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4328*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4329*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4330*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4331*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
4332*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4333*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
4334*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// Lane-0 variant: wraps vqdmlal_laneq_s16(a, b, v, 0). Expected IR (see
// above): a shufflevector of the <8 x i16> v with an all-zero mask down to
// <4 x i16>, @llvm.aarch64.neon.sqdmull.v4i32 on b and the shuffle, then
// @llvm.aarch64.neon.sqadd.v4i32 with a.
test_vqdmlal_laneq_s16_0(int32x4_t a,int16x4_t b,int16x8_t v)4335*67e74705SXin Li int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
4336*67e74705SXin Li   return vqdmlal_laneq_s16(a, b, v, 0);
4337*67e74705SXin Li }
4338*67e74705SXin Li 
4339*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
4340*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
4341*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4342*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4343*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4344*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4345*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4346*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
4347*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4348*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
4349*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
// Lane-0 variant: wraps vqdmlal_laneq_s32(a, b, v, 0). Expected IR (see
// above): a shufflevector of the <4 x i32> v with an all-zero mask down to
// <2 x i32>, @llvm.aarch64.neon.sqdmull.v2i64 on b and the shuffle, then
// @llvm.aarch64.neon.sqadd.v2i64 with a.
test_vqdmlal_laneq_s32_0(int64x2_t a,int32x2_t b,int32x4_t v)4350*67e74705SXin Li int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
4351*67e74705SXin Li   return vqdmlal_laneq_s32(a, b, v, 0);
4352*67e74705SXin Li }
4353*67e74705SXin Li 
4354*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
4355*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
4356*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
4357*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4358*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
4359*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4360*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4361*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4362*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
4363*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4364*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
4365*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// Lane-0 "high" variant: wraps vqdmlal_high_laneq_s16(a, b, v, 0). Expected
// IR (see above): elements 4..7 of b are extracted with a shufflevector, v is
// shuffled with an all-zero mask to <4 x i16>, both feed
// @llvm.aarch64.neon.sqdmull.v4i32, and the result goes through
// @llvm.aarch64.neon.sqadd.v4i32 with a.
test_vqdmlal_high_laneq_s16_0(int32x4_t a,int16x8_t b,int16x8_t v)4366*67e74705SXin Li int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
4367*67e74705SXin Li   return vqdmlal_high_laneq_s16(a, b, v, 0);
4368*67e74705SXin Li }
4369*67e74705SXin Li 
4370*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
4371*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
4372*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
4373*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4374*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
4375*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4376*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4377*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4378*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
4379*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4380*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
4381*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
// Lane-0 "high" variant: wraps vqdmlal_high_laneq_s32(a, b, v, 0). Expected
// IR (see above): elements 2..3 of b are extracted with a shufflevector, v is
// shuffled with an all-zero mask to <2 x i32>, both feed
// @llvm.aarch64.neon.sqdmull.v2i64, and the result goes through
// @llvm.aarch64.neon.sqadd.v2i64 with a.
test_vqdmlal_high_laneq_s32_0(int64x2_t a,int32x4_t b,int32x4_t v)4382*67e74705SXin Li int64x2_t test_vqdmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
4383*67e74705SXin Li   return vqdmlal_high_laneq_s32(a, b, v, 0);
4384*67e74705SXin Li }
4385*67e74705SXin Li 
4386*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
4387*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
4388*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
4389*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
4390*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB]]
// Lane-0 variant: wraps vmls_lane_u16(a, b, v, 0). Expected IR (see above):
// a shufflevector of v with an all-zero mask producing a <4 x i16>, then mul
// with b and sub from a.
test_vmls_lane_u16_0(uint16x4_t a,uint16x4_t b,uint16x4_t v)4391*67e74705SXin Li uint16x4_t test_vmls_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) {
4392*67e74705SXin Li   return vmls_lane_u16(a, b, v, 0);
4393*67e74705SXin Li }
4394*67e74705SXin Li 
4395*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
4396*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
4397*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
4398*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
4399*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB]]
// Lane-0 variant: wraps vmlsq_lane_u16(a, b, v, 0). Expected IR (see above):
// a shufflevector of the <4 x i16> v with an all-zero mask widening it to
// <8 x i16>, then mul with b and sub from a.
test_vmlsq_lane_u16_0(uint16x8_t a,uint16x8_t b,uint16x4_t v)4400*67e74705SXin Li uint16x8_t test_vmlsq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) {
4401*67e74705SXin Li   return vmlsq_lane_u16(a, b, v, 0);
4402*67e74705SXin Li }
4403*67e74705SXin Li 
4404*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
4405*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
4406*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
4407*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
4408*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB]]
// Lane-0 case of vmls_lane_u32. Per the FileCheck expectations above, the IR
// must be a shufflevector of %v with a zeroinitializer mask, a <2 x i32> mul of
// %b by that shuffle, and a sub of the product from %a.
test_vmls_lane_u32_0(uint32x2_t a,uint32x2_t b,uint32x2_t v)4409*67e74705SXin Li uint32x2_t test_vmls_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) {
4410*67e74705SXin Li   return vmls_lane_u32(a, b, v, 0);
4411*67e74705SXin Li }
4412*67e74705SXin Li 
4413*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
4414*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
4415*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
4416*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
4417*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
// Lane-0 case of vmlsq_lane_u32. Per the FileCheck expectations above, the
// <2 x i32> %v is shuffled with a zeroinitializer mask into a 4-element vector,
// multiplied with %b, and the product is subtracted from %a.
test_vmlsq_lane_u32_0(uint32x4_t a,uint32x4_t b,uint32x2_t v)4418*67e74705SXin Li uint32x4_t test_vmlsq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) {
4419*67e74705SXin Li   return vmlsq_lane_u32(a, b, v, 0);
4420*67e74705SXin Li }
4421*67e74705SXin Li 
4422*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_laneq_u16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
4423*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
4424*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
4425*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
4426*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB]]
// Lane-0 case of vmls_laneq_u16. Per the FileCheck expectations above, the
// <8 x i16> %v is shuffled with a zeroinitializer mask down to 4 elements,
// multiplied with %b, and the product is subtracted from %a.
test_vmls_laneq_u16_0(uint16x4_t a,uint16x4_t b,uint16x8_t v)4427*67e74705SXin Li uint16x4_t test_vmls_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) {
4428*67e74705SXin Li   return vmls_laneq_u16(a, b, v, 0);
4429*67e74705SXin Li }
4430*67e74705SXin Li 
4431*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_u16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
4432*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
4433*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
4434*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
4435*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB]]
// Lane-0 case of vmlsq_laneq_u16. Per the FileCheck expectations above, the IR
// must be a shufflevector of %v with a zeroinitializer mask, an <8 x i16> mul
// of %b by that shuffle, and a sub of the product from %a.
test_vmlsq_laneq_u16_0(uint16x8_t a,uint16x8_t b,uint16x8_t v)4436*67e74705SXin Li uint16x8_t test_vmlsq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) {
4437*67e74705SXin Li   return vmlsq_laneq_u16(a, b, v, 0);
4438*67e74705SXin Li }
4439*67e74705SXin Li 
4440*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_laneq_u32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
4441*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
4442*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
4443*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
4444*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB]]
// Lane-0 case of vmls_laneq_u32. Per the FileCheck expectations above, the
// <4 x i32> %v is shuffled with a zeroinitializer mask down to 2 elements,
// multiplied with %b, and the product is subtracted from %a.
test_vmls_laneq_u32_0(uint32x2_t a,uint32x2_t b,uint32x4_t v)4445*67e74705SXin Li uint32x2_t test_vmls_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) {
4446*67e74705SXin Li   return vmls_laneq_u32(a, b, v, 0);
4447*67e74705SXin Li }
4448*67e74705SXin Li 
4449*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_u32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
4450*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
4451*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
4452*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
4453*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
// Lane-0 case of vmlsq_laneq_u32. Per the FileCheck expectations above, the IR
// must be a shufflevector of %v with a zeroinitializer mask, a <4 x i32> mul of
// %b by that shuffle, and a sub of the product from %a.
test_vmlsq_laneq_u32_0(uint32x4_t a,uint32x4_t b,uint32x4_t v)4454*67e74705SXin Li uint32x4_t test_vmlsq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
4455*67e74705SXin Li   return vmlsq_laneq_u32(a, b, v, 0);
4456*67e74705SXin Li }
4457*67e74705SXin Li 
4458*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
4459*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
4460*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4461*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4462*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4463*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4464*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4465*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
4466*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4467*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
4468*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
// Lane-0 case of vqdmlsl_laneq_s16. Per the FileCheck expectations above, %v is
// shuffled with a zeroinitializer mask, %b and the shuffle (via bitcasts) go to
// llvm.aarch64.neon.sqdmull.v4i32, and llvm.aarch64.neon.sqsub.v4i32 subtracts
// that product from the bitcast-round-tripped %a.
test_vqdmlsl_laneq_s16_0(int32x4_t a,int16x4_t b,int16x8_t v)4469*67e74705SXin Li int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
4470*67e74705SXin Li   return vqdmlsl_laneq_s16(a, b, v, 0);
4471*67e74705SXin Li }
4472*67e74705SXin Li 
4473*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
4474*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
4475*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4476*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4477*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4478*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4479*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4480*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
4481*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4482*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
4483*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
// Lane-0 case of vqdmlsl_laneq_s32. Per the FileCheck expectations above, %v is
// shuffled with a zeroinitializer mask, %b and the shuffle (via bitcasts) go to
// llvm.aarch64.neon.sqdmull.v2i64, and llvm.aarch64.neon.sqsub.v2i64 subtracts
// that product from the bitcast-round-tripped %a.
test_vqdmlsl_laneq_s32_0(int64x2_t a,int32x2_t b,int32x4_t v)4484*67e74705SXin Li int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
4485*67e74705SXin Li   return vqdmlsl_laneq_s32(a, b, v, 0);
4486*67e74705SXin Li }
4487*67e74705SXin Li 
4488*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
4489*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
4490*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
4491*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4492*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
4493*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4494*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4495*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4496*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
4497*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4498*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
4499*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
// Lane-0 case of vqdmlsl_high_laneq_s16. Per the FileCheck expectations above,
// elements 4..7 of %b are extracted with one shufflevector, %v is shuffled with
// a zeroinitializer mask, both feed llvm.aarch64.neon.sqdmull.v4i32, and
// llvm.aarch64.neon.sqsub.v4i32 subtracts that product from %a.
test_vqdmlsl_high_laneq_s16_0(int32x4_t a,int16x8_t b,int16x8_t v)4500*67e74705SXin Li int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
4501*67e74705SXin Li   return vqdmlsl_high_laneq_s16(a, b, v, 0);
4502*67e74705SXin Li }
4503*67e74705SXin Li 
4504*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
4505*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
4506*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
4507*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4508*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
4509*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4510*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4511*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4512*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
4513*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4514*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
4515*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
// Lane-0 case of vqdmlsl_high_laneq_s32. Per the FileCheck expectations above,
// elements 2..3 of %b are extracted with one shufflevector, %v is shuffled with
// a zeroinitializer mask, both feed llvm.aarch64.neon.sqdmull.v2i64, and
// llvm.aarch64.neon.sqsub.v2i64 subtracts that product from %a.
test_vqdmlsl_high_laneq_s32_0(int64x2_t a,int32x4_t b,int32x4_t v)4516*67e74705SXin Li int64x2_t test_vqdmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
4517*67e74705SXin Li   return vqdmlsl_high_laneq_s32(a, b, v, 0);
4518*67e74705SXin Li }
4519*67e74705SXin Li 
4520*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqdmulh_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 {
4521*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
4522*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4523*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4524*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4525*67e74705SXin Li // CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4526*67e74705SXin Li // CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2
4527*67e74705SXin Li // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
4528*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
4529*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Lane-0 case of vqdmulh_laneq_s16. Per the FileCheck expectations above, %v is
// shuffled with a zeroinitializer mask down to <4 x i16>, and %a and the
// shuffle (via bitcasts) are passed to llvm.aarch64.neon.sqdmulh.v4i16, whose
// result is returned after a bitcast round trip.
test_vqdmulh_laneq_s16_0(int16x4_t a,int16x8_t v)4530*67e74705SXin Li int16x4_t test_vqdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) {
4531*67e74705SXin Li   return vqdmulh_laneq_s16(a, v, 0);
4532*67e74705SXin Li }
4533*67e74705SXin Li 
4534*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqdmulhq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 {
4535*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
4536*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4537*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
4538*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4539*67e74705SXin Li // CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4540*67e74705SXin Li // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2
4541*67e74705SXin Li // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
4542*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
4543*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Lane-0 case of vqdmulhq_laneq_s16. Per the FileCheck expectations above, %v
// is shuffled with a zeroinitializer mask, and %a and the shuffle (via
// bitcasts) are passed to llvm.aarch64.neon.sqdmulh.v8i16, whose result is
// returned after a bitcast round trip.
test_vqdmulhq_laneq_s16_0(int16x8_t a,int16x8_t v)4544*67e74705SXin Li int16x8_t test_vqdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) {
4545*67e74705SXin Li   return vqdmulhq_laneq_s16(a, v, 0);
4546*67e74705SXin Li }
4547*67e74705SXin Li 
4548*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqdmulh_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 {
4549*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
4550*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4551*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4552*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4553*67e74705SXin Li // CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4554*67e74705SXin Li // CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2
4555*67e74705SXin Li // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
4556*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
4557*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Lane-0 case of vqdmulh_laneq_s32. Per the FileCheck expectations above, %v is
// shuffled with a zeroinitializer mask down to <2 x i32>, and %a and the
// shuffle (via bitcasts) are passed to llvm.aarch64.neon.sqdmulh.v2i32, whose
// result is returned after a bitcast round trip.
test_vqdmulh_laneq_s32_0(int32x2_t a,int32x4_t v)4558*67e74705SXin Li int32x2_t test_vqdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) {
4559*67e74705SXin Li   return vqdmulh_laneq_s32(a, v, 0);
4560*67e74705SXin Li }
4561*67e74705SXin Li 
4562*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmulhq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 {
4563*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
4564*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4565*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
4566*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4567*67e74705SXin Li // CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4568*67e74705SXin Li // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2
4569*67e74705SXin Li // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
4570*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
4571*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Lane-0 case of vqdmulhq_laneq_s32. Per the FileCheck expectations above, %v
// is shuffled with a zeroinitializer mask, and %a and the shuffle (via
// bitcasts) are passed to llvm.aarch64.neon.sqdmulh.v4i32, whose result is
// returned after a bitcast round trip.
test_vqdmulhq_laneq_s32_0(int32x4_t a,int32x4_t v)4572*67e74705SXin Li int32x4_t test_vqdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) {
4573*67e74705SXin Li   return vqdmulhq_laneq_s32(a, v, 0);
4574*67e74705SXin Li }
4575*67e74705SXin Li 
4576*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqrdmulh_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 {
4577*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
4578*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4579*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4580*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4581*67e74705SXin Li // CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4582*67e74705SXin Li // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2
4583*67e74705SXin Li // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
4584*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
4585*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Lane-0 case of vqrdmulh_laneq_s16. Per the FileCheck expectations above, %v
// is shuffled with a zeroinitializer mask down to <4 x i16>, and %a and the
// shuffle (via bitcasts) are passed to llvm.aarch64.neon.sqrdmulh.v4i16, whose
// result is returned after a bitcast round trip.
test_vqrdmulh_laneq_s16_0(int16x4_t a,int16x8_t v)4586*67e74705SXin Li int16x4_t test_vqrdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) {
4587*67e74705SXin Li   return vqrdmulh_laneq_s16(a, v, 0);
4588*67e74705SXin Li }
4589*67e74705SXin Li 
4590*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 {
4591*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
4592*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4593*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
4594*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4595*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4596*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2
4597*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
4598*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
4599*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Lane-0 case of vqrdmulhq_laneq_s16. Per the FileCheck expectations above, %v
// is shuffled with a zeroinitializer mask, and %a and the shuffle (via
// bitcasts) are passed to llvm.aarch64.neon.sqrdmulh.v8i16, whose result is
// returned after a bitcast round trip.
test_vqrdmulhq_laneq_s16_0(int16x8_t a,int16x8_t v)4600*67e74705SXin Li int16x8_t test_vqrdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) {
4601*67e74705SXin Li   return vqrdmulhq_laneq_s16(a, v, 0);
4602*67e74705SXin Li }
4603*67e74705SXin Li 
4604*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqrdmulh_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 {
4605*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
4606*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4607*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4608*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4609*67e74705SXin Li // CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4610*67e74705SXin Li // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2
4611*67e74705SXin Li // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
4612*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
4613*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Lane-0 case of vqrdmulh_laneq_s32. Per the FileCheck expectations above, %v
// is shuffled with a zeroinitializer mask down to <2 x i32>, and %a and the
// shuffle (via bitcasts) are passed to llvm.aarch64.neon.sqrdmulh.v2i32, whose
// result is returned after a bitcast round trip.
test_vqrdmulh_laneq_s32_0(int32x2_t a,int32x4_t v)4614*67e74705SXin Li int32x2_t test_vqrdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) {
4615*67e74705SXin Li   return vqrdmulh_laneq_s32(a, v, 0);
4616*67e74705SXin Li }
4617*67e74705SXin Li 
4618*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 {
4619*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
4620*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4621*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
4622*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4623*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4624*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2
4625*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
4626*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
4627*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Lane-0 case of vqrdmulhq_laneq_s32. Per the FileCheck expectations above, %v
// is shuffled with a zeroinitializer mask, and %a and the shuffle (via
// bitcasts) are passed to llvm.aarch64.neon.sqrdmulh.v4i32, whose result is
// returned after a bitcast round trip.
test_vqrdmulhq_laneq_s32_0(int32x4_t a,int32x4_t v)4628*67e74705SXin Li int32x4_t test_vqrdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) {
4629*67e74705SXin Li   return vqrdmulhq_laneq_s32(a, v, 0);
4630*67e74705SXin Li }
4631*67e74705SXin Li 
4632*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
4633*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4634*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
4635*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
4636*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD]]
// vmla_lane_u16 with lane index 3. Per the FileCheck expectations above, the IR
// must be a shufflevector of %v whose mask is all 3s, a <4 x i16> mul of %b by
// that shuffle, and an add of the product to %a.
test_vmla_lane_u16(uint16x4_t a,uint16x4_t b,uint16x4_t v)4637*67e74705SXin Li uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) {
4638*67e74705SXin Li   return vmla_lane_u16(a, b, v, 3);
4639*67e74705SXin Li }
4640*67e74705SXin Li 
4641*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
4642*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
4643*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
4644*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
4645*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD]]
// vmlaq_lane_u16 with lane index 3. Per the FileCheck expectations above, the
// <4 x i16> %v is shuffled with an all-3s mask into an 8-element vector,
// multiplied with %b, and the product is added to %a.
test_vmlaq_lane_u16(uint16x8_t a,uint16x8_t b,uint16x4_t v)4646*67e74705SXin Li uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) {
4647*67e74705SXin Li   return vmlaq_lane_u16(a, b, v, 3);
4648*67e74705SXin Li }
4649*67e74705SXin Li 
4650*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
4651*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
4652*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
4653*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
4654*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD]]
// vmla_lane_u32 with lane index 1. Per the FileCheck expectations above, the IR
// must be a shufflevector of %v whose mask is all 1s, a <2 x i32> mul of %b by
// that shuffle, and an add of the product to %a.
test_vmla_lane_u32(uint32x2_t a,uint32x2_t b,uint32x2_t v)4655*67e74705SXin Li uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) {
4656*67e74705SXin Li   return vmla_lane_u32(a, b, v, 1);
4657*67e74705SXin Li }
4658*67e74705SXin Li 
4659*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
4660*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
4661*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
4662*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
4663*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// vmlaq_lane_u32 with lane index 1. Per the FileCheck expectations above, the
// <2 x i32> %v is shuffled with an all-1s mask into a 4-element vector,
// multiplied with %b, and the product is added to %a.
test_vmlaq_lane_u32(uint32x4_t a,uint32x4_t b,uint32x2_t v)4664*67e74705SXin Li uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) {
4665*67e74705SXin Li   return vmlaq_lane_u32(a, b, v, 1);
4666*67e74705SXin Li }
4667*67e74705SXin Li 
4668*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmla_laneq_u16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
4669*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
4670*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
4671*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
4672*67e74705SXin Li // CHECK:   ret <4 x i16> [[ADD]]
// vmla_laneq_u16 with lane index 7. Per the FileCheck expectations above, the
// <8 x i16> %v is shuffled with an all-7s mask down to 4 elements, multiplied
// with %b, and the product is added to %a.
test_vmla_laneq_u16(uint16x4_t a,uint16x4_t b,uint16x8_t v)4673*67e74705SXin Li uint16x4_t test_vmla_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) {
4674*67e74705SXin Li   return vmla_laneq_u16(a, b, v, 7);
4675*67e74705SXin Li }
4676*67e74705SXin Li 
4677*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
4678*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
4679*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
4680*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
4681*67e74705SXin Li // CHECK:   ret <8 x i16> [[ADD]]
// vmlaq_laneq_u16 with lane index 7. Per the FileCheck expectations above, the
// IR must be a shufflevector of %v whose mask is all 7s, an <8 x i16> mul of %b
// by that shuffle, and an add of the product to %a.
test_vmlaq_laneq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t v)4682*67e74705SXin Li uint16x8_t test_vmlaq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) {
4683*67e74705SXin Li   return vmlaq_laneq_u16(a, b, v, 7);
4684*67e74705SXin Li }
4685*67e74705SXin Li 
4686*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmla_laneq_u32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
4687*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
4688*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
4689*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
4690*67e74705SXin Li // CHECK:   ret <2 x i32> [[ADD]]
// vmla_laneq_u32 with lane index 3. Per the FileCheck expectations above, the
// <4 x i32> %v is shuffled with an all-3s mask down to 2 elements, multiplied
// with %b, and the product is added to %a.
test_vmla_laneq_u32(uint32x2_t a,uint32x2_t b,uint32x4_t v)4691*67e74705SXin Li uint32x2_t test_vmla_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) {
4692*67e74705SXin Li   return vmla_laneq_u32(a, b, v, 3);
4693*67e74705SXin Li }
4694*67e74705SXin Li 
4695*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
4696*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4697*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
4698*67e74705SXin Li // CHECK:   [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
4699*67e74705SXin Li // CHECK:   ret <4 x i32> [[ADD]]
// vmlaq_laneq_u32 with lane index 3. Per the FileCheck expectations above, the
// IR must be a shufflevector of %v whose mask is all 3s, a <4 x i32> mul of %b
// by that shuffle, and an add of the product to %a.
test_vmlaq_laneq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t v)4700*67e74705SXin Li uint32x4_t test_vmlaq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
4701*67e74705SXin Li   return vmlaq_laneq_u32(a, b, v, 3);
4702*67e74705SXin Li }
4703*67e74705SXin Li 
4704*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
4705*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
4706*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4707*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4708*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4709*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4710*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4711*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
4712*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4713*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
4714*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// vqdmlal_laneq_s16 with lane index 7. Per the FileCheck expectations above, %v
// is shuffled with an all-7s mask, %b and the shuffle (via bitcasts) go to
// llvm.aarch64.neon.sqdmull.v4i32, and llvm.aarch64.neon.sqadd.v4i32 adds that
// product to the bitcast-round-tripped %a.
test_vqdmlal_laneq_s16(int32x4_t a,int16x4_t b,int16x8_t v)4715*67e74705SXin Li int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
4716*67e74705SXin Li   return vqdmlal_laneq_s16(a, b, v, 7);
4717*67e74705SXin Li }
4718*67e74705SXin Li 
4719*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
4720*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
4721*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4722*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4723*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4724*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4725*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4726*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
4727*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4728*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
4729*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
// vqdmlal_laneq_s32 with lane index 3. Per the FileCheck expectations above, %v
// is shuffled with an all-3s mask, %b and the shuffle (via bitcasts) go to
// llvm.aarch64.neon.sqdmull.v2i64, and llvm.aarch64.neon.sqadd.v2i64 adds that
// product to the bitcast-round-tripped %a.
test_vqdmlal_laneq_s32(int64x2_t a,int32x2_t b,int32x4_t v)4730*67e74705SXin Li int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
4731*67e74705SXin Li   return vqdmlal_laneq_s32(a, b, v, 3);
4732*67e74705SXin Li }
4733*67e74705SXin Li 
4734*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
4735*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
4736*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
4737*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4738*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
4739*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4740*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4741*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4742*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
4743*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4744*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
4745*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// vqdmlal_high_laneq_s16 with lane index 7. Per the FileCheck expectations
// above, elements 4..7 of %b are extracted with one shufflevector, %v is
// shuffled with an all-7s mask, both feed llvm.aarch64.neon.sqdmull.v4i32, and
// llvm.aarch64.neon.sqadd.v4i32 adds that product to %a.
test_vqdmlal_high_laneq_s16(int32x4_t a,int16x8_t b,int16x8_t v)4746*67e74705SXin Li int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
4747*67e74705SXin Li   return vqdmlal_high_laneq_s16(a, b, v, 7);
4748*67e74705SXin Li }
4749*67e74705SXin Li 
4750*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
4751*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
4752*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
4753*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4754*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
4755*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4756*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4757*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4758*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
4759*67e74705SXin Li // CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4760*67e74705SXin Li // CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
4761*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
// vqdmlal_high_laneq_s32 with lane index 3. Per the FileCheck expectations
// above, elements 2..3 of %b are extracted with one shufflevector, %v is
// shuffled with an all-3s mask, both feed llvm.aarch64.neon.sqdmull.v2i64, and
// llvm.aarch64.neon.sqadd.v2i64 adds that product to %a.
test_vqdmlal_high_laneq_s32(int64x2_t a,int32x4_t b,int32x4_t v)4762*67e74705SXin Li int64x2_t test_vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
4763*67e74705SXin Li   return vqdmlal_high_laneq_s32(a, b, v, 3);
4764*67e74705SXin Li }
4765*67e74705SXin Li 
4766*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
4767*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4768*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
4769*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
4770*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB]]
// Codegen test wrapper: forwards a, b and v to vmls_lane_u16 with the constant
// lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): element 3 of v is splatted to <4 x i16>,
// multiplied by b, and the product is subtracted from a.
test_vmls_lane_u16(uint16x4_t a,uint16x4_t b,uint16x4_t v)4771*67e74705SXin Li uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) {
4772*67e74705SXin Li   return vmls_lane_u16(a, b, v, 3);
4773*67e74705SXin Li }
4774*67e74705SXin Li 
4775*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
4776*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
4777*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
4778*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
4779*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB]]
// Codegen test wrapper: forwards a, b and v to vmlsq_lane_u16 with the
// constant lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): element 3 of the 4-element v is splatted to
// <8 x i16>, multiplied by b, and the product is subtracted from a.
test_vmlsq_lane_u16(uint16x8_t a,uint16x8_t b,uint16x4_t v)4780*67e74705SXin Li uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) {
4781*67e74705SXin Li   return vmlsq_lane_u16(a, b, v, 3);
4782*67e74705SXin Li }
4783*67e74705SXin Li 
4784*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
4785*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1>
4786*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
4787*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
4788*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB]]
// Codegen test wrapper: forwards a, b and v to vmls_lane_u32 with the constant
// lane index 1 and returns the intrinsic's result unchanged.
// Expected IR (directives above): element 1 of v is splatted to <2 x i32>,
// multiplied by b, and the product is subtracted from a.
test_vmls_lane_u32(uint32x2_t a,uint32x2_t b,uint32x2_t v)4789*67e74705SXin Li uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) {
4790*67e74705SXin Li   return vmls_lane_u32(a, b, v, 1);
4791*67e74705SXin Li }
4792*67e74705SXin Li 
4793*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
4794*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
4795*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
4796*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
4797*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
// Codegen test wrapper: forwards a, b and v to vmlsq_lane_u32 with the
// constant lane index 1 and returns the intrinsic's result unchanged.
// Expected IR (directives above): element 1 of the 2-element v is splatted to
// <4 x i32>, multiplied by b, and the product is subtracted from a.
test_vmlsq_lane_u32(uint32x4_t a,uint32x4_t b,uint32x2_t v)4798*67e74705SXin Li uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) {
4799*67e74705SXin Li   return vmlsq_lane_u32(a, b, v, 1);
4800*67e74705SXin Li }
4801*67e74705SXin Li 
4802*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vmls_laneq_u16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
4803*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
4804*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
4805*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
4806*67e74705SXin Li // CHECK:   ret <4 x i16> [[SUB]]
// Codegen test wrapper: forwards a, b and v to vmls_laneq_u16 with the
// constant lane index 7 and returns the intrinsic's result unchanged.
// Expected IR (directives above): element 7 of the 8-element v is splatted to
// <4 x i16>, multiplied by b, and the product is subtracted from a.
test_vmls_laneq_u16(uint16x4_t a,uint16x4_t b,uint16x8_t v)4807*67e74705SXin Li uint16x4_t test_vmls_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) {
4808*67e74705SXin Li   return vmls_laneq_u16(a, b, v, 7);
4809*67e74705SXin Li }
4810*67e74705SXin Li 
4811*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
4812*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
4813*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
4814*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
4815*67e74705SXin Li // CHECK:   ret <8 x i16> [[SUB]]
// Codegen test wrapper: forwards a, b and v to vmlsq_laneq_u16 with the
// constant lane index 7 and returns the intrinsic's result unchanged.
// Expected IR (directives above): element 7 of v is splatted to <8 x i16>,
// multiplied by b, and the product is subtracted from a.
test_vmlsq_laneq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t v)4816*67e74705SXin Li uint16x8_t test_vmlsq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) {
4817*67e74705SXin Li   return vmlsq_laneq_u16(a, b, v, 7);
4818*67e74705SXin Li }
4819*67e74705SXin Li 
4820*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vmls_laneq_u32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
4821*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
4822*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
4823*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
4824*67e74705SXin Li // CHECK:   ret <2 x i32> [[SUB]]
// Codegen test wrapper: forwards a, b and v to vmls_laneq_u32 with the
// constant lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): element 3 of the 4-element v is splatted to
// <2 x i32>, multiplied by b, and the product is subtracted from a.
test_vmls_laneq_u32(uint32x2_t a,uint32x2_t b,uint32x4_t v)4825*67e74705SXin Li uint32x2_t test_vmls_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) {
4826*67e74705SXin Li   return vmls_laneq_u32(a, b, v, 3);
4827*67e74705SXin Li }
4828*67e74705SXin Li 
4829*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
4830*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4831*67e74705SXin Li // CHECK:   [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
4832*67e74705SXin Li // CHECK:   [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
4833*67e74705SXin Li // CHECK:   ret <4 x i32> [[SUB]]
// Codegen test wrapper: forwards a, b and v to vmlsq_laneq_u32 with the
// constant lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): element 3 of v is splatted to <4 x i32>,
// multiplied by b, and the product is subtracted from a.
test_vmlsq_laneq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t v)4834*67e74705SXin Li uint32x4_t test_vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
4835*67e74705SXin Li   return vmlsq_laneq_u32(a, b, v, 3);
4836*67e74705SXin Li }
4837*67e74705SXin Li 
4838*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
4839*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
4840*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4841*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4842*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4843*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4844*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4845*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
4846*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4847*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
4848*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
// Codegen test wrapper: forwards a, b and v to vqdmlsl_laneq_s16 with the
// constant lane index 7 and returns the intrinsic's result unchanged.
// Expected IR (directives above): b and a <4 x i16> splat of element 7 of v
// feed @llvm.aarch64.neon.sqdmull.v4i32; @llvm.aarch64.neon.sqsub.v4i32 then
// takes a as its first operand and that product as its second.
test_vqdmlsl_laneq_s16(int32x4_t a,int16x4_t b,int16x8_t v)4849*67e74705SXin Li int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
4850*67e74705SXin Li   return vqdmlsl_laneq_s16(a, b, v, 7);
4851*67e74705SXin Li }
4852*67e74705SXin Li 
4853*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
4854*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
4855*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4856*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4857*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4858*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4859*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4860*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
4861*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4862*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
4863*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
// Codegen test wrapper: forwards a, b and v to vqdmlsl_laneq_s32 with the
// constant lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): b and a <2 x i32> splat of element 3 of v
// feed @llvm.aarch64.neon.sqdmull.v2i64; @llvm.aarch64.neon.sqsub.v2i64 then
// takes a as its first operand and that product as its second.
test_vqdmlsl_laneq_s32(int64x2_t a,int32x2_t b,int32x4_t v)4864*67e74705SXin Li int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
4865*67e74705SXin Li   return vqdmlsl_laneq_s32(a, b, v, 3);
4866*67e74705SXin Li }
4867*67e74705SXin Li 
4868*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
4869*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
4870*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
4871*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4872*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
4873*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4874*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4875*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
4876*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
4877*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4878*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
4879*67e74705SXin Li // CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
// Codegen test wrapper: forwards a, b and v to vqdmlsl_high_laneq_s16 with the
// constant lane index 7 and returns the intrinsic's result unchanged.
// Expected IR (directives above): elements 4..7 of b and a <4 x i16> splat of
// element 7 of v feed @llvm.aarch64.neon.sqdmull.v4i32;
// @llvm.aarch64.neon.sqsub.v4i32 then takes a as its first operand and that
// product as its second.
test_vqdmlsl_high_laneq_s16(int32x4_t a,int16x8_t b,int16x8_t v)4880*67e74705SXin Li int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
4881*67e74705SXin Li   return vqdmlsl_high_laneq_s16(a, b, v, 7);
4882*67e74705SXin Li }
4883*67e74705SXin Li 
4884*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
4885*67e74705SXin Li // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
4886*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
4887*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4888*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
4889*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4890*67e74705SXin Li // CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4891*67e74705SXin Li // CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
4892*67e74705SXin Li // CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
4893*67e74705SXin Li // CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4894*67e74705SXin Li // CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
4895*67e74705SXin Li // CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
// Codegen test wrapper: forwards a, b and v to vqdmlsl_high_laneq_s32 with the
// constant lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): elements 2 and 3 of b and a <2 x i32> splat
// of element 3 of v feed @llvm.aarch64.neon.sqdmull.v2i64;
// @llvm.aarch64.neon.sqsub.v2i64 then takes a as its first operand and that
// product as its second.
test_vqdmlsl_high_laneq_s32(int64x2_t a,int32x4_t b,int32x4_t v)4896*67e74705SXin Li int64x2_t test_vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
4897*67e74705SXin Li   return vqdmlsl_high_laneq_s32(a, b, v, 3);
4898*67e74705SXin Li }
4899*67e74705SXin Li 
4900*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqdmulh_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 {
4901*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
4902*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4903*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4904*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4905*67e74705SXin Li // CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4906*67e74705SXin Li // CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2
4907*67e74705SXin Li // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
4908*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
4909*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Codegen test wrapper: forwards a and v to vqdmulh_laneq_s16 with the
// constant lane index 7 and returns the intrinsic's result unchanged.
// Expected IR (directives above): a and a <4 x i16> splat of element 7 of v
// are passed to @llvm.aarch64.neon.sqdmulh.v4i16.
test_vqdmulh_laneq_s16(int16x4_t a,int16x8_t v)4910*67e74705SXin Li int16x4_t test_vqdmulh_laneq_s16(int16x4_t a, int16x8_t v) {
4911*67e74705SXin Li   return vqdmulh_laneq_s16(a, v, 7);
4912*67e74705SXin Li }
4913*67e74705SXin Li 
4914*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqdmulhq_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 {
4915*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
4916*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4917*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
4918*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4919*67e74705SXin Li // CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4920*67e74705SXin Li // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2
4921*67e74705SXin Li // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
4922*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
4923*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Codegen test wrapper: forwards a and v to vqdmulhq_laneq_s16 with the
// constant lane index 7 and returns the intrinsic's result unchanged.
// Expected IR (directives above): a and an <8 x i16> splat of element 7 of v
// are passed to @llvm.aarch64.neon.sqdmulh.v8i16.
test_vqdmulhq_laneq_s16(int16x8_t a,int16x8_t v)4924*67e74705SXin Li int16x8_t test_vqdmulhq_laneq_s16(int16x8_t a, int16x8_t v) {
4925*67e74705SXin Li   return vqdmulhq_laneq_s16(a, v, 7);
4926*67e74705SXin Li }
4927*67e74705SXin Li 
4928*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqdmulh_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 {
4929*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
4930*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4931*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4932*67e74705SXin Li // CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4933*67e74705SXin Li // CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4934*67e74705SXin Li // CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2
4935*67e74705SXin Li // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
4936*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
4937*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Codegen test wrapper: forwards a and v to vqdmulh_laneq_s32 with the
// constant lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): a and a <2 x i32> splat of element 3 of v
// are passed to @llvm.aarch64.neon.sqdmulh.v2i32.
test_vqdmulh_laneq_s32(int32x2_t a,int32x4_t v)4938*67e74705SXin Li int32x2_t test_vqdmulh_laneq_s32(int32x2_t a, int32x4_t v) {
4939*67e74705SXin Li   return vqdmulh_laneq_s32(a, v, 3);
4940*67e74705SXin Li }
4941*67e74705SXin Li 
4942*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqdmulhq_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 {
4943*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4944*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4945*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
4946*67e74705SXin Li // CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4947*67e74705SXin Li // CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4948*67e74705SXin Li // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2
4949*67e74705SXin Li // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
4950*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
4951*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Codegen test wrapper: forwards a and v to vqdmulhq_laneq_s32 with the
// constant lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): a and a <4 x i32> splat of element 3 of v
// are passed to @llvm.aarch64.neon.sqdmulh.v4i32.
test_vqdmulhq_laneq_s32(int32x4_t a,int32x4_t v)4952*67e74705SXin Li int32x4_t test_vqdmulhq_laneq_s32(int32x4_t a, int32x4_t v) {
4953*67e74705SXin Li   return vqdmulhq_laneq_s32(a, v, 3);
4954*67e74705SXin Li }
4955*67e74705SXin Li 
4956*67e74705SXin Li // CHECK-LABEL: define <4 x i16> @test_vqrdmulh_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 {
4957*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
4958*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4959*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
4960*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4961*67e74705SXin Li // CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4962*67e74705SXin Li // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2
4963*67e74705SXin Li // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
4964*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
4965*67e74705SXin Li // CHECK:   ret <4 x i16> [[TMP2]]
// Codegen test wrapper: forwards a and v to vqrdmulh_laneq_s16 with the
// constant lane index 7 and returns the intrinsic's result unchanged.
// Expected IR (directives above): a and a <4 x i16> splat of element 7 of v
// are passed to @llvm.aarch64.neon.sqrdmulh.v4i16.
test_vqrdmulh_laneq_s16(int16x4_t a,int16x8_t v)4966*67e74705SXin Li int16x4_t test_vqrdmulh_laneq_s16(int16x4_t a, int16x8_t v) {
4967*67e74705SXin Li   return vqrdmulh_laneq_s16(a, v, 7);
4968*67e74705SXin Li }
4969*67e74705SXin Li 
4970*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 {
4971*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
4972*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4973*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
4974*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4975*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4976*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2
4977*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
4978*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
4979*67e74705SXin Li // CHECK:   ret <8 x i16> [[TMP2]]
// Codegen test wrapper: forwards a and v to vqrdmulhq_laneq_s16 with the
// constant lane index 7 and returns the intrinsic's result unchanged.
// Expected IR (directives above): a and an <8 x i16> splat of element 7 of v
// are passed to @llvm.aarch64.neon.sqrdmulh.v8i16.
test_vqrdmulhq_laneq_s16(int16x8_t a,int16x8_t v)4980*67e74705SXin Li int16x8_t test_vqrdmulhq_laneq_s16(int16x8_t a, int16x8_t v) {
4981*67e74705SXin Li   return vqrdmulhq_laneq_s16(a, v, 7);
4982*67e74705SXin Li }
4983*67e74705SXin Li 
4984*67e74705SXin Li // CHECK-LABEL: define <2 x i32> @test_vqrdmulh_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 {
4985*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3>
4986*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4987*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
4988*67e74705SXin Li // CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4989*67e74705SXin Li // CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4990*67e74705SXin Li // CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2
4991*67e74705SXin Li // CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
4992*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
4993*67e74705SXin Li // CHECK:   ret <2 x i32> [[TMP2]]
// Codegen test wrapper: forwards a and v to vqrdmulh_laneq_s32 with the
// constant lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): a and a <2 x i32> splat of element 3 of v
// are passed to @llvm.aarch64.neon.sqrdmulh.v2i32.
test_vqrdmulh_laneq_s32(int32x2_t a,int32x4_t v)4994*67e74705SXin Li int32x2_t test_vqrdmulh_laneq_s32(int32x2_t a, int32x4_t v) {
4995*67e74705SXin Li   return vqrdmulh_laneq_s32(a, v, 3);
4996*67e74705SXin Li }
4997*67e74705SXin Li 
4998*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 {
4999*67e74705SXin Li // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
5000*67e74705SXin Li // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5001*67e74705SXin Li // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
5002*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5003*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5004*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2
5005*67e74705SXin Li // CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
5006*67e74705SXin Li // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
5007*67e74705SXin Li // CHECK:   ret <4 x i32> [[TMP2]]
// Codegen test wrapper: forwards a and v to vqrdmulhq_laneq_s32 with the
// constant lane index 3 and returns the intrinsic's result unchanged.
// Expected IR (directives above): a and a <4 x i32> splat of element 3 of v
// are passed to @llvm.aarch64.neon.sqrdmulh.v4i32.
test_vqrdmulhq_laneq_s32(int32x4_t a,int32x4_t v)5008*67e74705SXin Li int32x4_t test_vqrdmulhq_laneq_s32(int32x4_t a, int32x4_t v) {
5009*67e74705SXin Li   return vqrdmulhq_laneq_s32(a, v, 3);
5010*67e74705SXin Li }
5011*67e74705SXin Li 
5012