; xref: /aosp_15_r20/external/llvm/test/CodeGen/ARM/vmul.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
; Integer multiply of two 64-bit <8 x i8> vectors loaded from %A and %B.
; The generated code must contain a vmul.i8 instruction.
define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmuli8:
;CHECK: vmul.i8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = mul <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}
11*9880d681SAndroid Build Coastguard Worker
; Integer multiply of two 64-bit <4 x i16> vectors loaded from %A and %B.
; The generated code must contain a vmul.i16 instruction.
define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmuli16:
;CHECK: vmul.i16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = mul <4 x i16> %tmp1, %tmp2
	ret <4 x i16> %tmp3
}
20*9880d681SAndroid Build Coastguard Worker
; Integer multiply of two 64-bit <2 x i32> vectors loaded from %A and %B.
; The generated code must contain a vmul.i32 instruction.
define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmuli32:
;CHECK: vmul.i32
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = mul <2 x i32> %tmp1, %tmp2
	ret <2 x i32> %tmp3
}
29*9880d681SAndroid Build Coastguard Worker
; Floating-point multiply (fmul) of two 64-bit <2 x float> vectors loaded
; from %A and %B. The generated code must contain a vmul.f32 instruction.
define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vmulf32:
;CHECK: vmul.f32
	%tmp1 = load <2 x float>, <2 x float>* %A
	%tmp2 = load <2 x float>, <2 x float>* %B
	%tmp3 = fmul <2 x float> %tmp1, %tmp2
	ret <2 x float> %tmp3
}
38*9880d681SAndroid Build Coastguard Worker
; Polynomial multiply of two <8 x i8> vectors through the
; @llvm.arm.neon.vmulp.v8i8 intrinsic. The generated code must contain a
; vmul.p8 instruction.
define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmulp8:
;CHECK: vmul.p8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}
47*9880d681SAndroid Build Coastguard Worker
; Integer multiply of two 128-bit <16 x i8> vectors loaded from %A and %B.
; The generated code must contain a vmul.i8 instruction.
define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vmulQi8:
;CHECK: vmul.i8
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = mul <16 x i8> %tmp1, %tmp2
	ret <16 x i8> %tmp3
}
56*9880d681SAndroid Build Coastguard Worker
; Integer multiply of two 128-bit <8 x i16> vectors loaded from %A and %B.
; The generated code must contain a vmul.i16 instruction.
define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vmulQi16:
;CHECK: vmul.i16
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = mul <8 x i16> %tmp1, %tmp2
	ret <8 x i16> %tmp3
}
65*9880d681SAndroid Build Coastguard Worker
; Integer multiply of two 128-bit <4 x i32> vectors loaded from %A and %B.
; The generated code must contain a vmul.i32 instruction.
define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vmulQi32:
;CHECK: vmul.i32
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = mul <4 x i32> %tmp1, %tmp2
	ret <4 x i32> %tmp3
}
74*9880d681SAndroid Build Coastguard Worker
; Floating-point multiply (fmul) of two 128-bit <4 x float> vectors loaded
; from %A and %B. The generated code must contain a vmul.f32 instruction.
define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vmulQf32:
;CHECK: vmul.f32
	%tmp1 = load <4 x float>, <4 x float>* %A
	%tmp2 = load <4 x float>, <4 x float>* %B
	%tmp3 = fmul <4 x float> %tmp1, %tmp2
	ret <4 x float> %tmp3
}
83*9880d681SAndroid Build Coastguard Worker
; Polynomial multiply of two <16 x i8> vectors through the
; @llvm.arm.neon.vmulp.v16i8 intrinsic. The generated code must contain a
; vmul.p8 instruction.
define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vmulQp8:
;CHECK: vmul.p8
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}
92*9880d681SAndroid Build Coastguard Worker
; Polynomial-multiply intrinsics called by @vmulp8 and @vmulQp8.
declare <8 x i8>  @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8>  @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
95*9880d681SAndroid Build Coastguard Worker
; Multiply by a broadcast lane: lane 0 of the second argument is splatted
; with shufflevector and multiplied with the first argument. The CHECK
; requires the by-scalar form `vmul.f32 d0, d0, d1[0]`.
define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmul_lanef32:
; CHECK: vmul.f32 d0, d0, d1[0]
  %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; splat lane 0
  %1 = fmul <2 x float> %0, %arg0_float32x2_t
  ret <2 x float> %1
}
104*9880d681SAndroid Build Coastguard Worker
; Multiply by a broadcast lane: lane 1 of the second argument is splatted
; with shufflevector and multiplied with the first argument. The CHECK
; requires the by-scalar form `vmul.i16 d0, d0, d1[1]`.
define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmul_lanes16:
; CHECK: vmul.i16 d0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat lane 1
  %1 = mul <4 x i16> %0, %arg0_int16x4_t
  ret <4 x i16> %1
}
113*9880d681SAndroid Build Coastguard Worker
; Multiply by a broadcast lane: lane 1 of the second argument is splatted
; with shufflevector and multiplied with the first argument. The CHECK
; requires the by-scalar form `vmul.i32 d0, d0, d1[1]`.
define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmul_lanes32:
; CHECK: vmul.i32 d0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; splat lane 1
  %1 = mul <2 x i32> %0, %arg0_int32x2_t
  ret <2 x i32> %1
}
122*9880d681SAndroid Build Coastguard Worker
; 128-bit multiply by a broadcast lane: lane 1 of the <2 x float> argument
; is splatted to four elements and multiplied with the <4 x float> argument.
; The CHECK requires the by-scalar form `vmul.f32 q0, q0, d2[1]`.
define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmulQ_lanef32:
; CHECK: vmul.f32 q0, q0, d2[1]
  %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat lane 1
  %1 = fmul <4 x float> %0, %arg0_float32x4_t
  ret <4 x float> %1
}
131*9880d681SAndroid Build Coastguard Worker
; 128-bit multiply by a broadcast lane: lane 1 of the <4 x i16> argument is
; splatted to eight elements and multiplied with the <8 x i16> argument.
; The CHECK requires the by-scalar form `vmul.i16 q0, q0, d2[1]`.
define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmulQ_lanes16:
; CHECK: vmul.i16 q0, q0, d2[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; splat lane 1
  %1 = mul <8 x i16> %0, %arg0_int16x8_t
  ret <8 x i16> %1
}
140*9880d681SAndroid Build Coastguard Worker
; 128-bit multiply by a broadcast lane: lane 1 of the <2 x i32> argument is
; splatted to four elements and multiplied with the <4 x i32> argument.
; The CHECK requires the by-scalar form `vmul.i32 q0, q0, d2[1]`.
define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmulQ_lanes32:
; CHECK: vmul.i32 q0, q0, d2[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat lane 1
  %1 = mul <4 x i32> %0, %arg0_int32x4_t
  ret <4 x i32> %1
}
149*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply written as plain IR: both <8 x i8> operands are
; sign-extended to <8 x i16> and multiplied. The generated code must
; contain a vmull.s8 instruction.
define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmulls8:
;CHECK: vmull.s8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
	%tmp5 = mul <8 x i16> %tmp3, %tmp4
	ret <8 x i16> %tmp5
}
160*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply through the @llvm.arm.neon.vmulls.v8i16
; intrinsic. The generated code must contain a vmull.s8 instruction.
define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmulls8_int:
;CHECK: vmull.s8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i16> %tmp3
}
169*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply written as plain IR: both <4 x i16> operands are
; sign-extended to <4 x i32> and multiplied. The generated code must
; contain a vmull.s16 instruction.
define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmulls16:
;CHECK: vmull.s16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
	%tmp5 = mul <4 x i32> %tmp3, %tmp4
	ret <4 x i32> %tmp5
}
180*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply through the @llvm.arm.neon.vmulls.v4i32
; intrinsic. The generated code must contain a vmull.s16 instruction.
define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmulls16_int:
;CHECK: vmull.s16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i32> %tmp3
}
189*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply written as plain IR: both <2 x i32> operands are
; sign-extended to <2 x i64> and multiplied. The generated code must
; contain a vmull.s32 instruction.
define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmulls32:
;CHECK: vmull.s32
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
	%tmp5 = mul <2 x i64> %tmp3, %tmp4
	ret <2 x i64> %tmp5
}
200*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply through the @llvm.arm.neon.vmulls.v2i64
; intrinsic. The generated code must contain a vmull.s32 instruction.
define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmulls32_int:
;CHECK: vmull.s32
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i64> %tmp3
}
209*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply written as plain IR: both <8 x i8> operands
; are zero-extended to <8 x i16> and multiplied. The generated code must
; contain a vmull.u8 instruction.
define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmullu8:
;CHECK: vmull.u8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
	%tmp5 = mul <8 x i16> %tmp3, %tmp4
	ret <8 x i16> %tmp5
}
220*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply through the @llvm.arm.neon.vmullu.v8i16
; intrinsic. The generated code must contain a vmull.u8 instruction.
define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmullu8_int:
;CHECK: vmull.u8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i16> %tmp3
}
229*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply written as plain IR: both <4 x i16> operands
; are zero-extended to <4 x i32> and multiplied. The generated code must
; contain a vmull.u16 instruction.
define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmullu16:
;CHECK: vmull.u16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
	%tmp5 = mul <4 x i32> %tmp3, %tmp4
	ret <4 x i32> %tmp5
}
240*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply through the @llvm.arm.neon.vmullu.v4i32
; intrinsic. The generated code must contain a vmull.u16 instruction.
define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmullu16_int:
;CHECK: vmull.u16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i32> %tmp3
}
249*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply written as plain IR: both <2 x i32> operands
; are zero-extended to <2 x i64> and multiplied. The generated code must
; contain a vmull.u32 instruction.
define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmullu32:
;CHECK: vmull.u32
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
	%tmp5 = mul <2 x i64> %tmp3, %tmp4
	ret <2 x i64> %tmp5
}
260*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply through the @llvm.arm.neon.vmullu.v2i64
; intrinsic. The generated code must contain a vmull.u32 instruction.
define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmullu32_int:
;CHECK: vmull.u32
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i64> %tmp3
}
269*9880d681SAndroid Build Coastguard Worker
; Polynomial widening multiply through the @llvm.arm.neon.vmullp.v8i16
; intrinsic. The generated code must contain a vmull.p8 instruction.
define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmullp8:
;CHECK: vmull.p8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i16> %tmp3
}
278*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply by a broadcast lane, written as plain IR: lane 1
; of the second argument is splatted, both operands are sign-extended to
; <4 x i32>, then multiplied. The CHECK requires
; `vmull.s16 q0, d0, d1[1]`.
; CHECK-LABEL (with the trailing colon) anchors on this function's own label
; so the match cannot land on @test_vmull_lanes16_int.
define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmull_lanes16:
; CHECK: vmull.s16 q0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat lane 1
  %1 = sext <4 x i16> %arg0_int16x4_t to <4 x i32>
  %2 = sext <4 x i16> %0 to <4 x i32>
  %3 = mul <4 x i32> %1, %2
  ret <4 x i32> %3
}
289*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply by a broadcast lane through the
; @llvm.arm.neon.vmulls.v4i32 intrinsic: lane 1 of the second argument is
; splatted and passed as the second operand. The CHECK requires
; `vmull.s16 q0, d0, d1[1]`.
define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16_int(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmull_lanes16_int:
; CHECK: vmull.s16 q0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat lane 1
  %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0)
  ret <4 x i32> %1
}
298*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply by a broadcast lane, written as plain IR: lane 1
; of the second argument is splatted, both operands are sign-extended to
; <2 x i64>, then multiplied. The CHECK requires
; `vmull.s32 q0, d0, d1[1]`.
; CHECK-LABEL (with the trailing colon) anchors on this function's own label
; so the match cannot land on @test_vmull_lanes32_int.
define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmull_lanes32:
; CHECK: vmull.s32 q0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; splat lane 1
  %1 = sext <2 x i32> %arg0_int32x2_t to <2 x i64>
  %2 = sext <2 x i32> %0 to <2 x i64>
  %3 = mul <2 x i64> %1, %2
  ret <2 x i64> %3
}
309*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply by a broadcast lane through the
; @llvm.arm.neon.vmulls.v2i64 intrinsic: lane 1 of the second argument is
; splatted and passed as the second operand. The CHECK requires
; `vmull.s32 q0, d0, d1[1]`.
define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32_int(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmull_lanes32_int:
; CHECK: vmull.s32 q0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; splat lane 1
  %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0)
  ret <2 x i64> %1
}
318*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply by a broadcast lane, written as plain IR: lane
; 1 of the second argument is splatted, both operands are zero-extended to
; <4 x i32>, then multiplied. The CHECK requires
; `vmull.u16 q0, d0, d1[1]`.
; CHECK-LABEL (with the trailing colon) anchors on this function's own label
; so the match cannot land on @test_vmull_laneu16_int.
define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmull_laneu16:
; CHECK: vmull.u16 q0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat lane 1
  %1 = zext <4 x i16> %arg0_uint16x4_t to <4 x i32>
  %2 = zext <4 x i16> %0 to <4 x i32>
  %3 = mul <4 x i32> %1, %2
  ret <4 x i32> %3
}
329*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply by a broadcast lane through the
; @llvm.arm.neon.vmullu.v4i32 intrinsic: lane 1 of the second argument is
; splatted and passed as the second operand. The CHECK requires
; `vmull.u16 q0, d0, d1[1]`.
define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16_int(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmull_laneu16_int:
; CHECK: vmull.u16 q0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat lane 1
  %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0)
  ret <4 x i32> %1
}
338*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply by a broadcast lane, written as plain IR: lane
; 1 of the second argument is splatted, both operands are zero-extended to
; <2 x i64>, then multiplied. The CHECK requires
; `vmull.u32 q0, d0, d1[1]`.
; CHECK-LABEL (with the trailing colon) anchors on this function's own label
; so the match cannot land on @test_vmull_laneu32_int.
define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmull_laneu32:
; CHECK: vmull.u32 q0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; splat lane 1
  %1 = zext <2 x i32> %arg0_uint32x2_t to <2 x i64>
  %2 = zext <2 x i32> %0 to <2 x i64>
  %3 = mul <2 x i64> %1, %2
  ret <2 x i64> %3
}
349*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply by a broadcast lane through the
; @llvm.arm.neon.vmullu.v2i64 intrinsic: lane 1 of the second argument is
; splatted and passed as the second operand. The CHECK requires
; `vmull.u32 q0, d0, d1[1]`.
define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32_int(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vmull_laneu32_int:
; CHECK: vmull.u32 q0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; splat lane 1
  %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0)
  ret <2 x i64> %1
}
358*9880d681SAndroid Build Coastguard Worker
; Signed widening-multiply intrinsics used by the *_int tests above.
declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone

; Unsigned widening-multiply intrinsics used by the *_int tests above.
declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone

; Polynomial widening-multiply intrinsic used by @vmullp8.
declare <8 x i16>  @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
368*9880d681SAndroid Build Coastguard Worker
369*9880d681SAndroid Build Coastguard Worker
; Radar 8687140
; VMULL needs to recognize BUILD_VECTORs with sign/zero-extended elements.
372*9880d681SAndroid Build Coastguard Worker
; Sign-extended <8 x i8> multiplied by a constant splat of -12. The constant
; fits in a signed 8-bit element, so the generated code must contain a
; vmull.s8 instruction.
define <8 x i16> @vmull_extvec_s8(<8 x i8> %arg) nounwind {
; CHECK-LABEL: vmull_extvec_s8:
; CHECK: vmull.s8
  %tmp3 = sext <8 x i8> %arg to <8 x i16>
  %tmp4 = mul <8 x i16> %tmp3, <i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12>
  ret <8 x i16> %tmp4
}
380*9880d681SAndroid Build Coastguard Worker
; Zero-extended <8 x i8> multiplied by a constant splat of 12. The constant
; fits in an unsigned 8-bit element, so the generated code must contain a
; vmull.u8 instruction.
define <8 x i16> @vmull_extvec_u8(<8 x i8> %arg) nounwind {
; CHECK-LABEL: vmull_extvec_u8:
; CHECK: vmull.u8
  %tmp3 = zext <8 x i8> %arg to <8 x i16>
  %tmp4 = mul <8 x i16> %tmp3, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
  ret <8 x i16> %tmp4
}
388*9880d681SAndroid Build Coastguard Worker
; Negative case: the constant splat of -999 does not fit in a signed 8-bit
; element, so the generated code must widen with vmovl.s8 and then use a
; plain vmul.i16.
define <8 x i16> @vmull_noextvec_s8(<8 x i8> %arg) nounwind {
; Do not use VMULL if the BUILD_VECTOR element values are too big.
; CHECK-LABEL: vmull_noextvec_s8:
; CHECK: vmovl.s8
; CHECK: vmul.i16
  %tmp3 = sext <8 x i8> %arg to <8 x i16>
  %tmp4 = mul <8 x i16> %tmp3, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999>
  ret <8 x i16> %tmp4
}
398*9880d681SAndroid Build Coastguard Worker
399*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vmull_noextvec_u8(<8 x i8> %arg) nounwind {
400*9880d681SAndroid Build Coastguard Worker; Do not use VMULL if the BUILD_VECTOR element values are too big.
; Here the splat value is 999, which is outside the unsigned i8 range, so the
; expected code widens the argument with vmovl.u8 and multiplies with a plain
; vmul.i16.
401*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_noextvec_u8:
402*9880d681SAndroid Build Coastguard Worker; CHECK: vmovl.u8
403*9880d681SAndroid Build Coastguard Worker; CHECK: vmul.i16
404*9880d681SAndroid Build Coastguard Worker  %tmp3 = zext <8 x i8> %arg to <8 x i16>
405*9880d681SAndroid Build Coastguard Worker  %tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
406*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %tmp4
407*9880d681SAndroid Build Coastguard Worker}
408*9880d681SAndroid Build Coastguard Worker
409*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_extvec_s16(<4 x i16> %arg) nounwind {
; Sign-extend a <4 x i16> argument to <4 x i32> and multiply it by a splat
; of -12.  The constant fits in a signed i16, so the multiply is expected to
; be selected as a widening vmull.s16.
410*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_extvec_s16:
411*9880d681SAndroid Build Coastguard Worker; CHECK: vmull.s16
412*9880d681SAndroid Build Coastguard Worker  %tmp3 = sext <4 x i16> %arg to <4 x i32>
413*9880d681SAndroid Build Coastguard Worker  %tmp4 = mul <4 x i32> %tmp3, <i32 -12, i32 -12, i32 -12, i32 -12>
414*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp4
415*9880d681SAndroid Build Coastguard Worker}
416*9880d681SAndroid Build Coastguard Worker
417*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_extvec_u16(<4 x i16> %arg) nounwind {
; Zero-extend a <4 x i16> argument to <4 x i32> and multiply it by a splat
; of 1234.  The constant fits in an unsigned i16, so the multiply is expected
; to be selected as a widening vmull.u16.
418*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_extvec_u16:
419*9880d681SAndroid Build Coastguard Worker; CHECK: vmull.u16
420*9880d681SAndroid Build Coastguard Worker  %tmp3 = zext <4 x i16> %arg to <4 x i32>
421*9880d681SAndroid Build Coastguard Worker  %tmp4 = mul <4 x i32> %tmp3, <i32 1234, i32 1234, i32 1234, i32 1234>
422*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp4
423*9880d681SAndroid Build Coastguard Worker}
424*9880d681SAndroid Build Coastguard Worker
425*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_extvec_s32(<2 x i32> %arg) nounwind {
; Sign-extend a <2 x i32> argument to <2 x i64> and multiply it by a splat
; of -1234.  The constant fits in a signed i32, so the multiply is expected to
; be selected as a widening vmull.s32.
426*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_extvec_s32:
427*9880d681SAndroid Build Coastguard Worker; CHECK: vmull.s32
428*9880d681SAndroid Build Coastguard Worker  %tmp3 = sext <2 x i32> %arg to <2 x i64>
429*9880d681SAndroid Build Coastguard Worker  %tmp4 = mul <2 x i64> %tmp3, <i64 -1234, i64 -1234>
430*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp4
431*9880d681SAndroid Build Coastguard Worker}
432*9880d681SAndroid Build Coastguard Worker
433*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind {
; Zero-extend a <2 x i32> argument to <2 x i64> and multiply it by a splat
; of 1234.  The constant fits in an unsigned i32, so the multiply is expected
; to be selected as a widening vmull.u32.
434*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_extvec_u32:
435*9880d681SAndroid Build Coastguard Worker; CHECK: vmull.u32
436*9880d681SAndroid Build Coastguard Worker  %tmp3 = zext <2 x i32> %arg to <2 x i64>
437*9880d681SAndroid Build Coastguard Worker  %tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234>
438*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp4
439*9880d681SAndroid Build Coastguard Worker}
440*9880d681SAndroid Build Coastguard Worker
441*9880d681SAndroid Build Coastguard Worker; rdar://9197392
442*9880d681SAndroid Build Coastguard Workerdefine void @distribute(i16* %dst, i8* %src, i32 %mul) nounwind {
443*9880d681SAndroid Build Coastguard Workerentry:
; Computes (zext(hi) + zext(lo)) * zext(splat(trunc(%mul))) on the two 8-byte
; halves of a 16-byte vld1 and stores the <8 x i16> result with vst1.  The
; checks expect the multiply to be distributed over the add: a vmull.u8 into
; a q register followed by a vmlal.u8 accumulating into the same q register,
; both using the same d register operand.
444*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: distribute:
445*9880d681SAndroid Build Coastguard Worker; CHECK: vmull.u8 [[REG1:(q[0-9]+)]], d{{.*}}, [[REG2:(d[0-9]+)]]
446*9880d681SAndroid Build Coastguard Worker; CHECK: vmlal.u8 [[REG1]], d{{.*}}, [[REG2]]
447*9880d681SAndroid Build Coastguard Worker  %mul.i8 = trunc i32 %mul to i8
448*9880d681SAndroid Build Coastguard Worker  %splat.ins = insertelement <8 x i8> undef, i8 %mul.i8, i32 0
449*9880d681SAndroid Build Coastguard Worker  %splat = shufflevector <8 x i8> %splat.ins, <8 x i8> undef, <8 x i32> zeroinitializer
450*9880d681SAndroid Build Coastguard Worker  %wide = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %src, i32 1)
451*9880d681SAndroid Build Coastguard Worker  %halves = bitcast <16 x i8> %wide to <2 x double>
452*9880d681SAndroid Build Coastguard Worker  %hi.f64 = extractelement <2 x double> %halves, i32 1
453*9880d681SAndroid Build Coastguard Worker  %hi = bitcast double %hi.f64 to <8 x i8>
454*9880d681SAndroid Build Coastguard Worker  %hi.ext = zext <8 x i8> %hi to <8 x i16>
455*9880d681SAndroid Build Coastguard Worker  %splat.ext = zext <8 x i8> %splat to <8 x i16>
456*9880d681SAndroid Build Coastguard Worker  %lo.f64 = extractelement <2 x double> %halves, i32 0
457*9880d681SAndroid Build Coastguard Worker  %lo = bitcast double %lo.f64 to <8 x i8>
458*9880d681SAndroid Build Coastguard Worker  %lo.ext = zext <8 x i8> %lo to <8 x i16>
459*9880d681SAndroid Build Coastguard Worker  %sum = add <8 x i16> %hi.ext, %lo.ext
460*9880d681SAndroid Build Coastguard Worker  %prod = mul <8 x i16> %sum, %splat.ext
461*9880d681SAndroid Build Coastguard Worker  %dst.i8 = bitcast i16* %dst to i8*
462*9880d681SAndroid Build Coastguard Worker  tail call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %dst.i8, <8 x i16> %prod, i32 2)
463*9880d681SAndroid Build Coastguard Worker  ret void
464*9880d681SAndroid Build Coastguard Worker}
465*9880d681SAndroid Build Coastguard Worker
; Declarations of the NEON vld1/vst1 intrinsics called by the tests in this
; file (vld1 by the distribute* tests and @vmull_buildvector, vst1 by
; @distribute).
466*9880d681SAndroid Build Coastguard Workerdeclare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8*, i32) nounwind readonly
467*9880d681SAndroid Build Coastguard Worker
468*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
469*9880d681SAndroid Build Coastguard Worker
470*9880d681SAndroid Build Coastguard Worker; Take advantage of the Cortex-A8 multiplier-accumulator forwarding.
471*9880d681SAndroid Build Coastguard Worker
; Wrapper struct with a single <8 x i8> member; @distribute2 and
; @distribute2_commutative store their result through a pointer to it.
472*9880d681SAndroid Build Coastguard Worker%struct.uint8x8_t = type { <8 x i8> }
473*9880d681SAndroid Build Coastguard Worker
474*9880d681SAndroid Build Coastguard Workerdefine void @distribute2(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind {
475*9880d681SAndroid Build Coastguard Workerentry:
; Computes (hi + lo) * splat(trunc(%mul)) on the two <8 x i8> halves of a
; 16-byte vld1 and stores the result into %dst.  The checks expect the
; multiply to be distributed over the add, i.e. no vadd.i8 but a vmul.i8
; followed by a vmla.i8.
476*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: distribute2:
477*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vadd.i8
478*9880d681SAndroid Build Coastguard Worker; CHECK: vmul.i8
479*9880d681SAndroid Build Coastguard Worker; CHECK: vmla.i8
480*9880d681SAndroid Build Coastguard Worker  %0 = trunc i32 %mul to i8
481*9880d681SAndroid Build Coastguard Worker  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
482*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
483*9880d681SAndroid Build Coastguard Worker  %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %src, i32 1)
484*9880d681SAndroid Build Coastguard Worker  %4 = bitcast <16 x i8> %3 to <2 x double>
485*9880d681SAndroid Build Coastguard Worker  %5 = extractelement <2 x double> %4, i32 1
486*9880d681SAndroid Build Coastguard Worker  %6 = bitcast double %5 to <8 x i8>
487*9880d681SAndroid Build Coastguard Worker  %7 = extractelement <2 x double> %4, i32 0
488*9880d681SAndroid Build Coastguard Worker  %8 = bitcast double %7 to <8 x i8>
489*9880d681SAndroid Build Coastguard Worker  %9 = add <8 x i8> %6, %8
490*9880d681SAndroid Build Coastguard Worker  %10 = mul <8 x i8> %9, %2
491*9880d681SAndroid Build Coastguard Worker  %11 = getelementptr inbounds %struct.uint8x8_t, %struct.uint8x8_t* %dst, i32 0, i32 0
492*9880d681SAndroid Build Coastguard Worker  store <8 x i8> %10, <8 x i8>* %11, align 8
493*9880d681SAndroid Build Coastguard Worker  ret void
494*9880d681SAndroid Build Coastguard Worker}
495*9880d681SAndroid Build Coastguard Worker
496*9880d681SAndroid Build Coastguard Workerdefine void @distribute2_commutative(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind {
497*9880d681SAndroid Build Coastguard Workerentry:
; Same computation as @distribute2 but with the mul operands swapped
; (splat * (hi + lo)).  The same vmul.i8 + vmla.i8 sequence without a
; vadd.i8 is expected.
498*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: distribute2_commutative:
499*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vadd.i8
500*9880d681SAndroid Build Coastguard Worker; CHECK: vmul.i8
501*9880d681SAndroid Build Coastguard Worker; CHECK: vmla.i8
502*9880d681SAndroid Build Coastguard Worker  %0 = trunc i32 %mul to i8
503*9880d681SAndroid Build Coastguard Worker  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
504*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
505*9880d681SAndroid Build Coastguard Worker  %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %src, i32 1)
506*9880d681SAndroid Build Coastguard Worker  %4 = bitcast <16 x i8> %3 to <2 x double>
507*9880d681SAndroid Build Coastguard Worker  %5 = extractelement <2 x double> %4, i32 1
508*9880d681SAndroid Build Coastguard Worker  %6 = bitcast double %5 to <8 x i8>
509*9880d681SAndroid Build Coastguard Worker  %7 = extractelement <2 x double> %4, i32 0
510*9880d681SAndroid Build Coastguard Worker  %8 = bitcast double %7 to <8 x i8>
511*9880d681SAndroid Build Coastguard Worker  %9 = add <8 x i8> %6, %8
512*9880d681SAndroid Build Coastguard Worker  %10 = mul <8 x i8> %2, %9
513*9880d681SAndroid Build Coastguard Worker  %11 = getelementptr inbounds %struct.uint8x8_t, %struct.uint8x8_t* %dst, i32 0, i32 0
514*9880d681SAndroid Build Coastguard Worker  store <8 x i8> %10, <8 x i8>* %11, align 8
515*9880d681SAndroid Build Coastguard Worker  ret void
516*9880d681SAndroid Build Coastguard Worker}
517*9880d681SAndroid Build Coastguard Worker
518*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @no_distribute(<8 x i8> %a, <8 x i8> %b) nounwind {
519*9880d681SAndroid Build Coastguard Workerentry:
; Computes (a + b) * (a + b).  Both mul operands are the same add, and the
; checks expect the add to be kept: a vadd.i8 followed by a vmul.i8, with no
; vmla.i8.
520*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: no_distribute:
521*9880d681SAndroid Build Coastguard Worker; CHECK: vadd.i8
522*9880d681SAndroid Build Coastguard Worker; CHECK: vmul.i8
523*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vmla.i8
524*9880d681SAndroid Build Coastguard Worker  %0 = add <8 x i8> %a, %b
525*9880d681SAndroid Build Coastguard Worker  %1 = mul <8 x i8> %0, %0
526*9880d681SAndroid Build Coastguard Worker  ret <8 x i8> %1
527*9880d681SAndroid Build Coastguard Worker}
528*9880d681SAndroid Build Coastguard Worker
529*9880d681SAndroid Build Coastguard Worker; If one operand has a zero-extend and the other a sign-extend, vmull
530*9880d681SAndroid Build Coastguard Worker; cannot be used.
; Here the vector operand is sign-extended from i8, while the splat constant
; 255 is outside the signed i8 range, so vmull.s8 must not be emitted.
531*9880d681SAndroid Build Coastguard Workerdefine i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
532*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmullWithInconsistentExtensions:
533*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vmull.s8
534*9880d681SAndroid Build Coastguard Worker  %1 = sext <8 x i8> %vec to <8 x i16>
535*9880d681SAndroid Build Coastguard Worker  %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
536*9880d681SAndroid Build Coastguard Worker  %3 = extractelement <8 x i16> %2, i32 0
537*9880d681SAndroid Build Coastguard Worker  ret i16 %3
538*9880d681SAndroid Build Coastguard Worker}
539*9880d681SAndroid Build Coastguard Worker
540*9880d681SAndroid Build Coastguard Worker; A constant build_vector created for a vmull with half-width elements must
541*9880d681SAndroid Build Coastguard Worker; not introduce illegal types. <rdar://problem/11324364>
542*9880d681SAndroid Build Coastguard Workerdefine void @vmull_buildvector() nounwind optsize ssp align 2 {
; Only the function label is checked: no instruction pattern is pinned, so
; the test passes as long as llc emits this function.
543*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_buildvector:
544*9880d681SAndroid Build Coastguard Workerentry:
545*9880d681SAndroid Build Coastguard Worker  br i1 undef, label %for.end179, label %for.body.lr.ph
546*9880d681SAndroid Build Coastguard Worker
547*9880d681SAndroid Build Coastguard Workerfor.body.lr.ph:                                   ; preds = %entry
548*9880d681SAndroid Build Coastguard Worker  br label %for.body
549*9880d681SAndroid Build Coastguard Worker
550*9880d681SAndroid Build Coastguard Workerfor.cond.loopexit:                                ; preds = %for.body33, %for.body
551*9880d681SAndroid Build Coastguard Worker  br i1 undef, label %for.end179, label %for.body
552*9880d681SAndroid Build Coastguard Worker
553*9880d681SAndroid Build Coastguard Workerfor.body:                                         ; preds = %for.cond.loopexit, %for.body.lr.ph
554*9880d681SAndroid Build Coastguard Worker  br i1 undef, label %for.cond.loopexit, label %for.body33.lr.ph
555*9880d681SAndroid Build Coastguard Worker
556*9880d681SAndroid Build Coastguard Workerfor.body33.lr.ph:                                 ; preds = %for.body
557*9880d681SAndroid Build Coastguard Worker  %.sub = select i1 undef, i32 0, i32 undef
558*9880d681SAndroid Build Coastguard Worker  br label %for.body33
559*9880d681SAndroid Build Coastguard Worker
560*9880d681SAndroid Build Coastguard Workerfor.body33:                                       ; preds = %for.body33, %for.body33.lr.ph
561*9880d681SAndroid Build Coastguard Worker  %add45 = add i32 undef, undef
562*9880d681SAndroid Build Coastguard Worker  %vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* undef, i32 1)
563*9880d681SAndroid Build Coastguard Worker  %0 = load i32*, i32** undef, align 4
564*9880d681SAndroid Build Coastguard Worker  %shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
565*9880d681SAndroid Build Coastguard Worker  %1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8>
566*9880d681SAndroid Build Coastguard Worker  %vmovl.i249 = zext <8 x i8> %1 to <8 x i16>
567*9880d681SAndroid Build Coastguard Worker  %shuffle.i246 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
568*9880d681SAndroid Build Coastguard Worker  %shuffle.i240 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> <i32 1>
569*9880d681SAndroid Build Coastguard Worker  %2 = bitcast <1 x i64> %shuffle.i240 to <8 x i8>
570*9880d681SAndroid Build Coastguard Worker  %3 = bitcast <16 x i8> undef to <2 x i64>
571*9880d681SAndroid Build Coastguard Worker  %vmovl.i237 = zext <8 x i8> undef to <8 x i16>
572*9880d681SAndroid Build Coastguard Worker  %shuffle.i234 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
573*9880d681SAndroid Build Coastguard Worker  %shuffle.i226 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
574*9880d681SAndroid Build Coastguard Worker  %vmovl.i225 = zext <8 x i8> undef to <8 x i16>
575*9880d681SAndroid Build Coastguard Worker  %mul.i223 = mul <8 x i16> %vmovl.i249, %vmovl.i249
576*9880d681SAndroid Build Coastguard Worker  %vshl_n = shl <8 x i16> %mul.i223, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
577*9880d681SAndroid Build Coastguard Worker  %vqsub2.i216 = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>, <8 x i16> %vshl_n) nounwind
578*9880d681SAndroid Build Coastguard Worker  %mul.i209 = mul <8 x i16> undef, <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>
579*9880d681SAndroid Build Coastguard Worker  %vshr_n130 = lshr <8 x i16> undef, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
580*9880d681SAndroid Build Coastguard Worker  %vshr_n134 = lshr <8 x i16> %mul.i209, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
581*9880d681SAndroid Build Coastguard Worker  %sub.i205 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n130
582*9880d681SAndroid Build Coastguard Worker  %sub.i203 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n134
583*9880d681SAndroid Build Coastguard Worker  %add.i200 = add <8 x i16> %sub.i205, <i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96>
584*9880d681SAndroid Build Coastguard Worker  %add.i198 = add <8 x i16> %add.i200, %sub.i203
585*9880d681SAndroid Build Coastguard Worker  %mul.i194 = mul <8 x i16> %add.i198, %vmovl.i237
586*9880d681SAndroid Build Coastguard Worker  %mul.i191 = mul <8 x i16> %vshr_n130, undef
587*9880d681SAndroid Build Coastguard Worker  %add.i192 = add <8 x i16> %mul.i191, %mul.i194
588*9880d681SAndroid Build Coastguard Worker  %mul.i187 = mul <8 x i16> %vshr_n134, undef
589*9880d681SAndroid Build Coastguard Worker  %add.i188 = add <8 x i16> %mul.i187, %add.i192
590*9880d681SAndroid Build Coastguard Worker  %mul.i185 = mul <8 x i16> undef, undef
591*9880d681SAndroid Build Coastguard Worker  %add.i186 = add <8 x i16> %mul.i185, undef
592*9880d681SAndroid Build Coastguard Worker  %vrshr_n160 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i188, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
593*9880d681SAndroid Build Coastguard Worker  %vrshr_n163 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i186, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
594*9880d681SAndroid Build Coastguard Worker  %mul.i184 = mul <8 x i16> undef, %vrshr_n160
595*9880d681SAndroid Build Coastguard Worker  %mul.i181 = mul <8 x i16> undef, %vmovl.i225
596*9880d681SAndroid Build Coastguard Worker  %add.i182 = add <8 x i16> %mul.i181, %mul.i184
597*9880d681SAndroid Build Coastguard Worker  %vrshr_n170 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i182, <8 x i16> <i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7>)
598*9880d681SAndroid Build Coastguard Worker  %vqmovn1.i180 = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %vrshr_n170) nounwind
599*9880d681SAndroid Build Coastguard Worker  %4 = bitcast <8 x i8> %vqmovn1.i180 to <1 x i64>
600*9880d681SAndroid Build Coastguard Worker  %shuffle.i = shufflevector <1 x i64> %4, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
601*9880d681SAndroid Build Coastguard Worker  %5 = bitcast <2 x i64> %shuffle.i to <16 x i8>
602*9880d681SAndroid Build Coastguard Worker  store <16 x i8> %5, <16 x i8>* undef, align 16
603*9880d681SAndroid Build Coastguard Worker  %add177 = add nsw i32 undef, 16
604*9880d681SAndroid Build Coastguard Worker  br i1 undef, label %for.body33, label %for.cond.loopexit
605*9880d681SAndroid Build Coastguard Worker
606*9880d681SAndroid Build Coastguard Workerfor.end179:                                       ; preds = %for.cond.loopexit, %entry
607*9880d681SAndroid Build Coastguard Worker  ret void
608*9880d681SAndroid Build Coastguard Worker}
609*9880d681SAndroid Build Coastguard Worker
; Declarations of the NEON intrinsics called by @vmull_buildvector.
610*9880d681SAndroid Build Coastguard Workerdeclare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
611*9880d681SAndroid Build Coastguard Workerdeclare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
612*9880d681SAndroid Build Coastguard Workerdeclare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
613*9880d681SAndroid Build Coastguard Worker
614*9880d681SAndroid Build Coastguard Worker; vmull lowering would create a zext(v4i8 load()) instead of a zextload(v4i8),
615*9880d681SAndroid Build Coastguard Worker; creating an illegal type during legalization and causing an assert.
616*9880d681SAndroid Build Coastguard Worker; PR15970
617*9880d681SAndroid Build Coastguard Workerdefine void @no_illegal_types_vmull_sext(<4 x i32> %a) {
; Loads a <4 x i8>, sign-extends it to <4 x i32>, subtracts %a and squares
; the difference.  No instruction pattern is pinned; the label check only
; confirms that llc emitted the function.
; CHECK-LABEL: no_illegal_types_vmull_sext:
618*9880d681SAndroid Build Coastguard Workerentry:
619*9880d681SAndroid Build Coastguard Worker  %wide.load283.i = load <4 x i8>, <4 x i8>* undef, align 1
620*9880d681SAndroid Build Coastguard Worker  %0 = sext <4 x i8> %wide.load283.i to <4 x i32>
621*9880d681SAndroid Build Coastguard Worker  %1 = sub nsw <4 x i32> %0, %a
622*9880d681SAndroid Build Coastguard Worker  %2 = mul nsw <4 x i32> %1, %1
623*9880d681SAndroid Build Coastguard Worker  %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2
624*9880d681SAndroid Build Coastguard Worker  store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
625*9880d681SAndroid Build Coastguard Worker  ret void
626*9880d681SAndroid Build Coastguard Worker}
627*9880d681SAndroid Build Coastguard Workerdefine void @no_illegal_types_vmull_zext(<4 x i32> %a) {
; Loads a <4 x i8>, zero-extends it to <4 x i32>, subtracts %a and squares
; the difference.  No instruction pattern is pinned; the label check only
; confirms that llc emitted the function.
; CHECK-LABEL: no_illegal_types_vmull_zext:
628*9880d681SAndroid Build Coastguard Workerentry:
629*9880d681SAndroid Build Coastguard Worker  %wide.load283.i = load <4 x i8>, <4 x i8>* undef, align 1
630*9880d681SAndroid Build Coastguard Worker  %0 = zext <4 x i8> %wide.load283.i to <4 x i32>
631*9880d681SAndroid Build Coastguard Worker  %1 = sub nsw <4 x i32> %0, %a
632*9880d681SAndroid Build Coastguard Worker  %2 = mul nsw <4 x i32> %1, %1
633*9880d681SAndroid Build Coastguard Worker  %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2
634*9880d681SAndroid Build Coastguard Worker  store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
635*9880d681SAndroid Build Coastguard Worker  ret void
636*9880d681SAndroid Build Coastguard Worker}
637*9880d681SAndroid Build Coastguard Worker
638*9880d681SAndroid Build Coastguard Workerdefine void @foo(<4 x float> * %a, <4 x float>* nocapture %dst, float* nocapture readonly %src) nounwind {
; Multiplies the <4 x float> loaded from %a by a splat of the scalar loaded
; from %src and stores the product to %dst.
639*9880d681SAndroid Build Coastguard Worker;   Look for a normal scalar FP load rather than a to-all-lanes load,
640*9880d681SAndroid Build Coastguard Worker;   e.g., "vldr s0, [r2]" rather than "vld1.32  {d18[], d19[]}, [r2:32]".
641*9880d681SAndroid Build Coastguard Worker;   Then check that the vector multiply has folded the splat to all lanes
642*9880d681SAndroid Build Coastguard Worker;   and used a vector * scalar instruction.
; NOTE(review): the vmul.f32 check hard-codes q8 and d0; it will need
; updating if register allocation for this function ever changes.
; CHECK-LABEL: foo:
643*9880d681SAndroid Build Coastguard Worker; CHECK: vldr  {{s[0-9]+}}, [r2]
644*9880d681SAndroid Build Coastguard Worker; CHECK: vmul.f32  q8, q8, d0[0]
645*9880d681SAndroid Build Coastguard Worker  %tmp = load float, float* %src, align 4
646*9880d681SAndroid Build Coastguard Worker  %tmp5 = load <4 x float>, <4 x float>* %a, align 4
647*9880d681SAndroid Build Coastguard Worker  %tmp6 = insertelement <4 x float> undef, float %tmp, i32 0
648*9880d681SAndroid Build Coastguard Worker  %tmp7 = insertelement <4 x float> %tmp6, float %tmp, i32 1
649*9880d681SAndroid Build Coastguard Worker  %tmp8 = insertelement <4 x float> %tmp7, float %tmp, i32 2
650*9880d681SAndroid Build Coastguard Worker  %tmp9 = insertelement <4 x float> %tmp8, float %tmp, i32 3
651*9880d681SAndroid Build Coastguard Worker  %tmp10 = fmul <4 x float> %tmp9, %tmp5
652*9880d681SAndroid Build Coastguard Worker  store <4 x float> %tmp10, <4 x float>* %dst, align 4
653*9880d681SAndroid Build Coastguard Worker  ret void
654*9880d681SAndroid Build Coastguard Worker}
655