; xref: /aosp_15_r20/external/llvm/test/CodeGen/AArch64/aarch64-smull.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o -| FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply, <8 x i8> -> <8 x i16>.
; Both multiplicands are loaded from memory and sign-extended before the
; multiply; the test expects the whole pattern to select one SMULL (.8b -> .8h).
define <8 x i16> @smull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: smull_v8i8_v8i16:
; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %lhs.ext = sext <8 x i8> %lhs to <8 x i16>
  %rhs.ext = sext <8 x i8> %rhs to <8 x i16>
  %prod = mul <8 x i16> %lhs.ext, %rhs.ext
  ret <8 x i16> %prod
}
13*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply, <4 x i16> -> <4 x i32>.
; Two loaded vectors are sign-extended and multiplied; the test expects a
; single SMULL (.4h -> .4s).
define <4 x i32> @smull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: smull_v4i16_v4i32:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %lhs.ext = sext <4 x i16> %lhs to <4 x i32>
  %rhs.ext = sext <4 x i16> %rhs to <4 x i32>
  %prod = mul <4 x i32> %lhs.ext, %rhs.ext
  ret <4 x i32> %prod
}
24*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply, <2 x i32> -> <2 x i64>.
; Two loaded vectors are sign-extended and multiplied; the test expects a
; single SMULL (.2s -> .2d).
define <2 x i64> @smull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: smull_v2i32_v2i64:
; CHECK:  smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %lhs.ext = sext <2 x i32> %lhs to <2 x i64>
  %rhs.ext = sext <2 x i32> %rhs to <2 x i64>
  %prod = mul <2 x i64> %lhs.ext, %rhs.ext
  ret <2 x i64> %prod
}
35*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply, <8 x i8> -> <8 x i16>.
; Both multiplicands are loaded and zero-extended before the multiply; the
; test expects the pattern to select one UMULL (.8b -> .8h).
define <8 x i16> @umull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: umull_v8i8_v8i16:
; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %lhs.ext = zext <8 x i8> %lhs to <8 x i16>
  %rhs.ext = zext <8 x i8> %rhs to <8 x i16>
  %prod = mul <8 x i16> %lhs.ext, %rhs.ext
  ret <8 x i16> %prod
}
46*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply, <4 x i16> -> <4 x i32>.
; Two loaded vectors are zero-extended and multiplied; the test expects a
; single UMULL (.4h -> .4s).
define <4 x i32> @umull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: umull_v4i16_v4i32:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %lhs.ext = zext <4 x i16> %lhs to <4 x i32>
  %rhs.ext = zext <4 x i16> %rhs to <4 x i32>
  %prod = mul <4 x i32> %lhs.ext, %rhs.ext
  ret <4 x i32> %prod
}
57*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply, <2 x i32> -> <2 x i64>.
; Two loaded vectors are zero-extended and multiplied; the test expects a
; single UMULL (.2s -> .2d).
define <2 x i64> @umull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: umull_v2i32_v2i64:
; CHECK:  umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %lhs.ext = zext <2 x i32> %lhs to <2 x i64>
  %rhs.ext = zext <2 x i32> %rhs to <2 x i64>
  %prod = mul <2 x i64> %lhs.ext, %rhs.ext
  ret <2 x i64> %prod
}
68*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply-accumulate, <8 x i8> -> <8 x i16>.
; acc + sext(b) * sext(c), with all three values loaded from memory; the test
; expects the multiply and add to fold into one SMLAL (.8b -> .8h).
define <8 x i16> @smlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: smlal_v8i8_v8i16:
; CHECK:  smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %acc = load <8 x i16>, <8 x i16>* %A
  %lhs = load <8 x i8>, <8 x i8>* %B
  %rhs = load <8 x i8>, <8 x i8>* %C
  %lhs.ext = sext <8 x i8> %lhs to <8 x i16>
  %rhs.ext = sext <8 x i8> %rhs to <8 x i16>
  %prod = mul <8 x i16> %lhs.ext, %rhs.ext
  %sum = add <8 x i16> %acc, %prod
  ret <8 x i16> %sum
}
81*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply-accumulate, <4 x i16> -> <4 x i32>.
; acc + sext(b) * sext(c); the test expects one SMLAL (.4h -> .4s).
define <4 x i32> @smlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: smlal_v4i16_v4i32:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %acc = load <4 x i32>, <4 x i32>* %A
  %lhs = load <4 x i16>, <4 x i16>* %B
  %rhs = load <4 x i16>, <4 x i16>* %C
  %lhs.ext = sext <4 x i16> %lhs to <4 x i32>
  %rhs.ext = sext <4 x i16> %rhs to <4 x i32>
  %prod = mul <4 x i32> %lhs.ext, %rhs.ext
  %sum = add <4 x i32> %acc, %prod
  ret <4 x i32> %sum
}
94*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply-accumulate, <2 x i32> -> <2 x i64>.
; acc + sext(b) * sext(c); the test expects one SMLAL (.2s -> .2d).
define <2 x i64> @smlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: smlal_v2i32_v2i64:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %acc = load <2 x i64>, <2 x i64>* %A
  %lhs = load <2 x i32>, <2 x i32>* %B
  %rhs = load <2 x i32>, <2 x i32>* %C
  %lhs.ext = sext <2 x i32> %lhs to <2 x i64>
  %rhs.ext = sext <2 x i32> %rhs to <2 x i64>
  %prod = mul <2 x i64> %lhs.ext, %rhs.ext
  %sum = add <2 x i64> %acc, %prod
  ret <2 x i64> %sum
}
107*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply-accumulate, <8 x i8> -> <8 x i16>.
; acc + zext(b) * zext(c), with all three values loaded from memory; the test
; expects the multiply and add to fold into one UMLAL (.8b -> .8h).
define <8 x i16> @umlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: umlal_v8i8_v8i16:
; CHECK:  umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %acc = load <8 x i16>, <8 x i16>* %A
  %lhs = load <8 x i8>, <8 x i8>* %B
  %rhs = load <8 x i8>, <8 x i8>* %C
  %lhs.ext = zext <8 x i8> %lhs to <8 x i16>
  %rhs.ext = zext <8 x i8> %rhs to <8 x i16>
  %prod = mul <8 x i16> %lhs.ext, %rhs.ext
  %sum = add <8 x i16> %acc, %prod
  ret <8 x i16> %sum
}
120*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply-accumulate, <4 x i16> -> <4 x i32>.
; acc + zext(b) * zext(c); the test expects one UMLAL (.4h -> .4s).
define <4 x i32> @umlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: umlal_v4i16_v4i32:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %acc = load <4 x i32>, <4 x i32>* %A
  %lhs = load <4 x i16>, <4 x i16>* %B
  %rhs = load <4 x i16>, <4 x i16>* %C
  %lhs.ext = zext <4 x i16> %lhs to <4 x i32>
  %rhs.ext = zext <4 x i16> %rhs to <4 x i32>
  %prod = mul <4 x i32> %lhs.ext, %rhs.ext
  %sum = add <4 x i32> %acc, %prod
  ret <4 x i32> %sum
}
133*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply-accumulate, <2 x i32> -> <2 x i64>.
; acc + zext(b) * zext(c); the test expects one UMLAL (.2s -> .2d).
define <2 x i64> @umlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: umlal_v2i32_v2i64:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %acc = load <2 x i64>, <2 x i64>* %A
  %lhs = load <2 x i32>, <2 x i32>* %B
  %rhs = load <2 x i32>, <2 x i32>* %C
  %lhs.ext = zext <2 x i32> %lhs to <2 x i64>
  %rhs.ext = zext <2 x i32> %rhs to <2 x i64>
  %prod = mul <2 x i64> %lhs.ext, %rhs.ext
  %sum = add <2 x i64> %acc, %prod
  ret <2 x i64> %sum
}
146*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply-subtract, <8 x i8> -> <8 x i16>.
; acc - sext(b) * sext(c), with all three values loaded from memory; the test
; expects the multiply and subtract to fold into one SMLSL (.8b -> .8h).
define <8 x i16> @smlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: smlsl_v8i8_v8i16:
; CHECK:  smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %acc = load <8 x i16>, <8 x i16>* %A
  %lhs = load <8 x i8>, <8 x i8>* %B
  %rhs = load <8 x i8>, <8 x i8>* %C
  %lhs.ext = sext <8 x i8> %lhs to <8 x i16>
  %rhs.ext = sext <8 x i8> %rhs to <8 x i16>
  %prod = mul <8 x i16> %lhs.ext, %rhs.ext
  %diff = sub <8 x i16> %acc, %prod
  ret <8 x i16> %diff
}
159*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply-subtract, <4 x i16> -> <4 x i32>.
; acc - sext(b) * sext(c); the test expects one SMLSL (.4h -> .4s).
define <4 x i32> @smlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: smlsl_v4i16_v4i32:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %acc = load <4 x i32>, <4 x i32>* %A
  %lhs = load <4 x i16>, <4 x i16>* %B
  %rhs = load <4 x i16>, <4 x i16>* %C
  %lhs.ext = sext <4 x i16> %lhs to <4 x i32>
  %rhs.ext = sext <4 x i16> %rhs to <4 x i32>
  %prod = mul <4 x i32> %lhs.ext, %rhs.ext
  %diff = sub <4 x i32> %acc, %prod
  ret <4 x i32> %diff
}
172*9880d681SAndroid Build Coastguard Worker
; Signed widening multiply-subtract, <2 x i32> -> <2 x i64>.
; acc - sext(b) * sext(c); the test expects one SMLSL (.2s -> .2d).
define <2 x i64> @smlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: smlsl_v2i32_v2i64:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %acc = load <2 x i64>, <2 x i64>* %A
  %lhs = load <2 x i32>, <2 x i32>* %B
  %rhs = load <2 x i32>, <2 x i32>* %C
  %lhs.ext = sext <2 x i32> %lhs to <2 x i64>
  %rhs.ext = sext <2 x i32> %rhs to <2 x i64>
  %prod = mul <2 x i64> %lhs.ext, %rhs.ext
  %diff = sub <2 x i64> %acc, %prod
  ret <2 x i64> %diff
}
185*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply-subtract, <8 x i8> -> <8 x i16>.
; acc - zext(b) * zext(c), with all three values loaded from memory; the test
; expects the multiply and subtract to fold into one UMLSL (.8b -> .8h).
define <8 x i16> @umlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: umlsl_v8i8_v8i16:
; CHECK:  umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %acc = load <8 x i16>, <8 x i16>* %A
  %lhs = load <8 x i8>, <8 x i8>* %B
  %rhs = load <8 x i8>, <8 x i8>* %C
  %lhs.ext = zext <8 x i8> %lhs to <8 x i16>
  %rhs.ext = zext <8 x i8> %rhs to <8 x i16>
  %prod = mul <8 x i16> %lhs.ext, %rhs.ext
  %diff = sub <8 x i16> %acc, %prod
  ret <8 x i16> %diff
}
198*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply-subtract, <4 x i16> -> <4 x i32>.
; acc - zext(b) * zext(c); the test expects one UMLSL (.4h -> .4s).
define <4 x i32> @umlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: umlsl_v4i16_v4i32:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %acc = load <4 x i32>, <4 x i32>* %A
  %lhs = load <4 x i16>, <4 x i16>* %B
  %rhs = load <4 x i16>, <4 x i16>* %C
  %lhs.ext = zext <4 x i16> %lhs to <4 x i32>
  %rhs.ext = zext <4 x i16> %rhs to <4 x i32>
  %prod = mul <4 x i32> %lhs.ext, %rhs.ext
  %diff = sub <4 x i32> %acc, %prod
  ret <4 x i32> %diff
}
211*9880d681SAndroid Build Coastguard Worker
; Unsigned widening multiply-subtract, <2 x i32> -> <2 x i64>.
; acc - zext(b) * zext(c); the test expects one UMLSL (.2s -> .2d).
define <2 x i64> @umlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: umlsl_v2i32_v2i64:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %acc = load <2 x i64>, <2 x i64>* %A
  %lhs = load <2 x i32>, <2 x i32>* %B
  %rhs = load <2 x i32>, <2 x i32>* %C
  %lhs.ext = zext <2 x i32> %lhs to <2 x i64>
  %rhs.ext = zext <2 x i32> %rhs to <2 x i64>
  %prod = mul <2 x i64> %lhs.ext, %rhs.ext
  %diff = sub <2 x i64> %acc, %prod
  ret <2 x i64> %diff
}
224*9880d681SAndroid Build Coastguard Worker
; SMULL/UMULL should also be recognized when one multiplicand is a constant
; BUILD_VECTOR whose elements fit the sign/zero-extended narrow element type.
; sext(<8 x i8>) multiplied by a splat of -12. The constant is representable
; as a signed 8-bit value, so the test still expects an SMULL (.8b -> .8h).
define <8 x i16> @smull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; CHECK-LABEL: smull_extvec_v8i8_v8i16:
; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %arg.ext = sext <8 x i8> %arg to <8 x i16>
  %scaled = mul <8 x i16> %arg.ext, <i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12>
  ret <8 x i16> %scaled
}
233*9880d681SAndroid Build Coastguard Worker
; Negative case: sext(<8 x i8>) multiplied by a splat of -999. That constant
; is outside the signed 8-bit range, so SMULL must not be formed; the test
; expects the constant to be materialized and a plain full-width MUL on .8h.
define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; CHECK-LABEL: smull_noextvec_v8i8_v8i16:
; CHECK: mov
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
  %arg.ext = sext <8 x i8> %arg to <8 x i16>
  %scaled = mul <8 x i16> %arg.ext, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999>
  ret <8 x i16> %scaled
}
243*9880d681SAndroid Build Coastguard Worker
; sext(<4 x i16>) multiplied by a splat of -12, which fits in a signed 16-bit
; element; the test expects an SMULL (.4h -> .4s).
define <4 x i32> @smull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-LABEL: smull_extvec_v4i16_v4i32:
; CHECK:  smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %arg.ext = sext <4 x i16> %arg to <4 x i32>
  %scaled = mul <4 x i32> %arg.ext, <i32 -12, i32 -12, i32 -12, i32 -12>
  ret <4 x i32> %scaled
}
251*9880d681SAndroid Build Coastguard Worker
; sext(<2 x i32>) multiplied by a splat of -1234, which fits in a signed
; 32-bit element; the test expects an SMULL (.2s -> .2d).
; The function-name check is a label directive (with the trailing colon of the
; assembly label), like every other test in this file, so the smull match is
; anchored to this function's section of the output.
define <2 x i64> @smull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-LABEL: smull_extvec_v2i32_v2i64:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp3 = sext <2 x i32> %arg to <2 x i64>
  %tmp4 = mul <2 x i64> %tmp3, <i64 -1234, i64 -1234>
  ret <2 x i64> %tmp4
}
259*9880d681SAndroid Build Coastguard Worker
; zext(<8 x i8>) multiplied by a splat of 12. The constant is representable
; as an unsigned 8-bit value, so the test expects a UMULL (.8b -> .8h).
define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v8i8_v8i16:
; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %arg.ext = zext <8 x i8> %arg to <8 x i16>
  %scaled = mul <8 x i16> %arg.ext, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
  ret <8 x i16> %scaled
}
267*9880d681SAndroid Build Coastguard Worker
; Negative case for the unsigned form: zext(<8 x i8>) multiplied by a splat
; of 999.
define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use UMULL if the BUILD_VECTOR element values are too big: 999 does
; not fit in an unsigned 8-bit element, so the test expects the constant to be
; materialized and a plain full-width MUL on .8h.
; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK: mov
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
  %tmp3 = zext <8 x i8> %arg to <8 x i16>
  %tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
  ret <8 x i16> %tmp4
}
277*9880d681SAndroid Build Coastguard Worker
; zext(<4 x i16>) multiplied by a splat of 1234, which fits in an unsigned
; 16-bit element; the test expects a UMULL (.4h -> .4s).
define <4 x i32> @umull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v4i16_v4i32:
; CHECK:  umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %arg.ext = zext <4 x i16> %arg to <4 x i32>
  %scaled = mul <4 x i32> %arg.ext, <i32 1234, i32 1234, i32 1234, i32 1234>
  ret <4 x i32> %scaled
}
285*9880d681SAndroid Build Coastguard Worker
; zext(<2 x i32>) multiplied by a splat of 1234, which fits in an unsigned
; 32-bit element; the test expects a UMULL (.2s -> .2d).
define <2 x i64> @umull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v2i32_v2i64:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %arg.ext = zext <2 x i32> %arg to <2 x i64>
  %scaled = mul <2 x i64> %arg.ext, <i64 1234, i64 1234>
  ret <2 x i64> %scaled
}
293*9880d681SAndroid Build Coastguard Worker
; Mixed-signedness case: the vector operand is sign-extended from i8, while
; the constant 255 only fits in 8 bits when treated as zero-extended (it is
; above the signed i8 maximum of 127).
define i16 @smullWithInconsistentExtensions(<8 x i8> %vec) {
; When one multiplicand is a sign-extend and the other is effectively a
; zero-extend, smull is not applicable; the test expects a plain MUL on .8h.
; CHECK-LABEL: smullWithInconsistentExtensions:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
  %vec.sext = sext <8 x i8> %vec to <8 x i16>
  %prod = mul <8 x i16> %vec.sext, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %lane0 = extractelement <8 x i16> %prod, i32 0
  ret i16 %lane0
}
304*9880d681SAndroid Build Coastguard Worker
; (zext(hi) + zext(lo)) * zext(splat(trunc(%mul))) on <8 x i16>, where hi and
; lo are the two 8-byte halves of a 16-byte value returned by the vld1 call.
; The test expects the multiply to be distributed over the add so that it is
; emitted as a UMULL followed by a UMLAL that accumulates into the UMULL result
; (REG1) and reuses the same last source operand (REG2, presumably the
; splatted multiplier).
; The '.' inside the register captures is escaped so that it only matches the
; literal dot of the vector arrangement suffix.
define void @distribute(i16* %dst, i8* %src, i32 %mul) nounwind {
entry:
; CHECK-LABEL: distribute:
; CHECK: umull [[REG1:(v[0-9]+\.8h)]], {{v[0-9]+}}.8b, [[REG2:(v[0-9]+\.8b)]]
; CHECK: umlal [[REG1]], {{v[0-9]+}}.8b, [[REG2]]
  ; Splat the low byte of %mul across all eight lanes.
  %0 = trunc i32 %mul to i8
  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
  ; Fetch 16 bytes from %src and view them as two 64-bit halves.
  %3 = tail call <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8* %src, i32 1)
  %4 = bitcast <16 x i8> %3 to <2 x double>
  ; High half (element 1), widened to 16-bit lanes.
  %5 = extractelement <2 x double> %4, i32 1
  %6 = bitcast double %5 to <8 x i8>
  %7 = zext <8 x i8> %6 to <8 x i16>
  ; Widened splat multiplier.
  %8 = zext <8 x i8> %2 to <8 x i16>
  ; Low half (element 0), widened to 16-bit lanes.
  %9 = extractelement <2 x double> %4, i32 0
  %10 = bitcast double %9 to <8 x i8>
  %11 = zext <8 x i8> %10 to <8 x i16>
  ; (hi + lo) * splat: the pattern expected to become umull + umlal.
  %12 = add <8 x i16> %7, %11
  %13 = mul <8 x i16> %12, %8
  ; Hand the result to the vst1 call through an i8* view of %dst.
  %14 = bitcast i16* %dst to i8*
  tail call void @llvm.aarch64.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2)
  ret void
}
328*9880d681SAndroid Build Coastguard Worker
329*9880d681SAndroid Build Coastguard Workerdeclare <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8*, i32) nounwind readonly
330*9880d681SAndroid Build Coastguard Worker
331*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.aarch64.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
332*9880d681SAndroid Build Coastguard Worker
333