xref: /aosp_15_r20/external/llvm/test/CodeGen/AArch64/arm64-vadd.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
; Loads two <8 x i16> vectors from %A and %B and passes them to the
; llvm.aarch64.neon.addhn.v8i8 intrinsic, returning its <8 x i8> result.
; The expected assembly contains addhn.8b.
3*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
4*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn8b:
5*9880d681SAndroid Build Coastguard Worker;CHECK: addhn.8b
6*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i16>, <8 x i16>* %A
7*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i16>, <8 x i16>* %B
8*9880d681SAndroid Build Coastguard Worker        %tmp3 = call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
9*9880d681SAndroid Build Coastguard Worker        ret <8 x i8> %tmp3
10*9880d681SAndroid Build Coastguard Worker}
11*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i32> vectors from %A and %B and passes them to the
; llvm.aarch64.neon.addhn.v4i16 intrinsic, returning its <4 x i16> result.
; The expected assembly contains addhn.4h.
12*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @addhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
13*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn4h:
14*9880d681SAndroid Build Coastguard Worker;CHECK: addhn.4h
15*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i32>, <4 x i32>* %A
16*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i32>, <4 x i32>* %B
17*9880d681SAndroid Build Coastguard Worker        %tmp3 = call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
18*9880d681SAndroid Build Coastguard Worker        ret <4 x i16> %tmp3
19*9880d681SAndroid Build Coastguard Worker}
20*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x i64> vectors from %A and %B and passes them to the
; llvm.aarch64.neon.addhn.v2i32 intrinsic, returning its <2 x i32> result.
; The expected assembly contains addhn.2s.
21*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @addhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
22*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn2s:
23*9880d681SAndroid Build Coastguard Worker;CHECK: addhn.2s
24*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i64>, <2 x i64>* %A
25*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <2 x i64>, <2 x i64>* %B
26*9880d681SAndroid Build Coastguard Worker        %tmp3 = call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
27*9880d681SAndroid Build Coastguard Worker        ret <2 x i32> %tmp3
28*9880d681SAndroid Build Coastguard Worker}
29*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.addhn.v8i8 twice on the same %a/%b operands and
; concatenates the two <8 x i8> results into a <16 x i8> with an identity
; shufflevector mask (indices 0 through 15).
; The expected assembly is addhn.8b immediately followed by addhn2.16b.
30*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @addhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
31*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn2_16b:
32*9880d681SAndroid Build Coastguard Worker;CHECK: addhn.8b
33*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: addhn2.16b
34*9880d681SAndroid Build Coastguard Worker  %vaddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
35*9880d681SAndroid Build Coastguard Worker  %vaddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
36*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <8 x i8> %vaddhn2.i, <8 x i8> %vaddhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
37*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %res
38*9880d681SAndroid Build Coastguard Worker}
39*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.addhn.v4i16 twice on the same %a/%b operands and
; concatenates the two <4 x i16> results into an <8 x i16> with an identity
; shufflevector mask (indices 0 through 7).
; The expected assembly is addhn.4h immediately followed by addhn2.8h.
40*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @addhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
41*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn2_8h:
42*9880d681SAndroid Build Coastguard Worker;CHECK: addhn.4h
43*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: addhn2.8h
44*9880d681SAndroid Build Coastguard Worker  %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
45*9880d681SAndroid Build Coastguard Worker  %vaddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
46*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i16> %vaddhn2.i, <4 x i16> %vaddhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
47*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res
48*9880d681SAndroid Build Coastguard Worker}
49*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.addhn.v2i32 twice on the same %a/%b operands and
; concatenates the two <2 x i32> results into a <4 x i32> with an identity
; shufflevector mask (indices 0 through 3).
; The expected assembly is addhn.2s immediately followed by addhn2.4s.
50*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @addhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
51*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn2_4s:
52*9880d681SAndroid Build Coastguard Worker;CHECK: addhn.2s
53*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: addhn2.4s
54*9880d681SAndroid Build Coastguard Worker  %vaddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
55*9880d681SAndroid Build Coastguard Worker  %vaddhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
56*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <2 x i32> %vaddhn2.i, <2 x i32> %vaddhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
57*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res
58*9880d681SAndroid Build Coastguard Worker}
59*9880d681SAndroid Build Coastguard Worker
; Declarations of the addhn intrinsics called by the tests above, one per
; result vector type (v2i32, v4i16, v8i8); each is marked nounwind readnone.
60*9880d681SAndroid Build Coastguard Workerdeclare <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
61*9880d681SAndroid Build Coastguard Workerdeclare <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
62*9880d681SAndroid Build Coastguard Workerdeclare <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
63*9880d681SAndroid Build Coastguard Worker
64*9880d681SAndroid Build Coastguard Worker
; Loads two <8 x i16> vectors from %A and %B and passes them to the
; llvm.aarch64.neon.raddhn.v8i8 intrinsic, returning its <8 x i8> result.
; The expected assembly contains raddhn.8b.
65*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
66*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: raddhn8b:
67*9880d681SAndroid Build Coastguard Worker;CHECK: raddhn.8b
68*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i16>, <8 x i16>* %A
69*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i16>, <8 x i16>* %B
70*9880d681SAndroid Build Coastguard Worker        %tmp3 = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
71*9880d681SAndroid Build Coastguard Worker        ret <8 x i8> %tmp3
72*9880d681SAndroid Build Coastguard Worker}
73*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i32> vectors from %A and %B and passes them to the
; llvm.aarch64.neon.raddhn.v4i16 intrinsic, returning its <4 x i16> result.
; The expected assembly contains raddhn.4h.
74*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @raddhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
75*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: raddhn4h:
76*9880d681SAndroid Build Coastguard Worker;CHECK: raddhn.4h
77*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i32>, <4 x i32>* %A
78*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i32>, <4 x i32>* %B
79*9880d681SAndroid Build Coastguard Worker        %tmp3 = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
80*9880d681SAndroid Build Coastguard Worker        ret <4 x i16> %tmp3
81*9880d681SAndroid Build Coastguard Worker}
82*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x i64> vectors from %A and %B and passes them to the
; llvm.aarch64.neon.raddhn.v2i32 intrinsic, returning its <2 x i32> result.
; The expected assembly contains raddhn.2s.
83*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @raddhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
84*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: raddhn2s:
85*9880d681SAndroid Build Coastguard Worker;CHECK: raddhn.2s
86*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i64>, <2 x i64>* %A
87*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <2 x i64>, <2 x i64>* %B
88*9880d681SAndroid Build Coastguard Worker        %tmp3 = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
89*9880d681SAndroid Build Coastguard Worker        ret <2 x i32> %tmp3
90*9880d681SAndroid Build Coastguard Worker}
91*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.raddhn.v8i8 twice on the same %a/%b operands and
; concatenates the two <8 x i8> results into a <16 x i8> with an identity
; shufflevector mask (indices 0 through 15).
; The expected assembly is raddhn.8b immediately followed by raddhn2.16b.
92*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @raddhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
93*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: raddhn2_16b:
94*9880d681SAndroid Build Coastguard Worker;CHECK: raddhn.8b
95*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: raddhn2.16b
96*9880d681SAndroid Build Coastguard Worker  %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
97*9880d681SAndroid Build Coastguard Worker  %vraddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
98*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <8 x i8> %vraddhn2.i, <8 x i8> %vraddhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
99*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %res
100*9880d681SAndroid Build Coastguard Worker}
101*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.raddhn.v4i16 twice on the same %a/%b operands and
; concatenates the two <4 x i16> results into an <8 x i16> with an identity
; shufflevector mask (indices 0 through 7).
; The expected assembly is raddhn.4h immediately followed by raddhn2.8h.
102*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @raddhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
103*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: raddhn2_8h:
104*9880d681SAndroid Build Coastguard Worker;CHECK: raddhn.4h
105*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: raddhn2.8h
106*9880d681SAndroid Build Coastguard Worker  %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
107*9880d681SAndroid Build Coastguard Worker  %vraddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
108*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i16> %vraddhn2.i, <4 x i16> %vraddhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
109*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res
110*9880d681SAndroid Build Coastguard Worker}
111*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.raddhn.v2i32 twice on the same %a/%b operands and
; concatenates the two <2 x i32> results into a <4 x i32> with an identity
; shufflevector mask (indices 0 through 3).
; The expected assembly is raddhn.2s immediately followed by raddhn2.4s.
112*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @raddhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
113*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: raddhn2_4s:
114*9880d681SAndroid Build Coastguard Worker;CHECK: raddhn.2s
115*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: raddhn2.4s
116*9880d681SAndroid Build Coastguard Worker  %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
117*9880d681SAndroid Build Coastguard Worker  %vraddhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
118*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <2 x i32> %vraddhn2.i, <2 x i32> %vraddhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
119*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res
120*9880d681SAndroid Build Coastguard Worker}
121*9880d681SAndroid Build Coastguard Worker
; Declarations of the raddhn intrinsics called by the tests above, one per
; result vector type (v2i32, v4i16, v8i8); each is marked nounwind readnone.
122*9880d681SAndroid Build Coastguard Workerdeclare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
123*9880d681SAndroid Build Coastguard Workerdeclare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
124*9880d681SAndroid Build Coastguard Workerdeclare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
125*9880d681SAndroid Build Coastguard Worker
; Loads two <8 x i8> vectors, sign-extends both to <8 x i16>, and adds them.
; No intrinsic is used; the sext + sext + add pattern is expected to be
; selected as saddl.8h.
126*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @saddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
127*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: saddl8h:
128*9880d681SAndroid Build Coastguard Worker;CHECK: saddl.8h
129*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i8>, <8 x i8>* %A
130*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i8>, <8 x i8>* %B
131*9880d681SAndroid Build Coastguard Worker  %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
132*9880d681SAndroid Build Coastguard Worker  %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
133*9880d681SAndroid Build Coastguard Worker  %tmp5 = add <8 x i16> %tmp3, %tmp4
134*9880d681SAndroid Build Coastguard Worker        ret <8 x i16> %tmp5
135*9880d681SAndroid Build Coastguard Worker}
136*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i16> vectors, sign-extends both to <4 x i32>, and adds them.
; The sext + sext + add pattern is expected to be selected as saddl.4s.
137*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @saddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
138*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: saddl4s:
139*9880d681SAndroid Build Coastguard Worker;CHECK: saddl.4s
140*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i16>, <4 x i16>* %A
141*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i16>, <4 x i16>* %B
142*9880d681SAndroid Build Coastguard Worker  %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
143*9880d681SAndroid Build Coastguard Worker  %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
144*9880d681SAndroid Build Coastguard Worker  %tmp5 = add <4 x i32> %tmp3, %tmp4
145*9880d681SAndroid Build Coastguard Worker        ret <4 x i32> %tmp5
146*9880d681SAndroid Build Coastguard Worker}
147*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x i32> vectors, sign-extends both to <2 x i64>, and adds them.
; The sext + sext + add pattern is expected to be selected as saddl.2d.
148*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @saddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
149*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: saddl2d:
150*9880d681SAndroid Build Coastguard Worker;CHECK: saddl.2d
151*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i32>, <2 x i32>* %A
152*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <2 x i32>, <2 x i32>* %B
153*9880d681SAndroid Build Coastguard Worker  %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
154*9880d681SAndroid Build Coastguard Worker  %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
155*9880d681SAndroid Build Coastguard Worker  %tmp5 = add <2 x i64> %tmp3, %tmp4
156*9880d681SAndroid Build Coastguard Worker        ret <2 x i64> %tmp5
157*9880d681SAndroid Build Coastguard Worker}
158*9880d681SAndroid Build Coastguard Worker
; For each of %a and %b: reinterpret the <16 x i8> as <2 x i64>, extract
; element 1 with a one-element shufflevector, reinterpret that <1 x i64> as
; <8 x i8>, and sign-extend to <8 x i16>. The two extended values are added.
; The function body is expected to be exactly `saddl2.8h v0, v0, v1` followed
; by `ret`, with nothing in between.
159*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b) nounwind  {
160*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: saddl2_8h:
161*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: saddl2.8h v0, v0, v1
162*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
163*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <16 x i8> %a to <2 x i64>
164*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
165*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
166*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i.i = sext <8 x i8> %tmp1 to <8 x i16>
167*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <16 x i8> %b to <2 x i64>
168*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
169*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8>
170*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i5.i = sext <8 x i8> %tmp3 to <8 x i16>
171*9880d681SAndroid Build Coastguard Worker  %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i
172*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %add.i
173*9880d681SAndroid Build Coastguard Worker}
174*9880d681SAndroid Build Coastguard Worker
; For each of %a and %b: reinterpret the <8 x i16> as <2 x i64>, extract
; element 1 with a one-element shufflevector, reinterpret that <1 x i64> as
; <4 x i16>, and sign-extend to <4 x i32>. The two extended values are added.
; The function body is expected to be exactly `saddl2.4s v0, v0, v1` followed
; by `ret`, with nothing in between.
175*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b) nounwind  {
176*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: saddl2_4s:
177*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: saddl2.4s v0, v0, v1
178*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
179*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <8 x i16> %a to <2 x i64>
180*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
181*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
182*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i.i = sext <4 x i16> %tmp1 to <4 x i32>
183*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <8 x i16> %b to <2 x i64>
184*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
185*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16>
186*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i5.i = sext <4 x i16> %tmp3 to <4 x i32>
187*9880d681SAndroid Build Coastguard Worker  %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i
188*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add.i
189*9880d681SAndroid Build Coastguard Worker}
190*9880d681SAndroid Build Coastguard Worker
; For each of %a and %b: reinterpret the <4 x i32> as <2 x i64>, extract
; element 1 with a one-element shufflevector, reinterpret that <1 x i64> as
; <2 x i32>, and sign-extend to <2 x i64>. The two extended values are added.
; The function body is expected to be exactly `saddl2.2d v0, v0, v1` followed
; by `ret`, with nothing in between.
191*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind  {
192*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: saddl2_2d:
193*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: saddl2.2d v0, v0, v1
194*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
195*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <4 x i32> %a to <2 x i64>
196*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
197*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
198*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i.i = sext <2 x i32> %tmp1 to <2 x i64>
199*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <4 x i32> %b to <2 x i64>
200*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
201*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32>
202*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i5.i = sext <2 x i32> %tmp3 to <2 x i64>
203*9880d681SAndroid Build Coastguard Worker  %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i
204*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %add.i
205*9880d681SAndroid Build Coastguard Worker}
206*9880d681SAndroid Build Coastguard Worker
; Loads two <8 x i8> vectors, zero-extends both to <8 x i16>, and adds them.
; The zext + zext + add pattern is expected to be selected as uaddl.8h.
207*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @uaddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
208*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: uaddl8h:
209*9880d681SAndroid Build Coastguard Worker;CHECK: uaddl.8h
210*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <8 x i8>, <8 x i8>* %A
211*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <8 x i8>, <8 x i8>* %B
212*9880d681SAndroid Build Coastguard Worker  %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
213*9880d681SAndroid Build Coastguard Worker  %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
214*9880d681SAndroid Build Coastguard Worker  %tmp5 = add <8 x i16> %tmp3, %tmp4
215*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %tmp5
216*9880d681SAndroid Build Coastguard Worker}
217*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i16> vectors, zero-extends both to <4 x i32>, and adds them.
; The zext + zext + add pattern is expected to be selected as uaddl.4s.
218*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @uaddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
219*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: uaddl4s:
220*9880d681SAndroid Build Coastguard Worker;CHECK: uaddl.4s
221*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
222*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
223*9880d681SAndroid Build Coastguard Worker  %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
224*9880d681SAndroid Build Coastguard Worker  %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
225*9880d681SAndroid Build Coastguard Worker  %tmp5 = add <4 x i32> %tmp3, %tmp4
226*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp5
227*9880d681SAndroid Build Coastguard Worker}
228*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x i32> vectors, zero-extends both to <2 x i64>, and adds them.
; The zext + zext + add pattern is expected to be selected as uaddl.2d.
229*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @uaddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
230*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: uaddl2d:
231*9880d681SAndroid Build Coastguard Worker;CHECK: uaddl.2d
232*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
233*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
234*9880d681SAndroid Build Coastguard Worker  %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
235*9880d681SAndroid Build Coastguard Worker  %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
236*9880d681SAndroid Build Coastguard Worker  %tmp5 = add <2 x i64> %tmp3, %tmp4
237*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp5
238*9880d681SAndroid Build Coastguard Worker}
239*9880d681SAndroid Build Coastguard Worker
240*9880d681SAndroid Build Coastguard Worker
; For each of %a and %b: reinterpret the <16 x i8> as <2 x i64>, extract
; element 1 with a one-element shufflevector, reinterpret that <1 x i64> as
; <8 x i8>, and zero-extend to <8 x i16>. The two extended values are added.
; The function body is expected to be exactly `uaddl2.8h v0, v0, v1` followed
; by `ret`, with nothing in between.
241*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b) nounwind  {
242*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: uaddl2_8h:
243*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: uaddl2.8h v0, v0, v1
244*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
245*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <16 x i8> %a to <2 x i64>
246*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
247*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
248*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i.i = zext <8 x i8> %tmp1 to <8 x i16>
249*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <16 x i8> %b to <2 x i64>
250*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
251*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8>
252*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i5.i = zext <8 x i8> %tmp3 to <8 x i16>
253*9880d681SAndroid Build Coastguard Worker  %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i
254*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %add.i
255*9880d681SAndroid Build Coastguard Worker}
256*9880d681SAndroid Build Coastguard Worker
; For each of %a and %b: reinterpret the <8 x i16> as <2 x i64>, extract
; element 1 with a one-element shufflevector, reinterpret that <1 x i64> as
; <4 x i16>, and zero-extend to <4 x i32>. The two extended values are added.
; The function body is expected to be exactly `uaddl2.4s v0, v0, v1` followed
; by `ret`, with nothing in between.
257*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b) nounwind  {
258*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: uaddl2_4s:
259*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: uaddl2.4s v0, v0, v1
260*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
261*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <8 x i16> %a to <2 x i64>
262*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
263*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
264*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i.i = zext <4 x i16> %tmp1 to <4 x i32>
265*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <8 x i16> %b to <2 x i64>
266*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
267*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16>
268*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i5.i = zext <4 x i16> %tmp3 to <4 x i32>
269*9880d681SAndroid Build Coastguard Worker  %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i
270*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add.i
271*9880d681SAndroid Build Coastguard Worker}
272*9880d681SAndroid Build Coastguard Worker
; For each of %a and %b: reinterpret the <4 x i32> as <2 x i64>, extract
; element 1 with a one-element shufflevector, reinterpret that <1 x i64> as
; <2 x i32>, and zero-extend to <2 x i64>. The two extended values are added.
; The function body is expected to be exactly `uaddl2.2d v0, v0, v1` followed
; by `ret`, with nothing in between.
273*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind  {
274*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: uaddl2_2d:
275*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: uaddl2.2d v0, v0, v1
276*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
277*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <4 x i32> %a to <2 x i64>
278*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
279*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
280*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i.i = zext <2 x i32> %tmp1 to <2 x i64>
281*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <4 x i32> %b to <2 x i64>
282*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
283*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32>
284*9880d681SAndroid Build Coastguard Worker  %vmovl.i.i5.i = zext <2 x i32> %tmp3 to <2 x i64>
285*9880d681SAndroid Build Coastguard Worker  %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i
286*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %add.i
287*9880d681SAndroid Build Coastguard Worker}
288*9880d681SAndroid Build Coastguard Worker
; Loads an <8 x i16> from %A and an <8 x i8> from %B, zero-extends only the
; narrow operand to <8 x i16>, and adds it to the wide one.
; The add + zext pattern is expected to be selected as uaddw.8h.
289*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @uaddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
290*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: uaddw8h:
291*9880d681SAndroid Build Coastguard Worker;CHECK: uaddw.8h
292*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i16>, <8 x i16>* %A
293*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i8>, <8 x i8>* %B
294*9880d681SAndroid Build Coastguard Worker  %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
295*9880d681SAndroid Build Coastguard Worker  %tmp4 = add <8 x i16> %tmp1, %tmp3
296*9880d681SAndroid Build Coastguard Worker        ret <8 x i16> %tmp4
297*9880d681SAndroid Build Coastguard Worker}
298*9880d681SAndroid Build Coastguard Worker
; Loads a <4 x i32> from %A and a <4 x i16> from %B, zero-extends only the
; narrow operand to <4 x i32>, and adds it to the wide one.
; The add + zext pattern is expected to be selected as uaddw.4s.
299*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @uaddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
300*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: uaddw4s:
301*9880d681SAndroid Build Coastguard Worker;CHECK: uaddw.4s
302*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i32>, <4 x i32>* %A
303*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i16>, <4 x i16>* %B
304*9880d681SAndroid Build Coastguard Worker  %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
305*9880d681SAndroid Build Coastguard Worker  %tmp4 = add <4 x i32> %tmp1, %tmp3
306*9880d681SAndroid Build Coastguard Worker        ret <4 x i32> %tmp4
307*9880d681SAndroid Build Coastguard Worker}
308*9880d681SAndroid Build Coastguard Worker
; Loads a <2 x i64> from %A and a <2 x i32> from %B, zero-extends only the
; narrow operand to <2 x i64>, and adds it to the wide one.
; The add + zext pattern is expected to be selected as uaddw.2d.
309*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @uaddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
310*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: uaddw2d:
311*9880d681SAndroid Build Coastguard Worker;CHECK: uaddw.2d
312*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i64>, <2 x i64>* %A
313*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <2 x i32>, <2 x i32>* %B
314*9880d681SAndroid Build Coastguard Worker  %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
315*9880d681SAndroid Build Coastguard Worker  %tmp4 = add <2 x i64> %tmp1, %tmp3
316*9880d681SAndroid Build Coastguard Worker        ret <2 x i64> %tmp4
317*9880d681SAndroid Build Coastguard Worker}
318*9880d681SAndroid Build Coastguard Worker
; Loads an <8 x i16> from %A and a <16 x i8> from %B, selects elements 8-15
; of the latter with a shufflevector, zero-extends them to <8 x i16>, and
; adds the result to the wide operand.
; The expected assembly contains uaddw2.8h.
319*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
320*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: uaddw2_8h:
321*9880d681SAndroid Build Coastguard Worker;CHECK: uaddw2.8h
322*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i16>, <8 x i16>* %A
323*9880d681SAndroid Build Coastguard Worker
324*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <16 x i8>, <16 x i8>* %B
325*9880d681SAndroid Build Coastguard Worker        %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
326*9880d681SAndroid Build Coastguard Worker        %ext2 = zext <8 x i8> %high2 to <8 x i16>
327*9880d681SAndroid Build Coastguard Worker
328*9880d681SAndroid Build Coastguard Worker        %res = add <8 x i16> %tmp1, %ext2
329*9880d681SAndroid Build Coastguard Worker        ret <8 x i16> %res
330*9880d681SAndroid Build Coastguard Worker}
331*9880d681SAndroid Build Coastguard Worker
; Loads a <4 x i32> from %A and an <8 x i16> from %B, selects elements 4-7
; of the latter with a shufflevector, zero-extends them to <4 x i32>, and
; adds the result to the wide operand.
; The expected assembly contains uaddw2.4s.
332*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
333*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: uaddw2_4s:
334*9880d681SAndroid Build Coastguard Worker;CHECK: uaddw2.4s
335*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i32>, <4 x i32>* %A
336*9880d681SAndroid Build Coastguard Worker
337*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i16>, <8 x i16>* %B
338*9880d681SAndroid Build Coastguard Worker        %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
339*9880d681SAndroid Build Coastguard Worker        %ext2 = zext <4 x i16> %high2 to <4 x i32>
340*9880d681SAndroid Build Coastguard Worker
341*9880d681SAndroid Build Coastguard Worker        %res = add <4 x i32> %tmp1, %ext2
342*9880d681SAndroid Build Coastguard Worker        ret <4 x i32> %res
343*9880d681SAndroid Build Coastguard Worker}
344*9880d681SAndroid Build Coastguard Worker
; Loads a <2 x i64> from %A and a <4 x i32> from %B, selects elements 2-3
; of the latter with a shufflevector, zero-extends them to <2 x i64>, and
; adds the result to the wide operand.
; The expected assembly contains uaddw2.2d.
345*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @uaddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
346*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: uaddw2_2d:
347*9880d681SAndroid Build Coastguard Worker;CHECK: uaddw2.2d
348*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i64>, <2 x i64>* %A
349*9880d681SAndroid Build Coastguard Worker
350*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i32>, <4 x i32>* %B
351*9880d681SAndroid Build Coastguard Worker        %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
352*9880d681SAndroid Build Coastguard Worker        %ext2 = zext <2 x i32> %high2 to <2 x i64>
353*9880d681SAndroid Build Coastguard Worker
354*9880d681SAndroid Build Coastguard Worker        %res = add <2 x i64> %tmp1, %ext2
355*9880d681SAndroid Build Coastguard Worker        ret <2 x i64> %res
356*9880d681SAndroid Build Coastguard Worker}
357*9880d681SAndroid Build Coastguard Worker
; Loads an <8 x i16> from %A and an <8 x i8> from %B, sign-extends only the
; narrow operand to <8 x i16>, and adds it to the wide one.
; The add + sext pattern is expected to be selected as saddw.8h.
358*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @saddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
359*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: saddw8h:
360*9880d681SAndroid Build Coastguard Worker;CHECK: saddw.8h
361*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i16>, <8 x i16>* %A
362*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i8>, <8 x i8>* %B
363*9880d681SAndroid Build Coastguard Worker        %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
364*9880d681SAndroid Build Coastguard Worker        %tmp4 = add <8 x i16> %tmp1, %tmp3
365*9880d681SAndroid Build Coastguard Worker        ret <8 x i16> %tmp4
366*9880d681SAndroid Build Coastguard Worker}
367*9880d681SAndroid Build Coastguard Worker
; Loads a <4 x i32> from %A and a <4 x i16> from %B, sign-extends only the
; narrow operand to <4 x i32>, and adds it to the wide one.
; The add + sext pattern is expected to be selected as saddw.4s.
368*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @saddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
369*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: saddw4s:
370*9880d681SAndroid Build Coastguard Worker;CHECK: saddw.4s
371*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i32>, <4 x i32>* %A
372*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i16>, <4 x i16>* %B
373*9880d681SAndroid Build Coastguard Worker        %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
374*9880d681SAndroid Build Coastguard Worker        %tmp4 = add <4 x i32> %tmp1, %tmp3
375*9880d681SAndroid Build Coastguard Worker        ret <4 x i32> %tmp4
376*9880d681SAndroid Build Coastguard Worker}
377*9880d681SAndroid Build Coastguard Worker
; Loads a <2 x i64> from %A and a <2 x i32> from %B, sign-extends only the
; narrow operand to <2 x i64>, and adds it to the wide one.
; The add + sext pattern is expected to be selected as saddw.2d.
378*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @saddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
379*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: saddw2d:
380*9880d681SAndroid Build Coastguard Worker;CHECK: saddw.2d
381*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i64>, <2 x i64>* %A
382*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <2 x i32>, <2 x i32>* %B
383*9880d681SAndroid Build Coastguard Worker        %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
384*9880d681SAndroid Build Coastguard Worker        %tmp4 = add <2 x i64> %tmp1, %tmp3
385*9880d681SAndroid Build Coastguard Worker        ret <2 x i64> %tmp4
386*9880d681SAndroid Build Coastguard Worker}
387*9880d681SAndroid Build Coastguard Worker
; Sign-extends lanes 8-15 of the <16 x i8> loaded from %B and adds them to the
; <8 x i16> loaded from %A. The test expects saddw2.8h in the llc output.
define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: saddw2_8h:
; CHECK: saddw2.8h
  %acc = load <8 x i16>, <8 x i16>* %A
  %src = load <16 x i8>, <16 x i8>* %B
  ; Select the upper eight of the sixteen source lanes.
  %src.hi = shufflevector <16 x i8> %src, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %src.hi.sext = sext <8 x i8> %src.hi to <8 x i16>
  %sum = add <8 x i16> %acc, %src.hi.sext
  ret <8 x i16> %sum
}
400*9880d681SAndroid Build Coastguard Worker
; Sign-extends lanes 4-7 of the <8 x i16> loaded from %B and adds them to the
; <4 x i32> loaded from %A. The test expects saddw2.4s in the llc output.
define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: saddw2_4s:
; CHECK: saddw2.4s
  %acc = load <4 x i32>, <4 x i32>* %A
  %src = load <8 x i16>, <8 x i16>* %B
  ; Select the upper four of the eight source lanes.
  %src.hi = shufflevector <8 x i16> %src, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %src.hi.sext = sext <4 x i16> %src.hi to <4 x i32>
  %sum = add <4 x i32> %acc, %src.hi.sext
  ret <4 x i32> %sum
}
413*9880d681SAndroid Build Coastguard Worker
; Sign-extends lanes 2-3 of the <4 x i32> loaded from %B and adds them to the
; <2 x i64> loaded from %A. The test expects saddw2.2d in the llc output.
define <2 x i64> @saddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: saddw2_2d:
; CHECK: saddw2.2d
  %acc = load <2 x i64>, <2 x i64>* %A
  %src = load <4 x i32>, <4 x i32>* %B
  ; Select the upper two of the four source lanes.
  %src.hi = shufflevector <4 x i32> %src, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %src.hi.sext = sext <2 x i32> %src.hi to <2 x i64>
  %sum = add <2 x i64> %acc, %src.hi.sext
  ret <2 x i64> %sum
}
426*9880d681SAndroid Build Coastguard Worker
; Applies the saddlp intrinsic to the <8 x i8> loaded from %A, producing
; <4 x i16>. The test expects saddlp.4h in the llc output.
define <4 x i16> @saddlp4h(<8 x i8>* %A) nounwind {
; CHECK-LABEL: saddlp4h:
; CHECK: saddlp.4h
  %src = load <8 x i8>, <8 x i8>* %A
  %pairs = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %src)
  ret <4 x i16> %pairs
}
434*9880d681SAndroid Build Coastguard Worker
; Applies the saddlp intrinsic to the <4 x i16> loaded from %A, producing
; <2 x i32>. The test expects saddlp.2s in the llc output.
define <2 x i32> @saddlp2s(<4 x i16>* %A) nounwind {
; CHECK-LABEL: saddlp2s:
; CHECK: saddlp.2s
  %src = load <4 x i16>, <4 x i16>* %A
  %pairs = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %src)
  ret <2 x i32> %pairs
}
442*9880d681SAndroid Build Coastguard Worker
; Applies the saddlp intrinsic to the <2 x i32> loaded from %A, producing
; <1 x i64>. The test expects saddlp.1d in the llc output.
define <1 x i64> @saddlp1d(<2 x i32>* %A) nounwind {
; CHECK-LABEL: saddlp1d:
; CHECK: saddlp.1d
  %src = load <2 x i32>, <2 x i32>* %A
  %pairs = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %src)
  ret <1 x i64> %pairs
}
450*9880d681SAndroid Build Coastguard Worker
; Applies the saddlp intrinsic to the <16 x i8> loaded from %A, producing
; <8 x i16>. The test expects saddlp.8h in the llc output.
define <8 x i16> @saddlp8h(<16 x i8>* %A) nounwind {
; CHECK-LABEL: saddlp8h:
; CHECK: saddlp.8h
  %src = load <16 x i8>, <16 x i8>* %A
  %pairs = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %src)
  ret <8 x i16> %pairs
}
458*9880d681SAndroid Build Coastguard Worker
; Applies the saddlp intrinsic to the <8 x i16> loaded from %A, producing
; <4 x i32>. The test expects saddlp.4s in the llc output.
define <4 x i32> @saddlp4s(<8 x i16>* %A) nounwind {
; CHECK-LABEL: saddlp4s:
; CHECK: saddlp.4s
  %src = load <8 x i16>, <8 x i16>* %A
  %pairs = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %src)
  ret <4 x i32> %pairs
}
466*9880d681SAndroid Build Coastguard Worker
; Applies the saddlp intrinsic to the <4 x i32> loaded from %A, producing
; <2 x i64>. The test expects saddlp.2d in the llc output.
define <2 x i64> @saddlp2d(<4 x i32>* %A) nounwind {
; CHECK-LABEL: saddlp2d:
; CHECK: saddlp.2d
  %src = load <4 x i32>, <4 x i32>* %A
  %pairs = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %src)
  ret <2 x i64> %pairs
}
474*9880d681SAndroid Build Coastguard Worker
; saddlp intrinsic declarations called by the saddlp* and sadalp* tests.
; Variants taking a 64-bit source vector:
declare <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>) nounwind readnone

; Variants taking a 128-bit source vector:
declare <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32>) nounwind readnone
482*9880d681SAndroid Build Coastguard Worker
; Applies the uaddlp intrinsic to the <8 x i8> loaded from %A, producing
; <4 x i16>. The test expects uaddlp.4h in the llc output.
define <4 x i16> @uaddlp4h(<8 x i8>* %A) nounwind {
; CHECK-LABEL: uaddlp4h:
; CHECK: uaddlp.4h
  %src = load <8 x i8>, <8 x i8>* %A
  %pairs = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %src)
  ret <4 x i16> %pairs
}
490*9880d681SAndroid Build Coastguard Worker
; Applies the uaddlp intrinsic to the <4 x i16> loaded from %A, producing
; <2 x i32>. The test expects uaddlp.2s in the llc output.
define <2 x i32> @uaddlp2s(<4 x i16>* %A) nounwind {
; CHECK-LABEL: uaddlp2s:
; CHECK: uaddlp.2s
  %src = load <4 x i16>, <4 x i16>* %A
  %pairs = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %src)
  ret <2 x i32> %pairs
}
498*9880d681SAndroid Build Coastguard Worker
; Applies the uaddlp intrinsic to the <2 x i32> loaded from %A, producing
; <1 x i64>. The test expects uaddlp.1d in the llc output.
define <1 x i64> @uaddlp1d(<2 x i32>* %A) nounwind {
; CHECK-LABEL: uaddlp1d:
; CHECK: uaddlp.1d
  %src = load <2 x i32>, <2 x i32>* %A
  %pairs = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %src)
  ret <1 x i64> %pairs
}
506*9880d681SAndroid Build Coastguard Worker
; Applies the uaddlp intrinsic to the <16 x i8> loaded from %A, producing
; <8 x i16>. The test expects uaddlp.8h in the llc output.
define <8 x i16> @uaddlp8h(<16 x i8>* %A) nounwind {
; CHECK-LABEL: uaddlp8h:
; CHECK: uaddlp.8h
  %src = load <16 x i8>, <16 x i8>* %A
  %pairs = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %src)
  ret <8 x i16> %pairs
}
514*9880d681SAndroid Build Coastguard Worker
; Applies the uaddlp intrinsic to the <8 x i16> loaded from %A, producing
; <4 x i32>. The test expects uaddlp.4s in the llc output.
define <4 x i32> @uaddlp4s(<8 x i16>* %A) nounwind {
; CHECK-LABEL: uaddlp4s:
; CHECK: uaddlp.4s
  %src = load <8 x i16>, <8 x i16>* %A
  %pairs = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %src)
  ret <4 x i32> %pairs
}
522*9880d681SAndroid Build Coastguard Worker
; Applies the uaddlp intrinsic to the <4 x i32> loaded from %A, producing
; <2 x i64>. The test expects uaddlp.2d in the llc output.
define <2 x i64> @uaddlp2d(<4 x i32>* %A) nounwind {
; CHECK-LABEL: uaddlp2d:
; CHECK: uaddlp.2d
  %src = load <4 x i32>, <4 x i32>* %A
  %pairs = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %src)
  ret <2 x i64> %pairs
}
530*9880d681SAndroid Build Coastguard Worker
; uaddlp intrinsic declarations called by the uaddlp* and uadalp* tests.
; Variants taking a 64-bit source vector:
declare <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>) nounwind readnone

; Variants taking a 128-bit source vector:
declare <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32>) nounwind readnone
538*9880d681SAndroid Build Coastguard Worker
; Adds the saddlp intrinsic result for the <8 x i8> loaded from %A to the
; <4 x i16> loaded from %B. The test expects sadalp.4h in the llc output.
define <4 x i16> @sadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: sadalp4h:
; CHECK: sadalp.4h
  %src = load <8 x i8>, <8 x i8>* %A
  %pairs = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %src)
  %acc = load <4 x i16>, <4 x i16>* %B
  %sum = add <4 x i16> %pairs, %acc
  ret <4 x i16> %sum
}
548*9880d681SAndroid Build Coastguard Worker
; Adds the saddlp intrinsic result for the <4 x i16> loaded from %A to the
; <2 x i32> loaded from %B. The test expects sadalp.2s in the llc output.
define <2 x i32> @sadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: sadalp2s:
; CHECK: sadalp.2s
  %src = load <4 x i16>, <4 x i16>* %A
  %pairs = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %src)
  %acc = load <2 x i32>, <2 x i32>* %B
  %sum = add <2 x i32> %pairs, %acc
  ret <2 x i32> %sum
}
558*9880d681SAndroid Build Coastguard Worker
; Adds the saddlp intrinsic result for the <16 x i8> loaded from %A to the
; <8 x i16> loaded from %B. The test expects sadalp.8h in the llc output.
define <8 x i16> @sadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: sadalp8h:
; CHECK: sadalp.8h
  %src = load <16 x i8>, <16 x i8>* %A
  %pairs = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %src)
  %acc = load <8 x i16>, <8 x i16>* %B
  %sum = add <8 x i16> %pairs, %acc
  ret <8 x i16> %sum
}
568*9880d681SAndroid Build Coastguard Worker
; Adds the saddlp intrinsic result for the <8 x i16> loaded from %A to the
; <4 x i32> loaded from %B. The test expects sadalp.4s in the llc output.
define <4 x i32> @sadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: sadalp4s:
; CHECK: sadalp.4s
  %src = load <8 x i16>, <8 x i16>* %A
  %pairs = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %src)
  %acc = load <4 x i32>, <4 x i32>* %B
  %sum = add <4 x i32> %pairs, %acc
  ret <4 x i32> %sum
}
578*9880d681SAndroid Build Coastguard Worker
; Adds the saddlp intrinsic result for the <4 x i32> loaded from %A to the
; <2 x i64> loaded from %B. The test expects sadalp.2d in the llc output.
define <2 x i64> @sadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: sadalp2d:
; CHECK: sadalp.2d
  %src = load <4 x i32>, <4 x i32>* %A
  %pairs = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %src)
  %acc = load <2 x i64>, <2 x i64>* %B
  %sum = add <2 x i64> %pairs, %acc
  ret <2 x i64> %sum
}
588*9880d681SAndroid Build Coastguard Worker
; Adds the uaddlp intrinsic result for the <8 x i8> loaded from %A to the
; <4 x i16> loaded from %B. The test expects uadalp.4h in the llc output.
define <4 x i16> @uadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: uadalp4h:
; CHECK: uadalp.4h
  %src = load <8 x i8>, <8 x i8>* %A
  %pairs = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %src)
  %acc = load <4 x i16>, <4 x i16>* %B
  %sum = add <4 x i16> %pairs, %acc
  ret <4 x i16> %sum
}
598*9880d681SAndroid Build Coastguard Worker
; Adds the uaddlp intrinsic result for the <4 x i16> loaded from %A to the
; <2 x i32> loaded from %B. The test expects uadalp.2s in the llc output.
define <2 x i32> @uadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: uadalp2s:
; CHECK: uadalp.2s
  %src = load <4 x i16>, <4 x i16>* %A
  %pairs = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %src)
  %acc = load <2 x i32>, <2 x i32>* %B
  %sum = add <2 x i32> %pairs, %acc
  ret <2 x i32> %sum
}
608*9880d681SAndroid Build Coastguard Worker
; Adds the uaddlp intrinsic result for the <16 x i8> loaded from %A to the
; <8 x i16> loaded from %B. The test expects uadalp.8h in the llc output.
define <8 x i16> @uadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: uadalp8h:
; CHECK: uadalp.8h
  %src = load <16 x i8>, <16 x i8>* %A
  %pairs = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %src)
  %acc = load <8 x i16>, <8 x i16>* %B
  %sum = add <8 x i16> %pairs, %acc
  ret <8 x i16> %sum
}
618*9880d681SAndroid Build Coastguard Worker
; Adds the uaddlp intrinsic result for the <8 x i16> loaded from %A to the
; <4 x i32> loaded from %B. The test expects uadalp.4s in the llc output.
define <4 x i32> @uadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: uadalp4s:
; CHECK: uadalp.4s
  %src = load <8 x i16>, <8 x i16>* %A
  %pairs = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %src)
  %acc = load <4 x i32>, <4 x i32>* %B
  %sum = add <4 x i32> %pairs, %acc
  ret <4 x i32> %sum
}
628*9880d681SAndroid Build Coastguard Worker
; Adds the uaddlp intrinsic result for the <4 x i32> loaded from %A to the
; <2 x i64> loaded from %B. The test expects uadalp.2d in the llc output.
define <2 x i64> @uadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: uadalp2d:
; CHECK: uadalp.2d
  %src = load <4 x i32>, <4 x i32>* %A
  %pairs = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %src)
  %acc = load <2 x i64>, <2 x i64>* %B
  %sum = add <2 x i64> %pairs, %acc
  ret <2 x i64> %sum
}
638*9880d681SAndroid Build Coastguard Worker
; Passes the two <8 x i8> vectors loaded from %A and %B to the addp intrinsic.
; The test expects addp.8b in the llc output.
define <8 x i8> @addp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: addp_8b:
; CHECK: addp.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %res = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
647*9880d681SAndroid Build Coastguard Worker
; Passes the two <16 x i8> vectors loaded from %A and %B to the addp intrinsic.
; The test expects addp.16b in the llc output.
define <16 x i8> @addp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: addp_16b:
; CHECK: addp.16b
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %res = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
656*9880d681SAndroid Build Coastguard Worker
; Passes the two <4 x i16> vectors loaded from %A and %B to the addp intrinsic.
; The test expects addp.4h in the llc output.
define <4 x i16> @addp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: addp_4h:
; CHECK: addp.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %res = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
665*9880d681SAndroid Build Coastguard Worker
; Passes the two <8 x i16> vectors loaded from %A and %B to the addp intrinsic.
; The test expects addp.8h in the llc output.
define <8 x i16> @addp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: addp_8h:
; CHECK: addp.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %res = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
674*9880d681SAndroid Build Coastguard Worker
; Passes the two <2 x i32> vectors loaded from %A and %B to the addp intrinsic.
; The test expects addp.2s in the llc output.
define <2 x i32> @addp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: addp_2s:
; CHECK: addp.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %res = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
683*9880d681SAndroid Build Coastguard Worker
; Passes the two <4 x i32> vectors loaded from %A and %B to the addp intrinsic.
; The test expects addp.4s in the llc output.
define <4 x i32> @addp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: addp_4s:
; CHECK: addp.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %res = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
692*9880d681SAndroid Build Coastguard Worker
; Passes the two <2 x i64> vectors loaded from %A and %B to the addp intrinsic.
; The test expects addp.2d in the llc output.
define <2 x i64> @addp_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: addp_2d:
; CHECK: addp.2d
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %res = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}
701*9880d681SAndroid Build Coastguard Worker
; Integer-vector overloads of the addp intrinsic called by the addp_* tests.
declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
709*9880d681SAndroid Build Coastguard Worker
; Passes the two <2 x float> vectors loaded from %A and %B to the v2f32
; overload of the addp intrinsic. The test expects faddp.2s in the llc output.
define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: faddp_2s:
; CHECK: faddp.2s
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %res = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %res
}
718*9880d681SAndroid Build Coastguard Worker
; Passes the two <4 x float> vectors loaded from %A and %B to the v4f32
; overload of the addp intrinsic. The test expects faddp.4s in the llc output.
define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: faddp_4s:
; CHECK: faddp.4s
  %lhs = load <4 x float>, <4 x float>* %A
  %rhs = load <4 x float>, <4 x float>* %B
  %res = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %res
}
727*9880d681SAndroid Build Coastguard Worker
; Passes the two <2 x double> vectors loaded from %A and %B to the v2f64
; overload of the addp intrinsic. The test expects faddp.2d in the llc output.
define <2 x double> @faddp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
; CHECK-LABEL: faddp_2d:
; CHECK: faddp.2d
  %lhs = load <2 x double>, <2 x double>* %A
  %rhs = load <2 x double>, <2 x double>* %B
  %res = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %res
}
736*9880d681SAndroid Build Coastguard Worker
; Floating-point overloads of the addp intrinsic called by the faddp_* tests.
declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone
740*9880d681SAndroid Build Coastguard Worker
; Zero-extends lanes 2-3 of %lhs and a <2 x i32> holding %rhs in both lanes,
; then adds the two <2 x i64> values. The test expects uaddl2.2d with no
; ext.16b between the function label and that instruction.
define <2 x i64> @uaddl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
; The label pattern ends with ':' like the other tests in this file, so it
; matches the label definition rather than any line that mentions the symbol.
; CHECK-LABEL: uaddl2_duprhs:
; CHECK-NOT: ext.16b
; CHECK: uaddl2.2d
  ; Build the two-lane vector by inserting %rhs into lane 0 and lane 1.
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1

  ; Take the upper two of the four lanes of %lhs.
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>

  %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
  %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>

  %res = add <2 x i64> %lhs.ext, %rhs.ext
  ret <2 x i64> %res
}
756*9880d681SAndroid Build Coastguard Worker
; Sign-extends a <2 x i32> holding %lhs in both lanes and lanes 2-3 of %rhs,
; then adds the two <2 x i64> values. The test expects saddl2.2d with no
; ext.16b between the function label and that instruction.
define <2 x i64> @saddl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
; The label pattern ends with ':' like the other tests in this file, so it
; matches the label definition rather than any line that mentions the symbol.
; CHECK-LABEL: saddl2_duplhs:
; CHECK-NOT: ext.16b
; CHECK: saddl2.2d
  ; Build the two-lane vector by inserting %lhs into lane 0 and lane 1.
  %lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
  %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1

  ; Take the upper two of the four lanes of %rhs.
  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>

  %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
  %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>

  %res = add <2 x i64> %lhs.ext, %rhs.ext
  ret <2 x i64> %res
}
772*9880d681SAndroid Build Coastguard Worker
; Zero-extends lanes 2-3 of %lhs and a <2 x i32> holding %rhs in both lanes,
; then subtracts the second <2 x i64> from the first. The test expects
; usubl2.2d with no ext.16b between the function label and that instruction.
define <2 x i64> @usubl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
; The label pattern ends with ':' like the other tests in this file, so it
; matches the label definition rather than any line that mentions the symbol.
; CHECK-LABEL: usubl2_duprhs:
; CHECK-NOT: ext.16b
; CHECK: usubl2.2d
  ; Build the two-lane vector by inserting %rhs into lane 0 and lane 1.
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1

  ; Take the upper two of the four lanes of %lhs.
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>

  %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
  %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>

  %res = sub <2 x i64> %lhs.ext, %rhs.ext
  ret <2 x i64> %res
}
788*9880d681SAndroid Build Coastguard Worker
; Signed widening subtract of the high half of a vector from a duplicated
; scalar. %lhs is inserted into both lanes of a <2 x i32>; lanes 2 and 3 of
; %rhs are extracted with a shufflevector; both are sign-extended to <2 x i64>
; and the high half is subtracted from the splat.
; The directives below expect ssubl2.2d to be emitted with no ext.16b between
; the function label and that instruction.
; The label pattern ends in a colon so it anchors on the symbol definition,
; matching the other label directives in this file.
789*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
790*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: ssubl2_duplhs:
791*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext.16b
792*9880d681SAndroid Build Coastguard Worker; CHECK: ssubl2.2d
793*9880d681SAndroid Build Coastguard Worker  %lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
794*9880d681SAndroid Build Coastguard Worker  %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1
795*9880d681SAndroid Build Coastguard Worker
796*9880d681SAndroid Build Coastguard Worker  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
797*9880d681SAndroid Build Coastguard Worker
798*9880d681SAndroid Build Coastguard Worker  %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
799*9880d681SAndroid Build Coastguard Worker  %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>
800*9880d681SAndroid Build Coastguard Worker
801*9880d681SAndroid Build Coastguard Worker  %res = sub <2 x i64> %lhs.ext, %rhs.ext
802*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
803*9880d681SAndroid Build Coastguard Worker}
804*9880d681SAndroid Build Coastguard Worker
; Add-and-narrow-high-half written as plain IR instead of the
; @llvm.aarch64.neon.addhn intrinsic used by @addhn8b earlier in this file.
; Loads two <8 x i16> vectors from %A and %B, adds them, logically shifts each
; lane right by 8 and truncates to <8 x i8>, i.e. keeps the upper byte of each
; 16-bit sum. The directive below expects addhn.8b in the output.
805*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @addhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
806*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn8b_natural:
807*9880d681SAndroid Build Coastguard Worker;CHECK: addhn.8b
808*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i16>, <8 x i16>* %A
809*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i16>, <8 x i16>* %B
810*9880d681SAndroid Build Coastguard Worker        %sum = add <8 x i16> %tmp1, %tmp2
811*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
812*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
813*9880d681SAndroid Build Coastguard Worker        ret <8 x i8> %narrowed
814*9880d681SAndroid Build Coastguard Worker}
815*9880d681SAndroid Build Coastguard Worker
; Add-and-narrow-high-half written as plain IR (no intrinsic call).
; Loads two <4 x i32> vectors from %A and %B, adds them, logically shifts each
; lane right by 16 and truncates to <4 x i16>, i.e. keeps the upper 16 bits of
; each 32-bit sum. The directive below expects addhn.4h in the output.
816*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @addhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
817*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn4h_natural:
818*9880d681SAndroid Build Coastguard Worker;CHECK: addhn.4h
819*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i32>, <4 x i32>* %A
820*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i32>, <4 x i32>* %B
821*9880d681SAndroid Build Coastguard Worker        %sum = add <4 x i32> %tmp1, %tmp2
822*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
823*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
824*9880d681SAndroid Build Coastguard Worker        ret <4 x i16> %narrowed
825*9880d681SAndroid Build Coastguard Worker}
826*9880d681SAndroid Build Coastguard Worker
; Add-and-narrow-high-half written as plain IR (no intrinsic call).
; Loads two <2 x i64> vectors from %A and %B, adds them, logically shifts each
; lane right by 32 and truncates to <2 x i32>, i.e. keeps the upper 32 bits of
; each 64-bit sum. The directive below expects addhn.2s in the output.
827*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @addhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
828*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn2s_natural:
829*9880d681SAndroid Build Coastguard Worker;CHECK: addhn.2s
830*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i64>, <2 x i64>* %A
831*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <2 x i64>, <2 x i64>* %B
832*9880d681SAndroid Build Coastguard Worker        %sum = add <2 x i64> %tmp1, %tmp2
833*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
834*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
835*9880d681SAndroid Build Coastguard Worker        ret <2 x i32> %narrowed
836*9880d681SAndroid Build Coastguard Worker}
837*9880d681SAndroid Build Coastguard Worker
; Add-and-narrow-high-half into the upper half of a result vector, written as
; plain IR. Loads two <8 x i16> vectors from %A and %B, adds them, logically
; shifts each lane right by 8 and truncates to <8 x i8>. The final
; shufflevector concatenates %low (mask indices 0-7) with the narrowed sums
; (mask indices 8-15) into a <16 x i8>. The directive below expects
; addhn2.16b in the output.
838*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @addhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
839*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn2_16b_natural:
840*9880d681SAndroid Build Coastguard Worker;CHECK: addhn2.16b
841*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i16>, <8 x i16>* %A
842*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i16>, <8 x i16>* %B
843*9880d681SAndroid Build Coastguard Worker        %sum = add <8 x i16> %tmp1, %tmp2
844*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
845*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
846*9880d681SAndroid Build Coastguard Worker        %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
847*9880d681SAndroid Build Coastguard Worker        ret <16 x i8> %res
848*9880d681SAndroid Build Coastguard Worker}
849*9880d681SAndroid Build Coastguard Worker
; Add-and-narrow-high-half into the upper half of a result vector, written as
; plain IR. Loads two <4 x i32> vectors from %A and %B, adds them, logically
; shifts each lane right by 16 and truncates to <4 x i16>. The final
; shufflevector concatenates %low (mask indices 0-3) with the narrowed sums
; (mask indices 4-7) into an <8 x i16>. The directive below expects
; addhn2.8h in the output.
850*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @addhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
851*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn2_8h_natural:
852*9880d681SAndroid Build Coastguard Worker;CHECK: addhn2.8h
853*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i32>, <4 x i32>* %A
854*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i32>, <4 x i32>* %B
855*9880d681SAndroid Build Coastguard Worker        %sum = add <4 x i32> %tmp1, %tmp2
856*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
857*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
858*9880d681SAndroid Build Coastguard Worker        %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
859*9880d681SAndroid Build Coastguard Worker        ret <8 x i16> %res
860*9880d681SAndroid Build Coastguard Worker}
861*9880d681SAndroid Build Coastguard Worker
; Add-and-narrow-high-half into the upper half of a result vector, written as
; plain IR. Loads two <2 x i64> vectors from %A and %B, adds them, logically
; shifts each lane right by 32 and truncates to <2 x i32>. The final
; shufflevector concatenates %low (mask indices 0-1) with the narrowed sums
; (mask indices 2-3) into a <4 x i32>. The directive below expects
; addhn2.4s in the output.
862*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @addhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
863*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: addhn2_4s_natural:
864*9880d681SAndroid Build Coastguard Worker;CHECK: addhn2.4s
865*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i64>, <2 x i64>* %A
866*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <2 x i64>, <2 x i64>* %B
867*9880d681SAndroid Build Coastguard Worker        %sum = add <2 x i64> %tmp1, %tmp2
868*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
869*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
870*9880d681SAndroid Build Coastguard Worker        %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
871*9880d681SAndroid Build Coastguard Worker        ret <4 x i32> %res
872*9880d681SAndroid Build Coastguard Worker}
873*9880d681SAndroid Build Coastguard Worker
; Subtract-and-narrow-high-half written as plain IR (no intrinsic call).
; Loads two <8 x i16> vectors from %A and %B, computes %A - %B lane-wise,
; logically shifts each lane right by 8 and truncates to <8 x i8>, i.e. keeps
; the upper byte of each 16-bit difference. The directive below expects
; subhn.8b in the output.
874*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @subhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
875*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: subhn8b_natural:
876*9880d681SAndroid Build Coastguard Worker;CHECK: subhn.8b
877*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i16>, <8 x i16>* %A
878*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i16>, <8 x i16>* %B
879*9880d681SAndroid Build Coastguard Worker        %diff = sub <8 x i16> %tmp1, %tmp2
880*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
881*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
882*9880d681SAndroid Build Coastguard Worker        ret <8 x i8> %narrowed
883*9880d681SAndroid Build Coastguard Worker}
884*9880d681SAndroid Build Coastguard Worker
; Subtract-and-narrow-high-half written as plain IR (no intrinsic call).
; Loads two <4 x i32> vectors from %A and %B, computes %A - %B lane-wise,
; logically shifts each lane right by 16 and truncates to <4 x i16>, i.e.
; keeps the upper 16 bits of each 32-bit difference. The directive below
; expects subhn.4h in the output.
885*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @subhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
886*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: subhn4h_natural:
887*9880d681SAndroid Build Coastguard Worker;CHECK: subhn.4h
888*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i32>, <4 x i32>* %A
889*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i32>, <4 x i32>* %B
890*9880d681SAndroid Build Coastguard Worker        %diff = sub <4 x i32> %tmp1, %tmp2
891*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
892*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
893*9880d681SAndroid Build Coastguard Worker        ret <4 x i16> %narrowed
894*9880d681SAndroid Build Coastguard Worker}
895*9880d681SAndroid Build Coastguard Worker
; Subtract-and-narrow-high-half written as plain IR (no intrinsic call).
; Loads two <2 x i64> vectors from %A and %B, computes %A - %B lane-wise,
; logically shifts each lane right by 32 and truncates to <2 x i32>, i.e.
; keeps the upper 32 bits of each 64-bit difference. The directive below
; expects subhn.2s in the output.
896*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @subhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
897*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: subhn2s_natural:
898*9880d681SAndroid Build Coastguard Worker;CHECK: subhn.2s
899*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i64>, <2 x i64>* %A
900*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <2 x i64>, <2 x i64>* %B
901*9880d681SAndroid Build Coastguard Worker        %diff = sub <2 x i64> %tmp1, %tmp2
902*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
903*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
904*9880d681SAndroid Build Coastguard Worker        ret <2 x i32> %narrowed
905*9880d681SAndroid Build Coastguard Worker}
906*9880d681SAndroid Build Coastguard Worker
; Subtract-and-narrow-high-half into the upper half of a result vector,
; written as plain IR. Loads two <8 x i16> vectors from %A and %B, computes
; %A - %B lane-wise, logically shifts each lane right by 8 and truncates to
; <8 x i8>. The final shufflevector concatenates %low (mask indices 0-7) with
; the narrowed differences (mask indices 8-15) into a <16 x i8>. The directive
; below expects subhn2.16b in the output.
907*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @subhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
908*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: subhn2_16b_natural:
909*9880d681SAndroid Build Coastguard Worker;CHECK: subhn2.16b
910*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <8 x i16>, <8 x i16>* %A
911*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <8 x i16>, <8 x i16>* %B
912*9880d681SAndroid Build Coastguard Worker        %diff = sub <8 x i16> %tmp1, %tmp2
913*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
914*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
915*9880d681SAndroid Build Coastguard Worker        %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
916*9880d681SAndroid Build Coastguard Worker        ret <16 x i8> %res
917*9880d681SAndroid Build Coastguard Worker}
918*9880d681SAndroid Build Coastguard Worker
; Subtract-and-narrow-high-half into the upper half of a result vector,
; written as plain IR. Loads two <4 x i32> vectors from %A and %B, computes
; %A - %B lane-wise, logically shifts each lane right by 16 and truncates to
; <4 x i16>. The final shufflevector concatenates %low (mask indices 0-3) with
; the narrowed differences (mask indices 4-7) into an <8 x i16>. The directive
; below expects subhn2.8h in the output.
919*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @subhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
920*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: subhn2_8h_natural:
921*9880d681SAndroid Build Coastguard Worker;CHECK: subhn2.8h
922*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <4 x i32>, <4 x i32>* %A
923*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <4 x i32>, <4 x i32>* %B
924*9880d681SAndroid Build Coastguard Worker        %diff = sub <4 x i32> %tmp1, %tmp2
925*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
926*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
927*9880d681SAndroid Build Coastguard Worker        %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
928*9880d681SAndroid Build Coastguard Worker        ret <8 x i16> %res
929*9880d681SAndroid Build Coastguard Worker}
930*9880d681SAndroid Build Coastguard Worker
; Subtract-and-narrow-high-half into the upper half of a result vector,
; written as plain IR. Loads two <2 x i64> vectors from %A and %B, computes
; %A - %B lane-wise, logically shifts each lane right by 32 and truncates to
; <2 x i32>. The final shufflevector concatenates %low (mask indices 0-1) with
; the narrowed differences (mask indices 2-3) into a <4 x i32>. The directive
; below expects subhn2.4s in the output.
931*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @subhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
932*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: subhn2_4s_natural:
933*9880d681SAndroid Build Coastguard Worker;CHECK: subhn2.4s
934*9880d681SAndroid Build Coastguard Worker        %tmp1 = load <2 x i64>, <2 x i64>* %A
935*9880d681SAndroid Build Coastguard Worker        %tmp2 = load <2 x i64>, <2 x i64>* %B
936*9880d681SAndroid Build Coastguard Worker        %diff = sub <2 x i64> %tmp1, %tmp2
937*9880d681SAndroid Build Coastguard Worker        %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
938*9880d681SAndroid Build Coastguard Worker        %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
939*9880d681SAndroid Build Coastguard Worker        %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
940*9880d681SAndroid Build Coastguard Worker        ret <4 x i32> %res
941*9880d681SAndroid Build Coastguard Worker}
942