xref: /aosp_15_r20/external/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; Each run enables the shared CHECK prefix in addition to its own prefix so
; that the per-function label directives in this file are honoured.
; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V8a
; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V81a
; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V81a-apple
4*9880d681SAndroid Build Coastguard Worker
; sqrdmulh intrinsics: four vector forms followed by the i32 and i16 scalar forms.
declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32)
declare i16 @llvm.aarch64.neon.sqrdmulh.i16(i16, i16)

; sqadd intrinsics, used by the sqrdmlah tests to accumulate the product.
declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)
declare i16 @llvm.aarch64.neon.sqadd.i16(i16, i16)

; sqsub intrinsics, used by the sqrdmlsh tests to subtract the product.
declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
declare i16 @llvm.aarch64.neon.sqsub.i16(i16, i16)
25*9880d681SAndroid Build Coastguard Worker
;-----------------------------------------------------------------------------
; RDMA Vector
; test for SIMDThreeSameVectorSQRDMLxHTiedHS
29*9880d681SAndroid Build Coastguard Worker
; <4 x i16>: sqadd(%acc, sqrdmulh(%mhs, %rhs)). The v8.1a runs expect sqrdmlah;
; the run without +v8.1a expects a separate sqrdmulh.
define <4 x i16> @test_sqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_sqrdmlah_v4i16:
; CHECK-V8a:        sqrdmulh    v1.4h, v1.4h, v2.4h
; CHECK-V81a:       sqrdmlah    v0.4h, v1.4h, v2.4h
; CHECK-V81a-apple: sqrdmlah.4h v0,    v1,    v2
  %mulh = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
  %res = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %mulh)
  ret <4 x i16> %res
}
39*9880d681SAndroid Build Coastguard Worker
; <8 x i16>: sqadd(%acc, sqrdmulh(%mhs, %rhs)). The v8.1a runs expect sqrdmlah;
; the run without +v8.1a expects a separate sqrdmulh.
define <8 x i16> @test_sqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_sqrdmlah_v8i16:
; CHECK-V8a:        sqrdmulh    v1.8h, v1.8h, v2.8h
; CHECK-V81a:       sqrdmlah    v0.8h, v1.8h, v2.8h
; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2
  %mulh = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
  %res = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %mulh)
  ret <8 x i16> %res
}
49*9880d681SAndroid Build Coastguard Worker
; <2 x i32>: sqadd(%acc, sqrdmulh(%mhs, %rhs)). The v8.1a runs expect sqrdmlah;
; the run without +v8.1a expects a separate sqrdmulh.
define <2 x i32> @test_sqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_sqrdmlah_v2i32:
; CHECK-V8a:        sqrdmulh    v1.2s, v1.2s, v2.2s
; CHECK-V81a:       sqrdmlah    v0.2s, v1.2s, v2.2s
; CHECK-V81a-apple: sqrdmlah.2s v0,    v1,    v2
  %mulh = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
  %res = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %mulh)
  ret <2 x i32> %res
}
59*9880d681SAndroid Build Coastguard Worker
; <4 x i32>: sqadd(%acc, sqrdmulh(%mhs, %rhs)). The v8.1a runs expect sqrdmlah;
; the run without +v8.1a expects a separate sqrdmulh.
define <4 x i32> @test_sqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_sqrdmlah_v4i32:
   %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
   %retval =  call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod)
; CHECK-V8a:        sqrdmulh    v1.4s, v1.4s, v2.4s
; CHECK-V81a:       sqrdmlah    v0.4s, v1.4s, v2.4s
; CHECK-V81a-apple: sqrdmlah.4s v0,    v1,    v2
   ret <4 x i32> %retval
}
69*9880d681SAndroid Build Coastguard Worker
; <4 x i16>: sqsub(%acc, sqrdmulh(%mhs, %rhs)). The v8.1a runs expect sqrdmlsh;
; the run without +v8.1a expects a separate sqrdmulh.
define <4 x i16> @test_sqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_sqrdmlsh_v4i16:
; CHECK-V8a:        sqrdmulh    v1.4h, v1.4h, v2.4h
; CHECK-V81a:       sqrdmlsh    v0.4h, v1.4h, v2.4h
; CHECK-V81a-apple: sqrdmlsh.4h v0,    v1,    v2
  %mulh = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
  %res = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %mulh)
  ret <4 x i16> %res
}
79*9880d681SAndroid Build Coastguard Worker
; <8 x i16>: sqsub(%acc, sqrdmulh(%mhs, %rhs)). The v8.1a runs expect sqrdmlsh;
; the run without +v8.1a expects a separate sqrdmulh.
define <8 x i16> @test_sqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_sqrdmlsh_v8i16:
; CHECK-V8a:        sqrdmulh    v1.8h, v1.8h, v2.8h
; CHECK-V81a:       sqrdmlsh    v0.8h, v1.8h, v2.8h
; CHECK-V81a-apple: sqrdmlsh.8h v0,    v1,    v2
  %mulh = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
  %res = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %mulh)
  ret <8 x i16> %res
}
89*9880d681SAndroid Build Coastguard Worker
; <2 x i32>: sqsub(%acc, sqrdmulh(%mhs, %rhs)). The v8.1a runs expect sqrdmlsh;
; the run without +v8.1a expects a separate sqrdmulh.
define <2 x i32> @test_sqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_sqrdmlsh_v2i32:
; CHECK-V8a:        sqrdmulh    v1.2s, v1.2s, v2.2s
; CHECK-V81a:       sqrdmlsh    v0.2s, v1.2s, v2.2s
; CHECK-V81a-apple: sqrdmlsh.2s v0,    v1,    v2
  %mulh = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
  %res = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %mulh)
  ret <2 x i32> %res
}
99*9880d681SAndroid Build Coastguard Worker
; <4 x i32>: sqsub(%acc, sqrdmulh(%mhs, %rhs)). The v8.1a runs expect sqrdmlsh;
; the run without +v8.1a expects a separate sqrdmulh.
define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_sqrdmlsh_v4i32:
; CHECK-V8a:        sqrdmulh    v1.4s, v1.4s, v2.4s
; CHECK-V81a:       sqrdmlsh    v0.4s, v1.4s, v2.4s
; CHECK-V81a-apple: sqrdmlsh.4s v0,    v1,    v2
  %mulh = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
  %res = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %mulh)
  ret <4 x i32> %res
}
109*9880d681SAndroid Build Coastguard Worker
;-----------------------------------------------------------------------------
; RDMA Vector, by element
; tests for vXiYY_indexed in SIMDIndexedSQRDMLxHSDTied
113*9880d681SAndroid Build Coastguard Worker
; <4 x i16>, by element: lane 3 of %v is splatted with shufflevector and used as
; the second sqrdmulh operand; the product is then combined with %acc via sqadd.
define <4 x i16> @test_sqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_sqrdmlah_lane_s16:
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
  %retval =  call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod)
; CHECK-V8a:        sqrdmulh    v1.4h, v1.4h, v2.h[3]
; CHECK-V81a:       sqrdmlah    v0.4h, v1.4h, v2.h[3]
; CHECK-V81a-apple: sqrdmlah.4h v0,    v1,    v2[3]
  ret <4 x i16> %retval
}
125*9880d681SAndroid Build Coastguard Worker
; <8 x i16>, by element: lane 2 of %v is splatted with shufflevector and used as
; the second sqrdmulh operand; the product is then combined with %acc via sqadd.
define <8 x i16> @test_sqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) {
; CHECK-LABEL: test_sqrdmlahq_lane_s16:
; CHECK-V8a:        sqrdmulh    v1.8h, v1.8h, v2.h[2]
; CHECK-V81a:       sqrdmlah    v0.8h, v1.8h, v2.h[2]
; CHECK-V81a-apple: sqrdmlah.8h v0,    v1,    v2[2]
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %mulh = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %splat)
  %res = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %mulh)
  ret <8 x i16> %res
}
137*9880d681SAndroid Build Coastguard Worker
; <2 x i32>, by element: lane 1 of %v is splatted with shufflevector and used as
; the second sqrdmulh operand; the product is then combined with %acc via sqadd.
define <2 x i32> @test_sqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_sqrdmlah_lane_s32:
; CHECK-V8a:        sqrdmulh    v1.2s, v1.2s, v2.s[1]
; CHECK-V81a:       sqrdmlah    v0.2s, v1.2s, v2.s[1]
; CHECK-V81a-apple: sqrdmlah.2s v0,    v1,    v2[1]
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mulh = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %splat)
  %res = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %mulh)
  ret <2 x i32> %res
}
149*9880d681SAndroid Build Coastguard Worker
; <4 x i32>, by element: lane 0 of %v is splatted (zeroinitializer mask) and used
; as the second sqrdmulh operand; the product is then combined with %acc via sqadd.
define <4 x i32> @test_sqrdmlahq_lane_s32(<4 x i32> %acc, <4 x i32> %x, <4 x i32> %v) {
; CHECK-LABEL: test_sqrdmlahq_lane_s32:
; CHECK-V8a:        sqrdmulh    v1.4s, v1.4s, v2.s[0]
; CHECK-V81a:       sqrdmlah    v0.4s, v1.4s, v2.s[0]
; CHECK-V81a-apple: sqrdmlah.4s v0,    v1,    v2[0]
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %mulh = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %splat)
  %res = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %mulh)
  ret <4 x i32> %res
}
161*9880d681SAndroid Build Coastguard Worker
; <4 x i16>, by element: lane 3 of %v is splatted with shufflevector and used as
; the second sqrdmulh operand; the product is then combined with %acc via sqsub.
define <4 x i16> @test_sqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_sqrdmlsh_lane_s16:
; CHECK-V8a:        sqrdmulh    v1.4h, v1.4h, v2.h[3]
; CHECK-V81a:       sqrdmlsh    v0.4h, v1.4h, v2.h[3]
; CHECK-V81a-apple: sqrdmlsh.4h v0,    v1,    v2[3]
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mulh = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %splat)
  %res = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %mulh)
  ret <4 x i16> %res
}
173*9880d681SAndroid Build Coastguard Worker
; <8 x i16>, by element: lane 2 of %v is splatted with shufflevector and used as
; the second sqrdmulh operand; the product is then combined with %acc via sqsub.
define <8 x i16> @test_sqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) {
; CHECK-LABEL: test_sqrdmlshq_lane_s16:
; CHECK-V8a:        sqrdmulh    v1.8h, v1.8h, v2.h[2]
; CHECK-V81a:       sqrdmlsh    v0.8h, v1.8h, v2.h[2]
; CHECK-V81a-apple: sqrdmlsh.8h v0,    v1,    v2[2]
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %mulh = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %splat)
  %res = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %mulh)
  ret <8 x i16> %res
}
185*9880d681SAndroid Build Coastguard Worker
; <2 x i32>, by element: lane 1 of %v is splatted with shufflevector and used as
; the second sqrdmulh operand; the product is then combined with %acc via sqsub.
define <2 x i32> @test_sqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_sqrdmlsh_lane_s32:
; CHECK-V8a:        sqrdmulh    v1.2s, v1.2s, v2.s[1]
; CHECK-V81a:       sqrdmlsh    v0.2s, v1.2s, v2.s[1]
; CHECK-V81a-apple: sqrdmlsh.2s v0,    v1,    v2[1]
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mulh = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %splat)
  %res = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %mulh)
  ret <2 x i32> %res
}
197*9880d681SAndroid Build Coastguard Worker
; <4 x i32>, by element: lane 0 of %v is splatted (zeroinitializer mask) and used
; as the second sqrdmulh operand; the product is then combined with %acc via sqsub.
define <4 x i32> @test_sqrdmlshq_lane_s32(<4 x i32> %acc, <4 x i32> %x, <4 x i32> %v) {
; CHECK-LABEL: test_sqrdmlshq_lane_s32:
; CHECK-V8a:        sqrdmulh    v1.4s, v1.4s, v2.s[0]
; CHECK-V81a:       sqrdmlsh    v0.4s, v1.4s, v2.s[0]
; CHECK-V81a-apple: sqrdmlsh.4s v0,    v1,    v2[0]
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %mulh = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %splat)
  %res = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %mulh)
  ret <4 x i32> %res
}
209*9880d681SAndroid Build Coastguard Worker
;-----------------------------------------------------------------------------
; RDMA Vector, by element, extracted
; i16 tests are for vXi16_indexed in SIMDIndexedSQRDMLxHSDTied, with IR in ACLE style
; i32 tests are for   "def : Pat" in SIMDIndexedSQRDMLxHSDTied
214*9880d681SAndroid Build Coastguard Worker
; i16 accumulator with a <4 x i16> by-element product: %acc is inserted into
; lane 0 of an undef vector, combined with sqrdmulh(%x, splat(%v[1])) via the
; vector sqadd, and lane 0 of the result is returned.
define i16 @test_sqrdmlah_extracted_lane_s16(i16 %acc, <4 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_sqrdmlah_extracted_lane_s16:
; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.4h, v0.4h, v1.h[1]
; CHECK-V81a:       sqrdmlah    {{v[2-9]+}}.4h, v0.4h, v1.h[1]
; CHECK-V81a-apple: sqrdmlah.4h {{v[2-9]+}},    v0,    v1[1]
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mulh = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %splat)
  %acc.v = insertelement <4 x i16> undef, i16 %acc, i64 0
  %res.v = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc.v, <4 x i16> %mulh)
  %res = extractelement <4 x i16> %res.v, i64 0
  ret i16 %res
}
228*9880d681SAndroid Build Coastguard Worker
; i16 accumulator with an <8 x i16> by-element product: %acc is inserted into
; lane 0 of an undef vector, combined with sqrdmulh(%x, splat(%v[1])) via the
; vector sqadd, and lane 0 of the result is returned.
define i16 @test_sqrdmlahq_extracted_lane_s16(i16 %acc, <8 x i16> %x, <8 x i16> %v) {
; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s16:
; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.8h, v0.8h, v1.h[1]
; CHECK-V81a:       sqrdmlah    {{v[2-9]+}}.8h, v0.8h, v1.h[1]
; CHECK-V81a-apple: sqrdmlah.8h {{v[2-9]+}},    v0,    v1[1]
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mulh = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %splat)
  %acc.v = insertelement <8 x i16> undef, i16 %acc, i64 0
  %res.v = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc.v, <8 x i16> %mulh)
  %res = extractelement <8 x i16> %res.v, i64 0
  ret i16 %res
}
242*9880d681SAndroid Build Coastguard Worker
; i32 accumulator with a <2 x i32> by-element product: lane 0 of
; sqrdmulh(%x, splat(%v[0])) is extracted and combined with %acc via the
; scalar i32 sqadd.
define i32 @test_sqrdmlah_extracted_lane_s32(i32 %acc, <2 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_sqrdmlah_extracted_lane_s32:
; CHECK-V8a:        sqrdmulh    v0.2s, v0.2s, v1.s[0]
; CHECK-V81a:       sqrdmlah    v2.2s, v0.2s, v1.s[0]
; CHECK-V81a-apple: sqrdmlah.2s v2,    v0,    v1[0]
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %mulh = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %splat)
  %mulh.lane0 = extractelement <2 x i32> %mulh, i64 0
  %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %mulh.lane0)
  ret i32 %res
}
255*9880d681SAndroid Build Coastguard Worker
; i32 accumulator with a <4 x i32> by-element product: lane 0 of
; sqrdmulh(%x, splat(%v[0])) is extracted and combined with %acc via the
; scalar i32 sqadd.
define i32 @test_sqrdmlahq_extracted_lane_s32(i32 %acc, <4 x i32> %x, <4 x i32> %v) {
; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s32:
; CHECK-V8a:        sqrdmulh    v0.4s, v0.4s, v1.s[0]
; CHECK-V81a:       sqrdmlah    v2.4s, v0.4s, v1.s[0]
; CHECK-V81a-apple: sqrdmlah.4s v2,    v0,    v1[0]
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %mulh = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %splat)
  %mulh.lane0 = extractelement <4 x i32> %mulh, i64 0
  %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %mulh.lane0)
  ret i32 %res
}
268*9880d681SAndroid Build Coastguard Worker
; i16 accumulator with a <4 x i16> by-element product: %acc is inserted into
; lane 0 of an undef vector, combined with sqrdmulh(%x, splat(%v[1])) via the
; vector sqsub, and lane 0 of the result is returned.
define i16 @test_sqrdmlsh_extracted_lane_s16(i16 %acc, <4 x i16> %x, <4 x i16> %v) {
; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s16:
; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.4h, v0.4h, v1.h[1]
; CHECK-V81a:       sqrdmlsh    {{v[2-9]+}}.4h, v0.4h, v1.h[1]
; CHECK-V81a-apple: sqrdmlsh.4h {{v[2-9]+}},    v0,    v1[1]
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mulh = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %splat)
  %acc.v = insertelement <4 x i16> undef, i16 %acc, i64 0
  %res.v = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc.v, <4 x i16> %mulh)
  %res = extractelement <4 x i16> %res.v, i64 0
  ret i16 %res
}
282*9880d681SAndroid Build Coastguard Worker
; i16 accumulator with an <8 x i16> by-element product: %acc is inserted into
; lane 0 of an undef vector, combined with sqrdmulh(%x, splat(%v[1])) via the
; vector sqsub, and lane 0 of the result is returned.
define i16 @test_sqrdmlshq_extracted_lane_s16(i16 %acc, <8 x i16> %x, <8 x i16> %v) {
; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s16:
; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.8h, v0.8h, v1.h[1]
; CHECK-V81a:       sqrdmlsh    {{v[2-9]+}}.8h, v0.8h, v1.h[1]
; CHECK-V81a-apple: sqrdmlsh.8h {{v[2-9]+}},    v0,    v1[1]
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %mulh = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %splat)
  %acc.v = insertelement <8 x i16> undef, i16 %acc, i64 0
  %res.v = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc.v, <8 x i16> %mulh)
  %res = extractelement <8 x i16> %res.v, i64 0
  ret i16 %res
}
296*9880d681SAndroid Build Coastguard Worker
; i32 accumulator with a <2 x i32> by-element product: lane 0 of
; sqrdmulh(%x, splat(%v[0])) is extracted and combined with %acc via the
; scalar i32 sqsub.
define i32 @test_sqrdmlsh_extracted_lane_s32(i32 %acc, <2 x i32> %x, <2 x i32> %v) {
; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s32:
; CHECK-V8a:        sqrdmulh    v0.2s, v0.2s, v1.s[0]
; CHECK-V81a:       sqrdmlsh    v2.2s, v0.2s, v1.s[0]
; CHECK-V81a-apple: sqrdmlsh.2s v2,    v0,    v1[0]
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %mulh = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %splat)
  %mulh.lane0 = extractelement <2 x i32> %mulh, i64 0
  %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %mulh.lane0)
  ret i32 %res
}
309*9880d681SAndroid Build Coastguard Worker
; i32 accumulator with a <4 x i32> by-element product: lane 0 of
; sqrdmulh(%x, splat(%v[0])) is extracted and combined with %acc via the
; scalar i32 sqsub.
define i32 @test_sqrdmlshq_extracted_lane_s32(i32 %acc, <4 x i32> %x, <4 x i32> %v) {
; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s32:
; CHECK-V8a:        sqrdmulh    v0.4s, v0.4s, v1.s[0]
; CHECK-V81a:       sqrdmlsh    v2.4s, v0.4s, v1.s[0]
; CHECK-V81a-apple: sqrdmlsh.4s v2,    v0,    v1[0]
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %mulh = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %splat)
  %mulh.lane0 = extractelement <4 x i32> %mulh, i64 0
  %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %mulh.lane0)
  ret i32 %res
}
322*9880d681SAndroid Build Coastguard Worker
;-----------------------------------------------------------------------------
; RDMA Scalar
; test for "def : Pat" near SIMDThreeScalarHSTied in AArch64InstrInfo.td
326*9880d681SAndroid Build Coastguard Worker
; Scalar i16 operands carried in lane 0 of undef <4 x i16> vectors: %x and %y
; feed the vector sqrdmulh, %acc feeds the vector sqadd, and lane 0 of the
; result is returned.
define i16 @test_sqrdmlah_v1i16(i16 %acc, i16 %x, i16 %y) {
; CHECK-LABEL: test_sqrdmlah_v1i16:
; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK-V81a:       sqrdmlah    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}},    {{v[0-9]+}},    {{v[0-9]+}}
  %x.v = insertelement <4 x i16> undef, i16 %x, i64 0
  %y.v = insertelement <4 x i16> undef, i16 %y, i64 0
  %mulh.v = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x.v, <4 x i16> %y.v)
  %acc.v = insertelement <4 x i16> undef, i16 %acc, i64 0
  %res.v = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc.v, <4 x i16> %mulh.v)
  %res = extractelement <4 x i16> %res.v, i64 0
  ret i16 %res
}
340*9880d681SAndroid Build Coastguard Worker
; Scalar i32 accumulate expressed through the vector intrinsics: %x, %y and
; %acc are each inserted into lane 0 of an undef <4 x i32>, the vectors are
; passed through sqrdmulh.v4i32 and then sqadd.v4i32, and lane 0 of the result
; is returned.
; The run without +v8.1a expects a .4s sqrdmulh; the two +v8.1a runs (generic
; and apple syntax) expect a .4s sqrdmlah.
341*9880d681SAndroid Build Coastguard Workerdefine i32 @test_sqrdmlah_v1i32(i32 %acc, i32 %x, i32 %y) {
342*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_sqrdmlah_v1i32:
343*9880d681SAndroid Build Coastguard Worker  %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
344*9880d681SAndroid Build Coastguard Worker  %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
345*9880d681SAndroid Build Coastguard Worker  %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec,  <4 x i32> %y_vec)
346*9880d681SAndroid Build Coastguard Worker  %acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0
347*9880d681SAndroid Build Coastguard Worker  %retval_vec =  call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc_vec,  <4 x i32> %prod_vec)
348*9880d681SAndroid Build Coastguard Worker  %retval = extractelement <4 x i32> %retval_vec, i64 0
349*9880d681SAndroid Build Coastguard Worker; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
350*9880d681SAndroid Build Coastguard Worker; CHECK-V81a:       sqrdmlah    {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
351*9880d681SAndroid Build Coastguard Worker; CHECK-V81a-apple: sqrdmlah.4s {{v[0-9]+}},    {{v[0-9]+}},    {{v[0-9]+}}
352*9880d681SAndroid Build Coastguard Worker  ret i32 %retval
353*9880d681SAndroid Build Coastguard Worker}
354*9880d681SAndroid Build Coastguard Worker
355*9880d681SAndroid Build Coastguard Worker
; Subtracting counterpart of the i16 lane-0 test: %x, %y and %acc are each
; inserted into lane 0 of an undef <4 x i16>, the product comes from
; sqrdmulh.v4i16 and is combined with the accumulator via sqsub.v4i16, and
; lane 0 of the result is returned.
; The run without +v8.1a expects a .4h sqrdmulh; the two +v8.1a runs (generic
; and apple syntax) expect a .4h sqrdmlsh.
356*9880d681SAndroid Build Coastguard Workerdefine i16 @test_sqrdmlsh_v1i16(i16 %acc, i16 %x, i16 %y) {
357*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_sqrdmlsh_v1i16:
358*9880d681SAndroid Build Coastguard Worker  %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
359*9880d681SAndroid Build Coastguard Worker  %y_vec = insertelement <4 x i16> undef, i16 %y, i64 0
360*9880d681SAndroid Build Coastguard Worker  %prod_vec = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec,  <4 x i16> %y_vec)
361*9880d681SAndroid Build Coastguard Worker  %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
362*9880d681SAndroid Build Coastguard Worker  %retval_vec =  call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc_vec,  <4 x i16> %prod_vec)
363*9880d681SAndroid Build Coastguard Worker  %retval = extractelement <4 x i16> %retval_vec, i64 0
364*9880d681SAndroid Build Coastguard Worker; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
365*9880d681SAndroid Build Coastguard Worker; CHECK-V81a:       sqrdmlsh    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
366*9880d681SAndroid Build Coastguard Worker; CHECK-V81a-apple: sqrdmlsh.4h {{v[0-9]+}},    {{v[0-9]+}},    {{v[0-9]+}}
367*9880d681SAndroid Build Coastguard Worker  ret i16 %retval
368*9880d681SAndroid Build Coastguard Worker}
369*9880d681SAndroid Build Coastguard Worker
; Subtracting counterpart of the i32 lane-0 test: %x, %y and %acc are each
; inserted into lane 0 of an undef <4 x i32>, the product comes from
; sqrdmulh.v4i32 and is combined with the accumulator via sqsub.v4i32, and
; lane 0 of the result is returned.
; The run without +v8.1a expects a .4s sqrdmulh; the two +v8.1a runs (generic
; and apple syntax) expect a .4s sqrdmlsh.
370*9880d681SAndroid Build Coastguard Workerdefine i32 @test_sqrdmlsh_v1i32(i32 %acc, i32 %x, i32 %y) {
371*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_sqrdmlsh_v1i32:
372*9880d681SAndroid Build Coastguard Worker  %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
373*9880d681SAndroid Build Coastguard Worker  %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
374*9880d681SAndroid Build Coastguard Worker  %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec,  <4 x i32> %y_vec)
375*9880d681SAndroid Build Coastguard Worker  %acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0
376*9880d681SAndroid Build Coastguard Worker  %retval_vec =  call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc_vec,  <4 x i32> %prod_vec)
377*9880d681SAndroid Build Coastguard Worker  %retval = extractelement <4 x i32> %retval_vec, i64 0
378*9880d681SAndroid Build Coastguard Worker; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
379*9880d681SAndroid Build Coastguard Worker; CHECK-V81a:       sqrdmlsh    {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
380*9880d681SAndroid Build Coastguard Worker; CHECK-V81a-apple: sqrdmlsh.4s {{v[0-9]+}},    {{v[0-9]+}},    {{v[0-9]+}}
381*9880d681SAndroid Build Coastguard Worker  ret i32 %retval
382*9880d681SAndroid Build Coastguard Worker}
; Pure scalar i32 form: the product of %mhs and %rhs from the scalar
; sqrdmulh.i32 intrinsic is added to %acc with the scalar sqadd.i32 intrinsic.
; The run without +v8.1a expects an s-register sqrdmulh; the two +v8.1a runs
; expect an s-register sqrdmlah (same spelling in generic and apple syntax).
383*9880d681SAndroid Build Coastguard Workerdefine i32 @test_sqrdmlah_i32(i32 %acc, i32 %mhs, i32 %rhs) {
384*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_sqrdmlah_i32:
385*9880d681SAndroid Build Coastguard Worker  %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs,  i32 %rhs)
386*9880d681SAndroid Build Coastguard Worker  %retval =  call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc,  i32 %prod)
387*9880d681SAndroid Build Coastguard Worker; CHECK-V8a:        sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
388*9880d681SAndroid Build Coastguard Worker; CHECK-V81a:       sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
389*9880d681SAndroid Build Coastguard Worker; CHECK-V81a-apple: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
390*9880d681SAndroid Build Coastguard Worker  ret i32 %retval
391*9880d681SAndroid Build Coastguard Worker}
392*9880d681SAndroid Build Coastguard Worker
; Pure scalar i32 form, subtracting: the product of %mhs and %rhs from the
; scalar sqrdmulh.i32 intrinsic is combined with %acc using the scalar
; sqsub.i32 intrinsic.
; The run without +v8.1a expects an s-register sqrdmulh; the two +v8.1a runs
; expect an s-register sqrdmlsh (same spelling in generic and apple syntax).
393*9880d681SAndroid Build Coastguard Workerdefine i32 @test_sqrdmlsh_i32(i32 %acc, i32 %mhs, i32 %rhs) {
394*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_sqrdmlsh_i32:
395*9880d681SAndroid Build Coastguard Worker  %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs,  i32 %rhs)
396*9880d681SAndroid Build Coastguard Worker  %retval =  call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc,  i32 %prod)
397*9880d681SAndroid Build Coastguard Worker; CHECK-V8a:        sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
398*9880d681SAndroid Build Coastguard Worker; CHECK-V81a:       sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
399*9880d681SAndroid Build Coastguard Worker; CHECK-V81a-apple: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
400*9880d681SAndroid Build Coastguard Worker  ret i32 %retval
401*9880d681SAndroid Build Coastguard Worker}
402*9880d681SAndroid Build Coastguard Worker
403*9880d681SAndroid Build Coastguard Worker;-----------------------------------------------------------------------------
404*9880d681SAndroid Build Coastguard Worker; RDMA Scalar, by element
405*9880d681SAndroid Build Coastguard Worker; i16 tests are performed via tests in above chapter, with IR in ACLE style
406*9880d681SAndroid Build Coastguard Worker; i32 tests are for i32_indexed in SIMDIndexedSQRDMLxHSDTied
407*9880d681SAndroid Build Coastguard Worker
; i16 by-element accumulate: the shufflevector broadcasts lane 1 of %y_vec to
; all four lanes, %x and %acc are inserted into lane 0 of undef <4 x i16>
; vectors, the product comes from sqrdmulh.v4i16 and is added with
; sqadd.v4i16, and lane 0 of the result is returned.
; All three runs expect the indexed operand form selecting element 1 of v0:
; a .4h sqrdmulh without +v8.1a, a .4h sqrdmlah with +v8.1a.
; NOTE(review): the label directive below uses the default FileCheck prefix,
; which the RUN lines at the top of the file do not appear to enable; confirm
; that it is actually being checked.
408*9880d681SAndroid Build Coastguard Workerdefine i16 @test_sqrdmlah_extract_i16(i16 %acc, i16 %x, <4 x i16> %y_vec) {
409*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_sqrdmlah_extract_i16:
410*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i16> %y_vec, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
411*9880d681SAndroid Build Coastguard Worker  %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
412*9880d681SAndroid Build Coastguard Worker  %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %shuffle)
413*9880d681SAndroid Build Coastguard Worker  %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
414*9880d681SAndroid Build Coastguard Worker  %retval_vec =  call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
415*9880d681SAndroid Build Coastguard Worker  %retval = extractelement <4 x i16> %retval_vec, i32 0
416*9880d681SAndroid Build Coastguard Worker; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1]
417*9880d681SAndroid Build Coastguard Worker; CHECK-V81a:       sqrdmlah    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1]
418*9880d681SAndroid Build Coastguard Worker; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}},    {{v[0-9]+}}, v0[1]
419*9880d681SAndroid Build Coastguard Worker  ret i16 %retval
420*9880d681SAndroid Build Coastguard Worker}
421*9880d681SAndroid Build Coastguard Worker
; i32 by-element accumulate: element 3 of %rhs is extracted, multiplied with
; %mhs through the scalar sqrdmulh.i32 intrinsic, and added to %acc with the
; scalar sqadd.i32 intrinsic.
; All three runs expect s-register operands with element 3 of v0 as the
; indexed operand: sqrdmulh without +v8.1a, sqrdmlah with +v8.1a.
422*9880d681SAndroid Build Coastguard Workerdefine i32 @test_sqrdmlah_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
423*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_sqrdmlah_extract_i32:
424*9880d681SAndroid Build Coastguard Worker  %extract = extractelement <4 x i32> %rhs, i32 3
425*9880d681SAndroid Build Coastguard Worker  %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs,  i32 %extract)
426*9880d681SAndroid Build Coastguard Worker  %retval =  call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc,  i32 %prod)
427*9880d681SAndroid Build Coastguard Worker; CHECK-V8a:        sqrdmulh   {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
428*9880d681SAndroid Build Coastguard Worker; CHECK-V81a:       sqrdmlah   {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
429*9880d681SAndroid Build Coastguard Worker; CHECK-V81a-apple: sqrdmlah.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3]
430*9880d681SAndroid Build Coastguard Worker  ret i32 %retval
431*9880d681SAndroid Build Coastguard Worker}
432*9880d681SAndroid Build Coastguard Worker
; i16 by-element subtract on the 128-bit vector type: the shufflevector
; broadcasts lane 1 of %y_vec to all eight lanes, %x and %acc are inserted
; into lane 0 of undef <8 x i16> vectors, the product comes from
; sqrdmulh.v8i16 and is combined with the accumulator via sqsub.v8i16, and
; lane 0 of the result is returned.
; All three runs expect the indexed operand form selecting element 1 of v0:
; a .8h sqrdmulh without +v8.1a, a .8h sqrdmlsh with +v8.1a.
433*9880d681SAndroid Build Coastguard Workerdefine i16 @test_sqrdmlshq_extract_i16(i16 %acc, i16 %x, <8 x i16> %y_vec) {
434*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_sqrdmlshq_extract_i16:
435*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x i16> %y_vec, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1>
436*9880d681SAndroid Build Coastguard Worker  %x_vec = insertelement <8 x i16> undef, i16 %x, i64 0
437*9880d681SAndroid Build Coastguard Worker  %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x_vec, <8 x i16> %shuffle)
438*9880d681SAndroid Build Coastguard Worker  %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
439*9880d681SAndroid Build Coastguard Worker  %retval_vec =  call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
440*9880d681SAndroid Build Coastguard Worker  %retval = extractelement <8 x i16> %retval_vec, i32 0
441*9880d681SAndroid Build Coastguard Worker; CHECK-V8a:        sqrdmulh    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1]
442*9880d681SAndroid Build Coastguard Worker; CHECK-V81a:       sqrdmlsh    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1]
443*9880d681SAndroid Build Coastguard Worker; CHECK-V81a-apple: sqrdmlsh.8h {{v[0-9]+}},    {{v[0-9]+}}, v0[1]
444*9880d681SAndroid Build Coastguard Worker  ret i16 %retval
445*9880d681SAndroid Build Coastguard Worker}
446*9880d681SAndroid Build Coastguard Worker
; i32 by-element subtract: element 3 of %rhs is extracted, multiplied with
; %mhs through the scalar sqrdmulh.i32 intrinsic, and combined with %acc
; using the scalar sqsub.i32 intrinsic.
; All three runs expect s-register operands with element 3 of v0 as the
; indexed operand: sqrdmulh without +v8.1a, sqrdmlsh with +v8.1a.
447*9880d681SAndroid Build Coastguard Workerdefine i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
448*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_sqrdmlsh_extract_i32:
449*9880d681SAndroid Build Coastguard Worker  %extract = extractelement <4 x i32> %rhs, i32 3
450*9880d681SAndroid Build Coastguard Worker  %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs,  i32 %extract)
451*9880d681SAndroid Build Coastguard Worker  %retval =  call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc,  i32 %prod)
452*9880d681SAndroid Build Coastguard Worker; CHECK-V8a:        sqrdmulh   {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
453*9880d681SAndroid Build Coastguard Worker; CHECK-V81a:       sqrdmlsh   {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
454*9880d681SAndroid Build Coastguard Worker; CHECK-V81a-apple: sqrdmlsh.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3]
455*9880d681SAndroid Build Coastguard Worker  ret i32 %retval
456*9880d681SAndroid Build Coastguard Worker}
457