; Origin: /aosp_15_r20/external/llvm/test/CodeGen/ARM/vuzp.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; Unzip of two 64-bit <8 x i8> vectors: %tmp3 takes the even-indexed lanes and
; %tmp4 the odd-indexed lanes of the concatenated inputs. Both halves are used
; (added together); the expected code is one vuzp.8 on D registers followed by
; the vector add.
define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vuzp.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; Same unzip as @vuzpi8, but expressed as one shuffle producing a 128-bit
; result: even-indexed lanes in the low half, odd-indexed lanes in the high
; half. The expected code is one vuzp.8 whose two D-register outputs are
; returned directly in r0-r3.
define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vuzp.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <16 x i8> %tmp3
}

; Unzip of two 64-bit <4 x i16> vectors: even-indexed lanes in %tmp3,
; odd-indexed lanes in %tmp4, then added. The expected code is one vuzp.16 on
; D registers followed by the vector add.
define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpi16:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vuzp.16 d17, d16
; CHECK-NEXT:    vadd.i16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; Same unzip as @vuzpi16, written as a single shuffle with a 128-bit result
; (even-indexed lanes first, then odd-indexed lanes). The expected code is one
; vuzp.16 whose two D-register outputs are returned in r0-r3.
define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpi16_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vuzp.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  ret <8 x i16> %tmp3
}

; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.

; Unzip of two 128-bit <16 x i8> vectors: even-indexed lanes in %tmp3,
; odd-indexed lanes in %tmp4, then added. The expected code is one vuzp.8 on
; Q registers followed by the vector add.
define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpQi8:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.8 q9, q8
; CHECK-NEXT:    vadd.i8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; Same unzip as @vuzpQi8, written as a single shuffle with a 256-bit result
; (even-indexed lanes first, then odd-indexed lanes). The checks expect the
; inputs to be loaded through r1/r2, one vuzp.8 on Q registers, and the two
; result halves stored through r0.
define <32 x i8> @vuzpQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpQi8_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.8 q9, q8
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ret <32 x i8> %tmp3
}

; Unzip of two 128-bit <8 x i16> vectors: even-indexed lanes in %tmp3,
; odd-indexed lanes in %tmp4, then added. The expected code is one vuzp.16 on
; Q registers followed by the vector add.
define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpQi16:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.16 q9, q8
; CHECK-NEXT:    vadd.i16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; Same unzip as @vuzpQi16, written as a single shuffle with a 256-bit result
; (even-indexed lanes first, then odd-indexed lanes). The checks expect the
; inputs to be loaded through r1/r2, one vuzp.16 on Q registers, and the two
; result halves stored through r0.
define <16 x i16> @vuzpQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpQi16_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.16 q9, q8
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <16 x i16> %tmp3
}

; Unzip of two 128-bit <4 x i32> vectors: even-indexed lanes in %tmp3,
; odd-indexed lanes in %tmp4, then added. The expected code is one vuzp.32 on
; Q registers followed by the vector add.
define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vuzpQi32:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.32 q9, q8
; CHECK-NEXT:    vadd.i32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; Same unzip as @vuzpQi32, written as a single shuffle with a 256-bit result
; (even-indexed lanes first, then odd-indexed lanes). The checks expect the
; inputs to be loaded through r1/r2, one vuzp.32 on Q registers, and the two
; result halves stored through r0.
define <8 x i32> @vuzpQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vuzpQi32_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  ret <8 x i32> %tmp3
}

; Floating-point variant of @vuzpQi32: unzip of two 128-bit <4 x float>
; vectors with the even- and odd-lane halves combined by fadd. The expected
; code is one vuzp.32 on Q registers followed by vadd.f32.
define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vuzpQf:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.32 q9, q8
; CHECK-NEXT:    vadd.f32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; Same unzip as @vuzpQf, written as a single shuffle with a 256-bit result
; (even-indexed lanes first, then odd-indexed lanes). The checks expect the
; inputs to be loaded through r1/r2, one vuzp.32 on Q registers, and the two
; result halves stored through r0.
define <8 x float> @vuzpQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vuzpQf_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  ret <8 x float> %tmp3
}

; Undef shuffle indices should not prevent matching to VUZP:

; Variant of @vuzpi8 in which some indices of both unzip masks are undef.
; The expected code is unchanged: one vuzp.8 on D registers followed by the
; vector add.
define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_undef:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vuzp.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; Variant of @vuzpi8_Qres in which some indices of the combined even/odd mask
; are undef. The expected code is unchanged: one vuzp.8 whose two D-register
; outputs are returned in r0-r3.
define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_undef_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vuzp.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
  ret <16 x i8> %tmp3
}

; Variant of @vuzpQi16 in which some indices of both unzip masks are undef.
; The expected code is unchanged: one vuzp.16 on Q registers followed by the
; vector add.
define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.16 q9, q8
; CHECK-NEXT:    vadd.i16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; Variant of @vuzpQi16_QQres in which some indices of the combined even/odd
; mask are undef. The expected code is unchanged: inputs loaded through r1/r2,
; one vuzp.16 on Q registers, and the two result halves stored through r0.
define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.16 q9, q8
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
  ret <16 x i16> %tmp3
}

; The low half of the result mask is entirely undef; the high half <1,3,5,7>
; is the odd-lane unzip pattern. A vuzp instruction is still expected.
;
; The label pattern ends in ':' so that it anchors on the function's label
; line, like the other tests in this file. The instruction pattern ends in
; '.' so that it can only match a vuzp.<size> mnemonic and not the function's
; own name, which also contains "vuzp".
define <8 x i16> @vuzp_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
entry:
  ; CHECK-LABEL: vuzp_lower_shufflemask_undef:
  ; CHECK: vuzp.
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 3, i32 5, i32 7>
  ret <8 x i16> %0
}

; Result mask <0,0,1,3>: the low half repeats index 0, the high half <1,3> is
; the odd-lane unzip pattern. The checks require a vuzp instruction with no
; vtrn before it.
;
; The label pattern ends in ':' so that it anchors on the function's label
; line, like the other tests in this file. The instruction pattern ends in
; '.' so that it can only match a vuzp.<size> mnemonic and not the function's
; own name, which also contains "vuzp".
define <4 x i32> @vuzp_lower_shufflemask_zeroed(<2 x i32>* %A, <2 x i32>* %B) {
entry:
  ; CHECK-LABEL: vuzp_lower_shufflemask_zeroed:
  ; CHECK-NOT: vtrn
  ; CHECK: vuzp.
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3>
  ret <4 x i32> %0
}

; Result mask <1,3,0,2>: the odd-lane half comes first and the even-lane half
; second, i.e. the two unzip halves in swapped order. The result is stored
; through %C. The checks require a vuzp instruction with no vtrn before it.
;
; The label pattern ends in ':' so that it anchors on the function's label
; line, like the other tests in this file; the function name itself contains
; both "vuzp" and "vtrn", so patterns must not be allowed to match it. The
; instruction pattern ends in '.' so that it can only match a vuzp.<size>
; mnemonic.
define void @vuzp_rev_shufflemask_vtrn(<2 x i32>* %A, <2 x i32>* %B, <4 x i32>* %C) {
entry:
  ; CHECK-LABEL: vuzp_rev_shufflemask_vtrn:
  ; CHECK-NOT: vtrn
  ; CHECK: vuzp.
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
  store <4 x i32> %0, <4 x i32>* %C
  ret void
}

; Select between two <8 x i8> vectors using an <8 x i32> unsigned compare.
define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {
; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.
; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to
; truncate from i32 to i16 and one vuzp to perform the final truncation for i8.
; The label pattern ends in ':' so that it anchors on the function's label line, like the
; other tests in this file, rather than on any earlier mention of the symbol name.
; CHECK-LABEL: vuzp_trunc:
; CHECK: vmovn.i32
; CHECK: vmovn.i32
; CHECK: vuzp
; CHECK: vbsl
  %c = icmp ult <8 x i32> %cmp0, %cmp1
  %res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
  ret <8 x i8> %res
}

; Shuffle the result from the compare with a <4 x i8>.
; We need to extend the loaded <4 x i8> to <4 x i16>. Otherwise we wouldn't be able
; to perform the vuzp and get the vbsl mask.
; The label pattern ends in ':' so that it anchors on the function's label line, like the
; other tests in this file, rather than on any earlier mention of the symbol name.
define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
; CHECK-LABEL: vuzp_trunc_and_shuffle:
; CHECK: vmovl
; CHECK: vuzp
; CHECK: vbsl
  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  ; Lanes 0-3 of the select mask come from the compare, lanes 4-7 from %cmp2.
  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; Use an undef value for the <4 x i8> that is being shuffled with the compare result.
; This produces a build_vector with some of the operands undefs.
; The label pattern ends in ':' so that it anchors on the function's label line, like the
; other tests in this file, rather than on any earlier mention of the symbol name.
define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,
                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right:
; CHECK: vuzp
; CHECK: vbsl
  ; %cmp2_load and %cmp2 are computed but not used: the shuffle below takes
  ; undef as its right-hand operand.
  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  ; Lanes 0-3 of the select mask come from the compare, lanes 4-7 are undef.
  %c = shufflevector <4 x i1> %c0, <4 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; Mirror of @vuzp_trunc_and_shuffle_undef_right: here the left-hand shuffle
; operand is undef and the compare result supplies the right-hand operand.
; The label pattern ends in ':' so that it anchors on the function's label
; line, like the other tests in this file, rather than on any earlier mention
; of the symbol name.
define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left:
; CHECK: vuzp
; CHECK: vbsl
  ; %cmp2_load and %cmp2 are computed but not used: the shuffle below takes
  ; undef and %c0 as its operands.
  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  ; Lanes 0-3 of the select mask are undef, lanes 4-7 come from the compare.
  %c = shufflevector <4 x i1> undef, <4 x i1> %c0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; We're using large data types here, and we have to fill with undef values until we
; get some vector size that we can represent.
; Only the presence of a vbsl is checked. The label pattern ends in ':' so that it
; anchors on the function's label line, like the other tests in this file.
define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
                            <5 x i32> %cmp0, <5 x i32> %cmp1, <5 x i8> *%cmp2_ptr) {
; CHECK-LABEL: vuzp_wide_type:
; CHECK: vbsl
  %cmp2_load = load <5 x i8>, <5 x i8> * %cmp2_ptr, align 4
  %cmp2 = trunc <5 x i8> %cmp2_load to <5 x i1>
  %c0 = icmp ult <5 x i32> %cmp0, %cmp1
  ; Lanes 0-4 of the select mask come from the compare, lanes 5-9 from %cmp2.
  %c = shufflevector <5 x i1> %c0, <5 x i1> %cmp2, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
  %rv = select <10 x i1> %c, <10 x i8> %tr0, <10 x i8> %tr1
  ret <10 x i8> %rv
}