xref: /aosp_15_r20/external/llvm/test/CodeGen/ARM/vzip.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
; vzipi8: interleave of two <8 x i8> vectors held in 64-bit D registers.
; %tmp3 is the low-half interleave (mask 0,8,1,9,...) and %tmp4 the high-half
; interleave (mask 4,12,5,13,...). Both are consumed by the add, and the
; expected output is a single vzip.8 whose two results feed vadd.i8.
define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vzip.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}
19*9880d681SAndroid Build Coastguard Worker
; vzipi8_Qres: one shufflevector produces the complete 16-element interleave
; of two <8 x i8> inputs. The expectations require a single vzip.8 on the two
; loaded D registers, after which both registers are moved out to r0-r3.
; Register numbers are captured with FileCheck variables rather than fixed.
define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vzip.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %tmp3
}
34*9880d681SAndroid Build Coastguard Worker
; vzipi16: interleave of two <4 x i16> vectors held in 64-bit D registers.
; %tmp3 is the low-half interleave (mask 0,4,1,5) and %tmp4 the high-half
; interleave (mask 2,6,3,7). Both are consumed by the add, and the expected
; output is a single vzip.16 whose two results feed vadd.i16.
define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vzip.16 d17, d16
; CHECK-NEXT:    vadd.i16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}
51*9880d681SAndroid Build Coastguard Worker
; vzipi16_Qres: one shufflevector produces the complete 8-element interleave
; of two <4 x i16> inputs. The expectations require a single vzip.16 on the
; two loaded D registers, after which both registers are moved out to r0-r3.
; Register numbers are captured with FileCheck variables rather than fixed.
define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vzip.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %tmp3
}
66*9880d681SAndroid Build Coastguard Worker
; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors, so no 64-bit i32/f32
; VZIP case is tested in this file.
68*9880d681SAndroid Build Coastguard Worker
; vzipQi8: interleave of two <16 x i8> vectors held in 128-bit Q registers.
; %tmp3 is the low-half interleave (mask 0,16,1,17,...) and %tmp4 the
; high-half interleave (mask 8,24,9,25,...). Both are consumed by the add, and
; the expected output is a single vzip.8 on q9/q8 feeding vadd.i8.
define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.8 q9, q8
; CHECK-NEXT:    vadd.i8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}
86*9880d681SAndroid Build Coastguard Worker
; vzipQi8_QQres: one shufflevector produces the complete 32-element interleave
; of two <16 x i8> inputs. The expectations require a single vzip.8 on Q
; registers followed by two vst1 stores through r0; the inputs are loaded from
; r1 and r2 in the expected output.
define <32 x i8> @vzipQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.8 q9, q8
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <32 x i8> %tmp3
}
101*9880d681SAndroid Build Coastguard Worker
; vzipQi16: interleave of two <8 x i16> vectors held in 128-bit Q registers.
; %tmp3 is the low-half interleave (mask 0,8,1,9,...) and %tmp4 the high-half
; interleave (mask 4,12,5,13,...). Both are consumed by the add, and the
; expected output is a single vzip.16 on q9/q8 feeding vadd.i16.
define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vzipQi16:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.16 q9, q8
; CHECK-NEXT:    vadd.i16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}
119*9880d681SAndroid Build Coastguard Worker
; vzipQi16_QQres: one shufflevector produces the complete 16-element
; interleave of two <8 x i16> inputs. The expectations require a single
; vzip.16 on Q registers followed by two vst1 stores through r0; the inputs
; are loaded from r1 and r2 in the expected output.
define <16 x i16> @vzipQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vzipQi16_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.16 q9, q8
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i16> %tmp3
}
134*9880d681SAndroid Build Coastguard Worker
; vzipQi32: interleave of two <4 x i32> vectors held in 128-bit Q registers.
; %tmp3 is the low-half interleave (mask 0,4,1,5) and %tmp4 the high-half
; interleave (mask 2,6,3,7). Both are consumed by the add, and the expected
; output is a single vzip.32 on q9/q8 feeding vadd.i32.
define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vzipQi32:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.32 q9, q8
; CHECK-NEXT:    vadd.i32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}
152*9880d681SAndroid Build Coastguard Worker
; vzipQi32_QQres: one shufflevector produces the complete 8-element interleave
; of two <4 x i32> inputs. The expectations require a single vzip.32 on Q
; registers followed by two vst1 stores through r0; the inputs are loaded from
; r1 and r2 in the expected output.
define <8 x i32> @vzipQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vzipQi32_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i32> %tmp3
}
167*9880d681SAndroid Build Coastguard Worker
; vzipQf: floating-point counterpart of the 128-bit i32 case. Two <4 x float>
; vectors are interleaved as low half (mask 0,4,1,5) and high half (mask
; 2,6,3,7), and the halves are summed with fadd. The expected output is a
; single vzip.32 on q9/q8 feeding vadd.f32.
define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vzipQf:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.32 q9, q8
; CHECK-NEXT:    vadd.f32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}
185*9880d681SAndroid Build Coastguard Worker
; vzipQf_QQres: one shufflevector produces the complete 8-element interleave
; of two <4 x float> inputs. The expectations require a single vzip.32 on Q
; registers followed by two vst1 stores through r0; the inputs are loaded from
; r1 and r2 in the expected output.
define <8 x float> @vzipQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vzipQf_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x float> %tmp3
}
200*9880d681SAndroid Build Coastguard Worker
; Undef shuffle indices should not prevent matching to VZIP:
202*9880d681SAndroid Build Coastguard Worker
; vzipi8_undef: the 64-bit <8 x i8> low/high interleave pair, but with several
; mask elements replaced by undef in both shuffles. The expected instruction
; sequence is still one vzip.8 on D registers feeding vadd.i8, i.e. the undef
; lanes must not stop the masks from being recognised as a zip.
define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vzip.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}
219*9880d681SAndroid Build Coastguard Worker
; vzipi8_undef_Qres: a single 16-element interleave of two <8 x i8> inputs in
; which some mask elements are undef. The expectations still require one
; vzip.8 on the two loaded D registers, with both registers then moved out to
; r0-r3.
define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vzip.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
  ret <16 x i8> %tmp3
}
234*9880d681SAndroid Build Coastguard Worker
; vzipQi8_undef: the 128-bit <16 x i8> low/high interleave pair, but with
; several mask elements replaced by undef in both shuffles. The expected
; instruction sequence is still one vzip.8 on q9/q8 feeding vadd.i8.
define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_undef:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.8 q9, q8
; CHECK-NEXT:    vadd.i8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}
252*9880d681SAndroid Build Coastguard Worker
; vzipQi8_undef_QQres: a single 32-element interleave of two <16 x i8> inputs
; in which some mask elements are undef. The expectations still require one
; vzip.8 on Q registers followed by two vst1 stores through r0; the inputs are
; loaded from r1 and r2 in the expected output.
define <32 x i8> @vzipQi8_undef_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_undef_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.8 q9, q8
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
  ret <32 x i8> %tmp3
}
267*9880d681SAndroid Build Coastguard Worker
; vzip_lower_shufflemask_undef: 8-element shuffle of two <4 x i16> inputs
; whose low four result lanes are all undef and whose high four lanes are the
; high-half interleave (2,6,3,7). The test only requires that a vzip
; instruction appears in the function.
define <8 x i16> @vzip_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
entry:
  ; CHECK-LABEL: vzip_lower_shufflemask_undef:
  ; CHECK: vzip
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %0
}
277*9880d681SAndroid Build Coastguard Worker
; vzip_lower_shufflemask_zeroed: 4-element shuffle with mask <0,0,1,0> where
; both operands are the same loaded <2 x i32> value. The test requires that a
; vzip is emitted and that no vtrn appears before it in the function.
define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
entry:
  ; CHECK-LABEL: vzip_lower_shufflemask_zeroed:
  ; CHECK-NOT: vtrn
  ; CHECK: vzip
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  ret <4 x i32> %0
}
287*9880d681SAndroid Build Coastguard Worker
; vzip_lower_shufflemask_vuzp: 4-element shuffle with mask <0,2,1,0> where
; both operands are the same loaded <2 x i32> value. The test requires that a
; vzip is emitted and that no vuzp appears before it in the function.
define <4 x i32> @vzip_lower_shufflemask_vuzp(<2 x i32>* %A) {
entry:
  ; CHECK-LABEL: vzip_lower_shufflemask_vuzp:
  ; CHECK-NOT: vuzp
  ; CHECK: vzip
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
  ret <4 x i32> %0
}
297*9880d681SAndroid Build Coastguard Worker
; vzip_undef_rev_shufflemask_vtrn: 4-element shuffle with mask <1,1,0,0> of a
; loaded <2 x i32> value against an undef second operand; the result is stored
; through %B. The test requires that a vzip is emitted and that no vtrn
; appears before it in the function.
define void @vzip_undef_rev_shufflemask_vtrn(<2 x i32>* %A, <4 x i32>* %B) {
entry:
  ; CHECK-LABEL: vzip_undef_rev_shufflemask_vtrn:
  ; CHECK-NOT: vtrn
  ; CHECK: vzip
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
  store <4 x i32> %0, <4 x i32>* %B
  ret void
}
308*9880d681SAndroid Build Coastguard Worker
; vzip_vext_factor: 4-element shuffle with mask <4,4,5,3> taken from a loaded
; <8 x i16> value (second operand undef); the result is stored through %B.
; The test expects "vext.16 d16, d16, d17, #3" followed later by a vzip.
define void @vzip_vext_factor(<8 x i16>* %A, <4 x i16>* %B) {
entry:
  ; CHECK-LABEL: vzip_vext_factor:
  ; CHECK: vext.16 d16, d16, d17, #3
  ; CHECK: vzip
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %0 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 4, i32 5, i32 3>
  store <4 x i16> %0, <4 x i16>* %B
  ret void
}
319