xref: /aosp_15_r20/external/llvm/test/CodeGen/ARM/vtrn.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
; vtrni8: loads <8 x i8> vectors from %A and %B, forms the even-lane
; interleave (mask 0, 8, 2, 10, ...) and the odd-lane interleave (mask
; 1, 9, 3, 11, ...) with two shufflevectors, and returns their sum.
; The expectations below require one vtrn.8 on D registers followed by
; vadd.i8, i.e. both shuffles are covered by a single transpose.
3*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
4*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrni8:
5*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
6*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d16, [r1]
7*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d17, [r0]
8*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.8 d17, d16
9*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.i8 d16, d17, d16
10*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
11*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
12*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %A
13*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i8>, <8 x i8>* %B
14*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
15*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
16*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i8> %tmp3, %tmp4
17*9880d681SAndroid Build Coastguard Worker	ret <8 x i8> %tmp5
18*9880d681SAndroid Build Coastguard Worker}
19*9880d681SAndroid Build Coastguard Worker
; vtrni8_Qres: a single shufflevector of two <8 x i8> inputs producing a
; <16 x i8> result whose low half is the even-lane interleave and whose
; high half is the odd-lane interleave. The expectations require one
; vtrn.8 on the two loaded D registers, which are then returned in r0-r3.
; Register numbers are captured as FileCheck variables, not hard-coded.
20*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
21*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrni8_Qres:
22*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
23*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
24*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
25*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.8 [[LDR0]], [[LDR1]]
26*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
27*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
28*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
29*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %A
30*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i8>, <8 x i8>* %B
31*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
32*9880d681SAndroid Build Coastguard Worker	ret <16 x i8> %tmp3
33*9880d681SAndroid Build Coastguard Worker}
34*9880d681SAndroid Build Coastguard Worker
; vtrni16: <4 x i16> variant of the two-shuffle transpose test. The
; masks <0, 4, 2, 6> and <1, 5, 3, 7> select the even-lane and odd-lane
; interleaves of %A and %B; their sum is returned. The expectations
; require one vtrn.16 on D registers followed by vadd.i16.
35*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
36*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrni16:
37*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
38*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d16, [r1]
39*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d17, [r0]
40*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.16 d17, d16
41*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.i16 d16, d17, d16
42*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
43*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
44*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i16>, <4 x i16>* %A
45*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x i16>, <4 x i16>* %B
46*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
47*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
48*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <4 x i16> %tmp3, %tmp4
49*9880d681SAndroid Build Coastguard Worker	ret <4 x i16> %tmp5
50*9880d681SAndroid Build Coastguard Worker}
51*9880d681SAndroid Build Coastguard Worker
; vtrni16_Qres: one shufflevector of two <4 x i16> inputs producing an
; <8 x i16> result: even-lane interleave <0, 4, 2, 6> in the low half,
; odd-lane interleave <1, 5, 3, 7> in the high half. The expectations
; require one vtrn.16 on the loaded D registers, returned in r0-r3.
52*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
53*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrni16_Qres:
54*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
55*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
56*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
57*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.16 [[LDR0]], [[LDR1]]
58*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
59*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
60*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
61*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i16>, <4 x i16>* %A
62*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x i16>, <4 x i16>* %B
63*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
64*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp3
65*9880d681SAndroid Build Coastguard Worker}
66*9880d681SAndroid Build Coastguard Worker
; vtrni32: <2 x i32> variant of the two-shuffle transpose test. Masks
; <0, 2> and <1, 3> pick lane 0 of each input and lane 1 of each input
; respectively; the two results are added. The expectations require
; one vtrn.32 on D registers followed by vadd.i32.
67*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
68*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrni32:
69*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
70*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d16, [r1]
71*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d17, [r0]
72*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.32 d17, d16
73*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.i32 d16, d17, d16
74*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
75*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
76*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x i32>, <2 x i32>* %A
77*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <2 x i32>, <2 x i32>* %B
78*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
79*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
80*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <2 x i32> %tmp3, %tmp4
81*9880d681SAndroid Build Coastguard Worker	ret <2 x i32> %tmp5
82*9880d681SAndroid Build Coastguard Worker}
83*9880d681SAndroid Build Coastguard Worker
; vtrni32_Qres: one shufflevector of two <2 x i32> inputs producing a
; <4 x i32> result with mask <0, 2, 1, 3>. The expectations require
; one vtrn.32 on the loaded D registers, which are returned in r0-r3.
84*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind {
85*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrni32_Qres:
86*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
87*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
88*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
89*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.32 [[LDR0]], [[LDR1]]
90*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
91*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
92*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
93*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x i32>, <2 x i32>* %A
94*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <2 x i32>, <2 x i32>* %B
95*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
96*9880d681SAndroid Build Coastguard Worker	ret <4 x i32> %tmp3
97*9880d681SAndroid Build Coastguard Worker}
98*9880d681SAndroid Build Coastguard Worker
; vtrnf: <2 x float> variant of the two-shuffle transpose test, using
; masks <0, 2> and <1, 3> and an fadd of the two results. The
; expectations require one vtrn.32 on D registers followed by vadd.f32.
99*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
100*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnf:
101*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
102*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d16, [r1]
103*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d17, [r0]
104*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.32 d17, d16
105*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.f32 d16, d17, d16
106*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
107*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
108*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x float>, <2 x float>* %A
109*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <2 x float>, <2 x float>* %B
110*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
111*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
112*9880d681SAndroid Build Coastguard Worker        %tmp5 = fadd <2 x float> %tmp3, %tmp4
113*9880d681SAndroid Build Coastguard Worker	ret <2 x float> %tmp5
114*9880d681SAndroid Build Coastguard Worker}
115*9880d681SAndroid Build Coastguard Worker
; vtrnf_Qres: one shufflevector of two <2 x float> inputs producing a
; <4 x float> result with mask <0, 2, 1, 3>. The expectations require
; one vtrn.32 on the loaded D registers, which are returned in r0-r3.
116*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind {
117*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnf_Qres:
118*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
119*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
120*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
121*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.32 [[LDR0]], [[LDR1]]
122*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
123*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
124*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
125*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x float>, <2 x float>* %A
126*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <2 x float>, <2 x float>* %B
127*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
128*9880d681SAndroid Build Coastguard Worker	ret <4 x float> %tmp3
129*9880d681SAndroid Build Coastguard Worker}
130*9880d681SAndroid Build Coastguard Worker
; vtrnQi8: 128-bit (<16 x i8>) variant of the two-shuffle transpose
; test. The masks select the even-lane interleave (0, 16, 2, 18, ...)
; and the odd-lane interleave (1, 17, 3, 19, ...) of %A and %B; their
; sum is returned. The expectations require one vtrn.8 on Q registers
; followed by vadd.i8, with the result returned in r0-r3.
131*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
132*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQi8:
133*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
134*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
135*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
136*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.8 q9, q8
137*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.i8 q8, q9, q8
138*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
139*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, d17
140*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
141*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <16 x i8>, <16 x i8>* %A
142*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <16 x i8>, <16 x i8>* %B
143*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
144*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
145*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <16 x i8> %tmp3, %tmp4
146*9880d681SAndroid Build Coastguard Worker	ret <16 x i8> %tmp5
147*9880d681SAndroid Build Coastguard Worker}
148*9880d681SAndroid Build Coastguard Worker
; vtrnQi8_QQres: one shufflevector of two <16 x i8> inputs producing a
; <32 x i8> result: even-lane interleave in the low half, odd-lane
; interleave in the high half. In the expected code the operands are
; loaded through r1 and r2 and both result Q registers are stored
; through r0 (post-incremented after the first store), with one vtrn.8
; in between.
; NOTE(review): presumably r0 holds a hidden pointer for the indirectly
; returned 256-bit value -- confirm against the ARM calling convention.
149*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @vtrnQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
150*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQi8_QQres:
151*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
152*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
153*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
154*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.8 q9, q8
155*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
156*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
157*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
158*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <16 x i8>, <16 x i8>* %A
159*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <16 x i8>, <16 x i8>* %B
160*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30, i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
161*9880d681SAndroid Build Coastguard Worker	ret <32 x i8> %tmp3
162*9880d681SAndroid Build Coastguard Worker}
163*9880d681SAndroid Build Coastguard Worker
; vtrnQi16: 128-bit (<8 x i16>) variant of the two-shuffle transpose
; test, using the even-lane mask (0, 8, 2, 10, ...) and the odd-lane
; mask (1, 9, 3, 11, ...) and adding the results. The expectations
; require one vtrn.16 on Q registers followed by vadd.i16.
164*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
165*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQi16:
166*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
167*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
168*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
169*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.16 q9, q8
170*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.i16 q8, q9, q8
171*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
172*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, d17
173*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
174*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %A
175*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i16>, <8 x i16>* %B
176*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
177*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
178*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i16> %tmp3, %tmp4
179*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp5
180*9880d681SAndroid Build Coastguard Worker}
181*9880d681SAndroid Build Coastguard Worker
; vtrnQi16_QQres: one shufflevector of two <8 x i16> inputs producing a
; <16 x i16> result: even-lane interleave in the low half, odd-lane
; interleave in the high half. The expected code loads the operands
; through r1 and r2, performs one vtrn.16 on Q registers, and stores
; both result registers through r0.
182*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @vtrnQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
183*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQi16_QQres:
184*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
185*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
186*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
187*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.16 q9, q8
188*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
189*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
190*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
191*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %A
192*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i16>, <8 x i16>* %B
193*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
194*9880d681SAndroid Build Coastguard Worker	ret <16 x i16> %tmp3
195*9880d681SAndroid Build Coastguard Worker}
196*9880d681SAndroid Build Coastguard Worker
; vtrnQi32: 128-bit (<4 x i32>) variant of the two-shuffle transpose
; test, using masks <0, 4, 2, 6> and <1, 5, 3, 7> and adding the
; results. The expectations require one vtrn.32 on Q registers
; followed by vadd.i32.
197*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
198*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQi32:
199*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
200*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
201*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
202*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.32 q9, q8
203*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.i32 q8, q9, q8
204*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
205*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, d17
206*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
207*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i32>, <4 x i32>* %A
208*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x i32>, <4 x i32>* %B
209*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
210*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
211*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <4 x i32> %tmp3, %tmp4
212*9880d681SAndroid Build Coastguard Worker	ret <4 x i32> %tmp5
213*9880d681SAndroid Build Coastguard Worker}
214*9880d681SAndroid Build Coastguard Worker
; vtrnQi32_QQres: one shufflevector of two <4 x i32> inputs producing
; an <8 x i32> result with mask <0, 4, 2, 6, 1, 5, 3, 7>. The expected
; code loads the operands through r1 and r2, performs one vtrn.32 on Q
; registers, and stores both result registers through r0.
215*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @vtrnQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
216*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQi32_QQres:
217*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
218*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
219*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
220*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.32 q9, q8
221*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
222*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
223*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
224*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i32>, <4 x i32>* %A
225*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x i32>, <4 x i32>* %B
226*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
227*9880d681SAndroid Build Coastguard Worker	ret <8 x i32> %tmp3
228*9880d681SAndroid Build Coastguard Worker}
229*9880d681SAndroid Build Coastguard Worker
; vtrnQf: 128-bit (<4 x float>) variant of the two-shuffle transpose
; test, using masks <0, 4, 2, 6> and <1, 5, 3, 7> and an fadd of the
; results. The expectations require one vtrn.32 on Q registers
; followed by vadd.f32.
230*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
231*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQf:
232*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
233*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
234*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
235*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.32 q9, q8
236*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.f32 q8, q9, q8
237*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
238*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, d17
239*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
240*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x float>, <4 x float>* %A
241*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x float>, <4 x float>* %B
242*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
243*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
244*9880d681SAndroid Build Coastguard Worker        %tmp5 = fadd <4 x float> %tmp3, %tmp4
245*9880d681SAndroid Build Coastguard Worker	ret <4 x float> %tmp5
246*9880d681SAndroid Build Coastguard Worker}
247*9880d681SAndroid Build Coastguard Worker
; vtrnQf_QQres: one shufflevector of two <4 x float> inputs producing
; an <8 x float> result with mask <0, 4, 2, 6, 1, 5, 3, 7>. The
; expected code loads the operands through r1 and r2, performs one
; vtrn.32 on Q registers, and stores both result registers through r0.
248*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @vtrnQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
249*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQf_QQres:
250*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
251*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
252*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
253*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.32 q9, q8
254*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
255*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
256*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
257*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x float>, <4 x float>* %A
258*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x float>, <4 x float>* %B
259*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
260*9880d681SAndroid Build Coastguard Worker	ret <8 x float> %tmp3
261*9880d681SAndroid Build Coastguard Worker}
262*9880d681SAndroid Build Coastguard Worker
263*9880d681SAndroid Build Coastguard Worker
; vtrni8_undef: same shape as the <8 x i8> two-shuffle transpose test,
; but several mask elements in both shuffles are undef. The defined
; elements still match the even-lane and odd-lane interleave patterns,
; and the expectations still require one vtrn.8 followed by vadd.i8.
264*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
265*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrni8_undef:
266*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
267*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d16, [r1]
268*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr d17, [r0]
269*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.8 d17, d16
270*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.i8 d16, d17, d16
271*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
272*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
273*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %A
274*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i8>, <8 x i8>* %B
275*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
276*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
277*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i8> %tmp3, %tmp4
278*9880d681SAndroid Build Coastguard Worker	ret <8 x i8> %tmp5
279*9880d681SAndroid Build Coastguard Worker}
280*9880d681SAndroid Build Coastguard Worker
; vtrni8_undef_Qres: single-shuffle, double-width-result form of the
; <8 x i8> transpose with some mask elements undef. The defined
; elements match the even-lane pattern in the low half and the odd-lane
; pattern in the high half; the expectations still require one vtrn.8
; with both D registers returned in r0-r3.
281*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
282*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrni8_undef_Qres:
283*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
284*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
285*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
286*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.8 [[LDR0]], [[LDR1]]
287*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
288*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
289*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
290*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %A
291*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i8>, <8 x i8>* %B
292*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
293*9880d681SAndroid Build Coastguard Worker	ret <16 x i8> %tmp3
294*9880d681SAndroid Build Coastguard Worker}
295*9880d681SAndroid Build Coastguard Worker
; vtrnQi16_undef: <8 x i16> two-shuffle transpose test with several
; undef mask elements in both shuffles. The defined elements still
; match the even-lane and odd-lane interleave patterns, and the
; expectations still require one vtrn.16 on Q registers followed by
; vadd.i16.
296*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
297*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQi16_undef:
298*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
299*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
300*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
301*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.16 q9, q8
302*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vadd.i16 q8, q9, q8
303*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r0, r1, d16
304*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmov r2, r3, d17
305*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
306*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %A
307*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i16>, <8 x i16>* %B
308*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
309*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
310*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i16> %tmp3, %tmp4
311*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp5
312*9880d681SAndroid Build Coastguard Worker}
313*9880d681SAndroid Build Coastguard Worker
; vtrnQi16_undef_QQres: single-shuffle, double-width-result form of the
; <8 x i16> transpose with some mask elements undef. The expected code
; loads the operands through r1 and r2, performs one vtrn.16 on Q
; registers, and stores both result registers through r0.
314*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @vtrnQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
315*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vtrnQi16_undef_QQres:
316*9880d681SAndroid Build Coastguard Worker; CHECK:       @ BB#0:
317*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
318*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
319*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vtrn.16 q9, q8
320*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
321*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
322*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    mov pc, lr
323*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %A
324*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i16>, <8 x i16>* %B
325*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14, i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
326*9880d681SAndroid Build Coastguard Worker	ret <16 x i16> %tmp3
327*9880d681SAndroid Build Coastguard Worker}
328*9880d681SAndroid Build Coastguard Worker
; vtrn_lower_shufflemask_undef: double-width shuffle of two <4 x i16>
; inputs whose lower four mask elements are all undef and whose upper
; four are the odd-lane interleave <1, 5, 3, 7>. The only expectation
; is that some vtrn instruction is still emitted for this function.
329*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vtrn_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
330*9880d681SAndroid Build Coastguard Workerentry:
331*9880d681SAndroid Build Coastguard Worker  ; CHECK-LABEL: vtrn_lower_shufflemask_undef
332*9880d681SAndroid Build Coastguard Worker  ; CHECK: vtrn
333*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i16>, <4 x i16>* %A
334*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x i16>, <4 x i16>* %B
335*9880d681SAndroid Build Coastguard Worker  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
336*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %0
337*9880d681SAndroid Build Coastguard Worker}
338*9880d681SAndroid Build Coastguard Worker
339*9880d681SAndroid Build Coastguard Worker; Here we get a build_vector node, where all the incoming extract_element
340*9880d681SAndroid Build Coastguard Worker; values do modify the type. However, we get different input types, as some of
341*9880d681SAndroid Build Coastguard Worker; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of
342*9880d681SAndroid Build Coastguard Worker; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).
; vtrn_mismatched_builvector0: compares <4 x i32> and <4 x i16> operand
; pairs with icmp ult, interleaves the two <4 x i1> results element by
; element into an <8 x i1> mask, and uses it to select between %tr0 and
; %tr1. The expectations require vmovn.i32, then a vtrn, then vbsl, in
; that order.
343*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
344*9880d681SAndroid Build Coastguard Worker                                             <4 x i32> %cmp0, <4 x i32> %cmp1,
345*9880d681SAndroid Build Coastguard Worker                                             <4 x i16> %cmp2, <4 x i16> %cmp3) {
346*9880d681SAndroid Build Coastguard Worker  ; CHECK-LABEL: vtrn_mismatched_builvector0
347*9880d681SAndroid Build Coastguard Worker  ; CHECK: vmovn.i32
348*9880d681SAndroid Build Coastguard Worker  ; CHECK: vtrn
349*9880d681SAndroid Build Coastguard Worker  ; CHECK: vbsl
350*9880d681SAndroid Build Coastguard Worker  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
351*9880d681SAndroid Build Coastguard Worker  %c1 = icmp ult <4 x i16> %cmp2, %cmp3
352*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
353*9880d681SAndroid Build Coastguard Worker  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
354*9880d681SAndroid Build Coastguard Worker  ret <8 x i8> %rv
355*9880d681SAndroid Build Coastguard Worker}
356*9880d681SAndroid Build Coastguard Worker
357*9880d681SAndroid Build Coastguard Worker; Here we get a build_vector node, where half the incoming extract_element
358*9880d681SAndroid Build Coastguard Worker; values do not modify the type (the values from cmp2), but half of them do
359*9880d681SAndroid Build Coastguard Worker; (from the icmp operation).
; vtrn_mismatched_builvector1: builds an <8 x i1> select mask by
; interleaving an icmp ult result on <4 x i32> operands with a
; <4 x i8> value loaded from %cmp2_ptr and truncated to <4 x i1>, then
; selects between %tr0 and %tr1. The expectations require vmovl, then
; vtrn.8, then vbsl, in that order.
360*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
361*9880d681SAndroid Build Coastguard Worker                           <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
362*9880d681SAndroid Build Coastguard Worker  ; CHECK-LABEL: vtrn_mismatched_builvector1
363*9880d681SAndroid Build Coastguard Worker  ; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
364*9880d681SAndroid Build Coastguard Worker  ; CHECK: vmovl
365*9880d681SAndroid Build Coastguard Worker  ; CHECK: vtrn.8
366*9880d681SAndroid Build Coastguard Worker  ; CHECK: vbsl
367*9880d681SAndroid Build Coastguard Worker  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
368*9880d681SAndroid Build Coastguard Worker  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
369*9880d681SAndroid Build Coastguard Worker  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
370*9880d681SAndroid Build Coastguard Worker  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
371*9880d681SAndroid Build Coastguard Worker  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
372*9880d681SAndroid Build Coastguard Worker  ret <8 x i8> %rv
373*9880d681SAndroid Build Coastguard Worker}
374*9880d681SAndroid Build Coastguard Worker
375*9880d681SAndroid Build Coastguard Worker; Negative test that should not generate a vtrn
; lower_twice_no_vtrn: double-width shuffle of two <4 x i16> inputs in
; which both halves of the mask follow the odd-lane pattern
; (<undef, 5, 3, 7> and <1, 5, 3, 7>), so the two halves are not the
; even/odd pair seen in the positive tests. The result is stored to %C.
; The expectations forbid any vtrn between the entry block marker and
; the return.
376*9880d681SAndroid Build Coastguard Workerdefine void @lower_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
377*9880d681SAndroid Build Coastguard Workerentry:
378*9880d681SAndroid Build Coastguard Worker  ; CHECK-LABEL: lower_twice_no_vtrn
379*9880d681SAndroid Build Coastguard Worker  ; CHECK: @ BB#0:
380*9880d681SAndroid Build Coastguard Worker  ; CHECK-NOT: vtrn
381*9880d681SAndroid Build Coastguard Worker  ; CHECK: mov pc, lr
382*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
383*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
384*9880d681SAndroid Build Coastguard Worker  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 5, i32 3, i32 7, i32 1, i32 5, i32 3, i32 7>
385*9880d681SAndroid Build Coastguard Worker  store <8 x i16> %0, <8 x i16>* %C
386*9880d681SAndroid Build Coastguard Worker  ret void
387*9880d681SAndroid Build Coastguard Worker}
388*9880d681SAndroid Build Coastguard Worker
389*9880d681SAndroid Build Coastguard Worker; Negative test that should not generate a vtrn
; upper_twice_no_vtrn: double-width shuffle of two <4 x i16> inputs in
; which both halves of the mask follow the even-lane pattern
; (<0, undef, 2, 6> and <0, 4, 2, 6>), so the two halves are not the
; even/odd pair seen in the positive tests. The result is stored to %C.
; The expectations forbid any vtrn between the entry block marker and
; the return.
390*9880d681SAndroid Build Coastguard Workerdefine void @upper_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
391*9880d681SAndroid Build Coastguard Workerentry:
392*9880d681SAndroid Build Coastguard Worker  ; CHECK-LABEL: upper_twice_no_vtrn
393*9880d681SAndroid Build Coastguard Worker  ; CHECK: @ BB#0:
394*9880d681SAndroid Build Coastguard Worker  ; CHECK-NOT: vtrn
395*9880d681SAndroid Build Coastguard Worker  ; CHECK: mov pc, lr
396*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
397*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
398*9880d681SAndroid Build Coastguard Worker  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 6, i32 0, i32 4, i32 2, i32 6>
399*9880d681SAndroid Build Coastguard Worker  store <8 x i16> %0, <8 x i16>* %C
400*9880d681SAndroid Build Coastguard Worker  ret void
401*9880d681SAndroid Build Coastguard Worker}
402