xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512F
5*9880d681SAndroid Build Coastguard Worker;
6*9880d681SAndroid Build Coastguard Worker; Combine tests involving AVX target shuffles
7*9880d681SAndroid Build Coastguard Worker
; Immediate-controlled permute intrinsics: the control is a single i8 operand.
; None of these four are called by the test functions shown in this part of the file.
8*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8)
9*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8)
10*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8)
11*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8)
12*9880d681SAndroid Build Coastguard Worker
; Variable-mask permute intrinsics: the control is an integer vector with one
; element per data element (i32 for the float forms, i64 for the double forms).
; These are the intrinsics exercised by the test functions below.
13*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
14*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
15*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
16*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
17*9880d681SAndroid Build Coastguard Worker
; Two-source 128-bit-half permute intrinsics with an i8 control. They are only
; declared here; the tests shown in this part of the file use shufflevector instead.
18*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8)
19*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8)
20*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8)
21*9880d681SAndroid Build Coastguard Worker
; Applies the 128-bit variable float permute twice with the same mask <3,2,1,0>.
; The expected assembly is a bare retq: the two element reversals are expected
; to cancel, so no shuffle instruction should be emitted.
22*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @combine_vpermilvar_4f32_identity(<4 x float> %a0) {
23*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f32_identity:
24*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
25*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
26*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
27*9880d681SAndroid Build Coastguard Worker  %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>  %1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
28*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %2
29*9880d681SAndroid Build Coastguard Worker}
30*9880d681SAndroid Build Coastguard Worker
; A single variable float permute with mask <0,1,0,1>, which repeats the low
; pair of floats. The expected assembly is one vmovddup duplicating the low
; 64 bits of xmm0 (printed as xmm0[0,0]).
31*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @combine_vpermilvar_4f32_movddup(<4 x float> %a0) {
32*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f32_movddup:
33*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
34*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
35*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
36*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 1, i32 0, i32 1>)
37*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %1
38*9880d681SAndroid Build Coastguard Worker}
; Same <0,1,0,1> mask as the register movddup test, but the source vector is
; loaded through the pointer argument. The expected vmovddup reads its operand
; directly from memory (mem[0,0]) with no separate load instruction.
39*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @combine_vpermilvar_4f32_movddup_load(<4 x float> *%a0) {
40*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f32_movddup_load:
41*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
42*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
43*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
44*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x float>, <4 x float> *%a0
45*9880d681SAndroid Build Coastguard Worker  %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 0, i32 1, i32 0, i32 1>)
46*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %2
47*9880d681SAndroid Build Coastguard Worker}
48*9880d681SAndroid Build Coastguard Worker
; Variable float permute with mask <undef,1,3,3>. The undef first element
; leaves the combiner free to pick any source, and the expected output is a
; vmovshdup (odd-element duplicate, xmm0[1,1,3,3]).
49*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @combine_vpermilvar_4f32_movshdup(<4 x float> %a0) {
50*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f32_movshdup:
51*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
52*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
53*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
54*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 undef, i32 1, i32 3, i32 3>)
55*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %1
56*9880d681SAndroid Build Coastguard Worker}
57*9880d681SAndroid Build Coastguard Worker
; Variable float permute with mask <0,0,2,undef>. With the undef last element
; treated as 2, the expected output is a vmovsldup (even-element duplicate,
; xmm0[0,0,2,2]).
58*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @combine_vpermilvar_4f32_movsldup(<4 x float> %a0) {
59*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f32_movsldup:
60*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
61*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
62*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
63*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 undef>)
64*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %1
65*9880d681SAndroid Build Coastguard Worker}
66*9880d681SAndroid Build Coastguard Worker
; Variable float permute with mask <2,2,3,3> (the pattern a self-unpack of the
; high half would produce). The expected output recorded here is an
; immediate-form vpermilps xmm0[2,2,3,3], not an unpck instruction.
67*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @combine_vpermilvar_4f32_unpckh(<4 x float> %a0) {
68*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f32_unpckh:
69*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
70*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
71*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
72*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)
73*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %1
74*9880d681SAndroid Build Coastguard Worker}
75*9880d681SAndroid Build Coastguard Worker
; Variable float permute with mask <0,0,1,1> (the pattern a self-unpack of the
; low half would produce). The expected output recorded here is an
; immediate-form vpermilps xmm0[0,0,1,1], not an unpck instruction.
76*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @combine_vpermilvar_4f32_unpckl(<4 x float> %a0) {
77*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f32_unpckl:
78*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
79*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
80*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
81*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)
82*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %1
83*9880d681SAndroid Build Coastguard Worker}
84*9880d681SAndroid Build Coastguard Worker
; 256-bit version of the identity test: two chained variable float permutes.
; The low four mask elements are <3,2,1,0> in both calls; the high four are
; <2,3,0,undef> in the first call and <2,3,0,1> in the second. The expected
; assembly is a bare retq, so the pair is expected to fold away entirely even
; with the undef mask element.
85*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {
86*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_8f32_identity:
87*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
88*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
89*9880d681SAndroid Build Coastguard Worker  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 undef>)
90*9880d681SAndroid Build Coastguard Worker  %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>  %1, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 1>)
91*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %2
92*9880d681SAndroid Build Coastguard Worker}
93*9880d681SAndroid Build Coastguard Worker
; Two chained 256-bit variable float permutes, each with an undef in its last
; mask element. They are expected to merge into one immediate-form vpermilps
; whose printed mask is [1,0,3,2,6,u,4,u]; the "u" entries are the positions
; that end up depending on an undef control element.
94*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @combine_vpermilvar_8f32_10326u4u(<8 x float> %a0) {
95*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_8f32_10326u4u:
96*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
97*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
98*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
99*9880d681SAndroid Build Coastguard Worker  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 0, i32 1, i32 2, i32 undef>)
100*9880d681SAndroid Build Coastguard Worker  %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>  %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 undef>)
101*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %2
102*9880d681SAndroid Build Coastguard Worker}
103*9880d681SAndroid Build Coastguard Worker
; Three-step chain: a variable float permute with mask <3,2,1,0,3,2,1,0>, a
; shufflevector that swaps the two 4-element halves (indices 4..7 then 0..3),
; and the same variable permute again. The expected assembly is a single
; vperm2f128 printed as ymm0[2,3,0,1], i.e. only the half swap should remain.
104*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @combine_vpermilvar_vperm2f128_8f32(<8 x float> %a0) {
105*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_vperm2f128_8f32:
106*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
107*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
108*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
109*9880d681SAndroid Build Coastguard Worker  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
110*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
111*9880d681SAndroid Build Coastguard Worker  %3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>  %2, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
112*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %3
113*9880d681SAndroid Build Coastguard Worker}
114*9880d681SAndroid Build Coastguard Worker
; Like the vperm2f128 half-swap test, but the middle shufflevector takes its
; second operand from zeroinitializer: index 8 (element 0 of the zero vector)
; fills the low four elements and indices 0..3 of %1 move to the high four.
; The expected assembly is a single vperm2f128 with a zeroed low half, printed
; as zero,zero,ymm0[0,1].
115*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @combine_vpermilvar_vperm2f128_zero_8f32(<8 x float> %a0) {
116*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
117*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
118*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
119*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
120*9880d681SAndroid Build Coastguard Worker  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
121*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x float> %1, <8 x float> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3>
122*9880d681SAndroid Build Coastguard Worker  %3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>  %2, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
123*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %3
124*9880d681SAndroid Build Coastguard Worker}
125*9880d681SAndroid Build Coastguard Worker
; Three-step chain on <4 x double>: a variable double permute with mask
; <2,0,2,0>, a shufflevector that keeps elements 0,1 of %1 and takes elements
; 4,5 from zeroinitializer, then the same variable permute again. The expected
; assembly zeroes ymm1 with vxorpd and blends it into the upper half with
; vblendpd (ymm0[0,1],ymm1[2,3]); no permute instruction is expected.
126*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0) {
127*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
128*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
129*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
130*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
131*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
132*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
133*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
134*9880d681SAndroid Build Coastguard Worker  %3 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %2, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
135*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %3
136*9880d681SAndroid Build Coastguard Worker}
137*9880d681SAndroid Build Coastguard Worker
; 256-bit variable float permute with mask <0,1,0,1,4,5,4,5>, i.e. the low
; pair of floats repeated within each group of four. The expected assembly is
; one vmovddup printed in 64-bit element terms as ymm0[0,0,2,2].
138*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @combine_vpermilvar_8f32_movddup(<8 x float> %a0) {
139*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_8f32_movddup:
140*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
141*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
142*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
143*9880d681SAndroid Build Coastguard Worker  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>)
144*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %1
145*9880d681SAndroid Build Coastguard Worker}
; Same <0,1,0,1,4,5,4,5> mask as the 256-bit register movddup test, but the
; source vector is loaded through the pointer argument. The expected vmovddup
; reads its operand directly from memory (mem[0,0,2,2]) with no separate load.
146*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @combine_vpermilvar_8f32_movddup_load(<8 x float> *%a0) {
147*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_8f32_movddup_load:
148*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
149*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
150*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
151*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x float>, <8 x float> *%a0
152*9880d681SAndroid Build Coastguard Worker  %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>)
153*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %2
154*9880d681SAndroid Build Coastguard Worker}
155*9880d681SAndroid Build Coastguard Worker
; 256-bit variable float permute with mask <1,1,3,3,undef,5,7,7>. With the
; undef element treated as 5, the expected output is a vmovshdup
; (odd-element duplicate, ymm0[1,1,3,3,5,5,7,7]).
156*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @combine_vpermilvar_8f32_movshdup(<8 x float> %a0) {
157*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_8f32_movshdup:
158*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
159*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
160*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
161*9880d681SAndroid Build Coastguard Worker  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 5, i32 7, i32 7>)
162*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %1
163*9880d681SAndroid Build Coastguard Worker}
164*9880d681SAndroid Build Coastguard Worker
; 256-bit variable float permute with the fully specified mask
; <0,0,2,2,4,4,6,6>. The expected output is a vmovsldup (even-element
; duplicate) printed with that same element list.
165*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @combine_vpermilvar_8f32_movsldup(<8 x float> %a0) {
166*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_8f32_movsldup:
167*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
168*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
169*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
170*9880d681SAndroid Build Coastguard Worker  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>)
171*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %1
172*9880d681SAndroid Build Coastguard Worker}
173*9880d681SAndroid Build Coastguard Worker
; Applies the 128-bit variable double permute twice with the same mask <2,0>.
; The expected assembly is a bare retq, so the pair is expected to cancel.
; (review) This relies on the double form reading its selector from bit 1 of
; each i64 control element, so <2,0> acts as an element swap; confirm against
; the VPERMILPD instruction reference.
174*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
175*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_2f64_identity:
176*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
177*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
178*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 2, i64 0>)
179*9880d681SAndroid Build Coastguard Worker  %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>  %1, <2 x i64> <i64 2, i64 0>)
180*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %2
181*9880d681SAndroid Build Coastguard Worker}
182*9880d681SAndroid Build Coastguard Worker
; A single 128-bit variable double permute with mask <0,0>, which selects the
; low double for both result elements. The expected assembly is one vmovddup
; printed as xmm0[0,0].
183*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @combine_vpermilvar_2f64_movddup(<2 x double> %a0) {
184*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_2f64_movddup:
185*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
186*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
187*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
188*9880d681SAndroid Build Coastguard Worker  %1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 0, i64 0>)
189*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %1
190*9880d681SAndroid Build Coastguard Worker}
191*9880d681SAndroid Build Coastguard Worker
; 256-bit version of the double identity test: the variable double permute is
; applied twice with the same mask <2,0,2,0>. The expected assembly is a bare
; retq, so the pair is expected to cancel and leave %a0 unchanged.
192*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @combine_vpermilvar_4f64_identity(<4 x double> %a0) {
193*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f64_identity:
194*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
195*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
196*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
197*9880d681SAndroid Build Coastguard Worker  %2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>  %1, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
198*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %2
199*9880d681SAndroid Build Coastguard Worker}
200*9880d681SAndroid Build Coastguard Worker
; A single 256-bit variable double permute with mask <0,0,4,4>. The expected
; assembly is one vmovddup printed as ymm0[0,0,2,2].
; (review) The control value 4 is expected to behave like 0 here, which is
; consistent with only bit 1 of each i64 control element being used as the
; selector; confirm against the VPERMILPD instruction reference.
201*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @combine_vpermilvar_4f64_movddup(<4 x double> %a0) {
202*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f64_movddup:
203*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
204*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
205*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
206*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)
207*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %1
208*9880d681SAndroid Build Coastguard Worker}
209*9880d681SAndroid Build Coastguard Worker
; Four chained 128-bit variable float permutes with masks <3,2,1,0>,
; <2,3,0,1>, <0,2,1,3> and <3,2,1,0>. Composing them in order gives the
; element order [2,0,3,1], and the expected assembly is a single
; immediate-form vpermilps with exactly that mask.
210*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {
211*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f32_4stage:
212*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
213*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
214*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
215*9880d681SAndroid Build Coastguard Worker  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
216*9880d681SAndroid Build Coastguard Worker  %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>  %1, <4 x i32> <i32 2, i32 3, i32 0, i32 1>)
217*9880d681SAndroid Build Coastguard Worker  %3 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>  %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>)
218*9880d681SAndroid Build Coastguard Worker  %4 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>  %3, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
219*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %4
220*9880d681SAndroid Build Coastguard Worker}
221*9880d681SAndroid Build Coastguard Worker
; 256-bit version of the four-stage test: the same four masks as the 128-bit
; variant, each repeated for the low and high group of four elements. The
; expected assembly is a single immediate-form vpermilps printed as
; ymm0[2,0,3,1,6,4,7,5].
222*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @combine_vpermilvar_8f32_4stage(<8 x float> %a0) {
223*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_8f32_4stage:
224*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
225*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
226*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
227*9880d681SAndroid Build Coastguard Worker  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
228*9880d681SAndroid Build Coastguard Worker  %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>  %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>)
229*9880d681SAndroid Build Coastguard Worker  %3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>  %2, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 0, i32 2, i32 1, i32 3>)
230*9880d681SAndroid Build Coastguard Worker  %4 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>  %3, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
231*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %4
232*9880d681SAndroid Build Coastguard Worker}
233*9880d681SAndroid Build Coastguard Worker
; A variable float permute with mask <3,2,1,0> followed by a shufflevector
; against zeroinitializer with indices <2,4,1,4>: elements 2 and 1 of the
; permuted vector interleaved with zeros (index 4 is element 0 of the zero
; vector). The expected assembly is a single vinsertps printed as
; xmm0[1],zero,xmm0[2],zero. Unlike the other tests in this part of the file,
; the intrinsic is invoked with a plain call rather than a tail call.
234*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
235*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: combine_vpermilvar_4f32_as_insertps:
236*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
237*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
238*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
239*9880d681SAndroid Build Coastguard Worker  %1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
240*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 1, i32 4>
241*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %2
242*9880d681SAndroid Build Coastguard Worker}
243