; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s

; Target shuffle intrinsics called by the test functions below.
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

; The pshufb zeroes bytes 0-7 of each 128-bit lane (mask bytes of 128) and
; moves bytes 0-7 up to 8-15. The shufflevector then zeroes bytes 0-7 again
; and moves the already-zero bytes 0-7 of each lane up to 8-15, so every
; result byte is zero and a single vxorps is expected.
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

; The pshufb moves bytes 8-15 of each 128-bit lane down to 0-7 and zeroes
; bytes 8-15. The shufflevector then reads those zeroed bytes 8-15 into 0-7
; and zeroes the rest, so every result byte is zero and a single vxorps is
; expected.
define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

; A permd that replaces dword 7 with dword 4, followed by a byte shuffle
; that duplicates byte 30 into byte 31. The pair is expected to fold into a
; single vpshufb.
define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; CHECK-LABEL: combine_pshufb_vpermd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

; Floating-point variant of the permd test: a permps that replaces element 7
; with element 4, followed by a byte shuffle that duplicates byte 30 into
; byte 31. The pair is expected to fold into a single vpshufb.
define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; CHECK-LABEL: combine_pshufb_vpermps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

; A full qword reversal followed by a pshufb that swaps the two qwords of the
; low 128-bit lane and zeroes the high lane (mask bytes of 255). The shuffles
; are expected to fold into one vperm2i128 selecting the source's upper half
; with a zeroed upper half; the constant add remains as a vpaddq.
define <4 x i64> @combine_permq_pshufb_as_vperm2i128(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vperm2i128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  %5 = add <4 x i64> %4, <i64 1, i64 1, i64 3, i64 3>
  ret <4 x i64> %5
}

; A qword swap within each 128-bit lane followed by a pshufb that swaps the
; low lane's qwords back and zeroes the high lane (mask bytes of 255). The
; result is the original low lane with a zero high lane, expected as a
; vpblendd against a zeroed register.
define <32 x i8> @combine_permq_pshufb_as_vpblendd(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vpblendd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  ret <32 x i8> %3
}

; A 128-bit pshufb with an all-zero mask copies byte 0 to every byte and is
; expected to be emitted as vpbroadcastb.
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

; Widens a 128-bit input to 256 bits, then applies a pshufb and a permd that
; both use all-zero masks. The chain is expected to fold into one
; xmm-to-ymm vpbroadcastb.
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

; A 128-bit pshufb whose mask repeats bytes 0,1 copies word 0 to every word
; and is expected to be emitted as vpbroadcastw.
define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

; Widens a 128-bit input to 256 bits, applies a pshufb whose mask repeats
; bytes 0,1, then a permd with an all-zero mask. The chain is expected to
; fold into one xmm-to-ymm vpbroadcastw.
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

; A 128-bit pshufb whose mask repeats bytes 0-3 copies dword 0 to every dword
; and is expected to be emitted as vpbroadcastd. The byte add that follows is
; expected as a vpaddb with a constant-pool operand.
define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

; Widens a 128-bit input to 256 bits and applies a permd with an all-zero
; index vector, which is expected to be emitted as an xmm-to-ymm
; vpbroadcastd. The dword add that follows is expected as a vpaddd with a
; constant-pool operand.
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

; A 128-bit pshufb whose mask repeats bytes 0-7 copies qword 0 to both qwords
; and is expected to be emitted as vpbroadcastq.
define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

; Widens a 128-bit input to 256 bits and applies a permd whose indices repeat
; 0,1, which is expected to be emitted as an xmm-to-ymm vpbroadcastq. The
; dword add that follows is expected as a vpaddd with a constant-pool operand.
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

; A pshufb on a bitcast <4 x float> whose mask repeats bytes 0-3; with the
; result cast back to float the expected instruction is vbroadcastss.
define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

; Widens a 128-bit float input to 256 bits and applies a permps with an
; all-zero index vector, which is expected to be emitted as an xmm-to-ymm
; vbroadcastss.
define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

; Widens a 128-bit double input to 256 bits and applies a permps (on the
; bitcast float view) whose indices repeat 0,1. This replicates the first
; double and is expected to be emitted as an xmm-to-ymm vbroadcastsd.
define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

; A permd whose indices move whole adjacent dword pairs (0,1 / 4,5 / 4,5 /
; 2,3) is expected to be emitted as a vpermq with qword indices [0,2,2,1].
define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
; CHECK-LABEL: combine_permd_as_permq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; CHECK-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
  ret <8 x i32> %1
}

; A permps whose indices move whole adjacent float pairs (6,7 / 4,5 / 0,1 /
; 2,3) is expected to be emitted as a vpermpd with 64-bit element indices
; [3,2,0,1].
define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
; CHECK-LABEL: combine_permps_as_permpd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; CHECK-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
  ret <8 x float> %1
}

; A pshufb that zeroes the low 10 bytes of each 128-bit lane and moves bytes
; 0-5 into the top 6 bytes (a per-lane byte shift left by 10). The CHECK
; lines expect the output to remain a vpshufb.
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>)
  ret <32 x i8> %res0
}

; A pshufb that moves byte 15 of each 128-bit lane to byte 0 and zeroes the
; other 15 bytes (a per-lane byte shift right by 15). The CHECK lines expect
; the output to remain a vpshufb.
define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <32 x i8> %res0
}

; A pshufb that swaps adjacent word pairs in the low 8 bytes of each 128-bit
; lane and leaves the high 8 bytes in place. It is expected to be emitted as
; vpshuflw.
define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshuflw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %res0
}

; A pshufb that leaves the low 8 bytes of each 128-bit lane in place and
; swaps adjacent word pairs in the high 8 bytes. It is expected to be emitted
; as vpshufhw.
define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshufhw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res0
}

; Two chained pshufbs: the first swaps word pairs in the low half of each
; 128-bit lane, the second swaps word pairs in the high half. Together they
; swap word pairs across the whole lane, and the CHECK lines expect the pair
; to be merged into a single vpshufb.
define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_not_as_pshufw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res1
}
