; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/vector-shuffle-v1.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq| FileCheck %s --check-prefix=VL_BW_DQ

; Code generation tests for shufflevector on vectors of i1. Each function is
; checked under two feature sets: plain AVX512F (first invocation above) and
; AVX512BW + AVX512VL + AVX512DQ (second invocation, check prefix VL_BW_DQ).
target triple = "x86_64-unknown-unknown"

; Swap the two lanes of a <2 x i1> vector (mask <1, 0>, second operand undef).
; The AVX512F run expects a lone vpshufd on the incoming xmm register. The
; VL_BW_DQ run expects the value to be turned into a k-mask and expanded again
; on both sides of that same vpshufd.
define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) {
; AVX512F-LABEL: shuf2i1_1_0:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf2i1_1_0:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
; VL_BW_DQ-NEXT:    vptestmq %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VL_BW_DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
; VL_BW_DQ-NEXT:    vptestmq %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT:    retq
  %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x i1> %b
}

; Two-input <2 x i1> shuffle. Lane 0 takes %a[1]; lane 1 takes element 0 of the
; constant second operand <i1 1, i1 0> (mask index 2). Both runs expect the
; constant to be materialised in xmm1 and the selection to be a single vpalignr.
define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) {
; AVX512F-LABEL: shuf2i1_1_2:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    movl $1, %eax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf2i1_1_2:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
; VL_BW_DQ-NEXT:    vptestmq %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT:    movb $1, %al
; VL_BW_DQ-NEXT:    kmovb %eax, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %xmm1
; VL_BW_DQ-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; VL_BW_DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
; VL_BW_DQ-NEXT:    vptestmq %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT:    retq
  %b = shufflevector <2 x i1> %a, <2 x i1> <i1 1, i1 0>, <2 x i32> <i32 1, i32 2>
  ret <2 x i1> %b
}


; Reverse the four lanes of a <4 x i1> vector (mask <3, 2, 1, 0>, second operand
; undef). The AVX512F run expects just a vpshufd; the VL_BW_DQ run expects the
; same vpshufd wrapped in vector -> k-mask -> vector conversions.
define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) {
; AVX512F-LABEL: shuf4i1_3_2_10:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf4i1_3_2_10:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VL_BW_DQ-NEXT:    vptestmd %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2d %k0, %xmm0
; VL_BW_DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; VL_BW_DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VL_BW_DQ-NEXT:    vptestmd %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2d %k0, %xmm0
; VL_BW_DQ-NEXT:    retq
  %b = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i1> %b
}

; Shuffle of <8 x i1> compare results. Every mask index is below 8, so only
; %a2 (%a == %a1) is ever selected; %b2 is computed in the IR but unused by the
; mask, and the expected code for both runs contains a single vpcmpeqq.
; The expected lowering expands the mask to i64 lanes, permutes them with vpermq
; using the index vector [3,6,1,0,3,7,7,0], and converts back to a mask.
define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %a1, <8 x i64> %b1) {
; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [3,6,1,0,3,7,7,0]
; AVX512F-NEXT:    vpermq %zmm1, %zmm2, %zmm1
; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm1
; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
; VL_BW_DQ-NEXT:    vpermq %zmm0, %zmm1, %zmm0
; VL_BW_DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2w %k0, %xmm0
; VL_BW_DQ-NEXT:    retq
  %a2 = icmp eq <8 x i64> %a, %a1
  %b2 = icmp eq <8 x i64> %b, %b1
  %c = shufflevector <8 x i1> %a2, <8 x i1> %b2, <8 x i32> <i32 3, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
  ret <8 x i1> %c
}

; Two-input shuffle of <16 x i1> compare results. Mask indices 22 and 21 select
; lanes 6 and 5 of %b2 (%b == %b1); every other index selects from %a2
; (%a == %a1). Both runs expect two vpcmpeqd compares, both masks expanded to
; i32 lanes, and a single vpermt2d doing the two-source permute.
define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <16 x i32> %b, <16 x i32> %a1, <16 x i32> %b1) {
; AVX512F-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT:    vpcmpeqd %zmm3, %zmm1, %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z}
; AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512F-NEXT:    vpermt2d %zmm1, %zmm3, %zmm2
; AVX512F-NEXT:    vpslld $31, %zmm2, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
; VL_BW_DQ-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
; VL_BW_DQ-NEXT:    vpmovm2d %k1, %zmm0
; VL_BW_DQ-NEXT:    vpmovm2d %k0, %zmm1
; VL_BW_DQ-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT:    vpermt2d %zmm0, %zmm2, %zmm1
; VL_BW_DQ-NEXT:    vpslld $31, %zmm1, %zmm0
; VL_BW_DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2b %k0, %xmm0
; VL_BW_DQ-NEXT:    retq
  %a2 = icmp eq <16 x i32> %a, %a1
  %b2 = icmp eq <16 x i32> %b, %b1
  %c = shufflevector <16 x i1> %a2, <16 x i1> %b2, <16 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
  ret <16 x i1> %c
}

; Single-input <32 x i1> shuffle (second operand undef). The 16-entry index
; pattern is repeated twice and every index is below 32, so all lanes come from
; %a. The AVX512F run expects a byte-level lowering (vperm2i128, two vpshufb,
; vpblendvb); the VL_BW_DQ run expects the mask to be expanded to words and
; permuted with one vpermw.
define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<32 x i1> %a) {
; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    vpsllw $7, %ymm0, %ymm0
; VL_BW_DQ-NEXT:    vpmovb2m %ymm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT:    vmovdqu16 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT:    vpermw %zmm0, %zmm1, %zmm0
; VL_BW_DQ-NEXT:    vpsllw $15, %zmm0, %zmm0
; VL_BW_DQ-NEXT:    vpmovw2m %zmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2b %k0, %ymm0
; VL_BW_DQ-NEXT:    retq
  %b = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0, i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
  ret <32 x i1> %b
}

; The i8 argument is bitcast to <8 x i1> and shuffled with a mask whose only
; defined index is 2 (result lanes 1, 4 and 6); all other lanes are undef.
; Both runs expect this to become a broadcast rather than a general permute:
; extract 128-bit chunk 1 of the expanded mask and vpbroadcastq its low element.
define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vextracti32x4 $1, %zmm1, %xmm1
; AVX512F-NEXT:    vpbroadcastq %xmm1, %zmm1
; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm1
; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    kmovb %edi, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT:    vextracti64x2 $1, %zmm0, %xmm0
; VL_BW_DQ-NEXT:    vpbroadcastq %xmm0, %zmm0
; VL_BW_DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2w %k0, %xmm0
; VL_BW_DQ-NEXT:    retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef>
  ret <8 x i1> %c
}

; i8 -> <8 x i1> -> shuffle -> i8 round trip. The second shuffle operand is
; zeroinitializer, so mask indices 10 and 9 pick zero lanes, while indices 2, 3
; and 2 pick lanes of %b; the remaining lanes are undef.
; Both runs expect a vpermt2q against a zeroed register (vpxord) with the index
; vector <8,2,10,u,3,u,2,u>, followed by a k-register move into %eax.
define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    kmovb %edi, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
; VL_BW_DQ-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; VL_BW_DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT:    kmovb %k0, %eax
; VL_BW_DQ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT:    retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector <8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 10, i32 2, i32 9, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
  %d = bitcast <8 x i1> %c to i8
  ret i8 %d
}

; i8 -> <8 x i1> -> shuffle -> i8 round trip with mask <0, 1, 4, 5, u, u, u, u>
; (second operand undef). The defined indices move lanes in adjacent pairs, and
; both runs expect a single vshufi64x2 on the expanded mask instead of a
; variable permute.
define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    kmovb %edi, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
; VL_BW_DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT:    kmovb %k0, %eax
; VL_BW_DQ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT:    retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
  %d = bitcast <8 x i1> %c to i8
  ret i8 %d
}

; i8 -> <8 x i1> -> shuffle -> i8 round trip. The second operand is
; zeroinitializer, so mask index 9 (result lane 0) selects a zero lane; every
; other index selects from %b.
; Both runs expect a vpermt2q against a zeroed register with the index vector
; [8,6,1,0,3,7,7,0].
define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    kmovb %edi, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
; VL_BW_DQ-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; VL_BW_DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT:    kmovb %k0, %eax
; VL_BW_DQ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT:    retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector <8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
  %d = bitcast <8 x i1> %c to i8
  ret i8 %d
}

; Same round trip as the neighbouring tests, but with the operands swapped:
; zeroinitializer is the FIRST shuffle operand and %b the second. Only mask
; indices 9 and 10 (result lanes 0 and 3) reach %b, selecting %b[1] and %b[2];
; every other lane reads from the zero vector.
; Both runs expect a vpermt2q into a zeroed register with the index vector
; [9,1,2,10,4,5,6,7].
define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
; AVX512F-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512F-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    kmovb %edi, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
; VL_BW_DQ-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; VL_BW_DQ-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT:    vpsllq $63, %zmm2, %zmm0
; VL_BW_DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT:    kmovb %k0, %eax
; VL_BW_DQ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT:    retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector <8 x i1> zeroinitializer, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 10, i32 3, i32 7, i32 7, i32 0>
  %d = bitcast <8 x i1> %c to i8
  ret i8 %d
}

; Shuffle of a non-trivial constant with a variable mask. The first operand is
; the constant <1,1,0,0,1,1,0,0> (the expected code loads it as the immediate
; 51 into a k-register); the second operand is %b. Only mask index 9 (result
; lane 0) selects from %b; all other lanes read the constant.
; NOTE(review): the function name advertises index 10 in lane 3, but the actual
; mask below is <9, 6, 1, 0, 3, 7, 7, 1> and the expected index vector is
; [9,6,1,0,3,7,7,1]. The name is left unchanged because the LABEL checks and
; any external references depend on it.
define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; AVX512F-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT:    movb $51, %al
; AVX512F-NEXT:    kmovw %eax, %k2
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    kmovb %edi, %k0
; VL_BW_DQ-NEXT:    movb $51, %al
; VL_BW_DQ-NEXT:    kmovb %eax, %k1
; VL_BW_DQ-NEXT:    vpmovm2q %k1, %zmm0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm1
; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
; VL_BW_DQ-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; VL_BW_DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT:    kmovb %k0, %eax
; VL_BW_DQ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT:    retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 1>
  %c1 = bitcast <8 x i1> %c to i8
  ret i8 %c1
}

; Shuffle of an all-ones constant with a <8 x i1> argument. The first operand
; is the constant with every lane set; the second is %a, passed directly as a
; vector rather than bitcast from i8. Only mask index 9 (result lane 0) selects
; from %a (lane 1); every other lane reads a constant one.
; Both runs expect a vpermt2q with the index vector [9,1,2,3,4,5,6,7] against
; an all-ones register built by vpternlogd $255.
; NOTE(review): the function name advertises index 10 in lane 3, but the actual
; mask below is <9, 6, 1, 0, 3, 7, 7, 0>. The name is left unchanged because
; the LABEL checks and any external references depend on it.
define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [9,1,2,3,4,5,6,7]
; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT:    retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
; VL_BW_DQ:       # BB#0:
; VL_BW_DQ-NEXT:    vpsllw $15, %xmm0, %xmm0
; VL_BW_DQ-NEXT:    vpmovw2m %xmm0, %k0
; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
; VL_BW_DQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; VL_BW_DQ-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT:    vpsllq $63, %zmm2, %zmm0
; VL_BW_DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT:    kmovb %k0, %eax
; VL_BW_DQ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT:    retq
  %c = shufflevector <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1> %a, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
  %c1 = bitcast <8 x i1> %c to i8
  ret i8 %c1
}


; Splat of element 0 of a 16-lane i1 vector: the i16 argument is reinterpreted
; as <16 x i1>, every result lane takes element 0 (all-zero shuffle mask), and
; the vector is cast back to i16. Both sets of expectations below contain a
; vpbroadcastd followed by vpslld/vptestmd and a kmovw into %eax.
391*9880d681SAndroid Build Coastguard Workerdefine i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
392*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
393*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
394*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    kmovw %edi, %k1
395*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
396*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
397*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpbroadcastd %xmm0, %zmm0
398*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
399*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
400*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    kmovw %k0, %eax
401*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
402*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
403*9880d681SAndroid Build Coastguard Worker;
404*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
405*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ:       # BB#0:
406*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    kmovw %edi, %k0
407*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    vpmovm2d %k0, %zmm0
408*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    vpbroadcastd %xmm0, %zmm0
409*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    vpslld $31, %zmm0, %zmm0
410*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
411*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    kmovw %k0, %eax
412*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
413*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    retq
  ; Reinterpret the 16 bits of %a as sixteen i1 lanes.
414*9880d681SAndroid Build Coastguard Worker  %b = bitcast i16 %a to <16 x i1>
  ; zeroinitializer mask: every result lane selects element 0 of %b.
415*9880d681SAndroid Build Coastguard Worker  %c = shufflevector < 16 x i1> %b, <16 x i1> undef, <16 x i32> zeroinitializer
416*9880d681SAndroid Build Coastguard Worker  %d = bitcast <16 x i1> %c to i16
417*9880d681SAndroid Build Coastguard Worker  ret i16 %d
418*9880d681SAndroid Build Coastguard Worker}
419*9880d681SAndroid Build Coastguard Worker
; 64-lane version of the element-0 splat: the i64 argument is reinterpreted as
; <64 x i1>, every result lane takes element 0 (all-zero shuffle mask), and the
; vector is cast back to i64.
; The first set of expectations sets up an aligned stack frame (pushq %rbp,
; andq $-32, subq $96), stores 16-bit mask pieces with kmovw, and assembles the
; 64-bit return value with movl/shlq $32/orq. The second set stays in
; registers: kmovq, vpmovm2b, vpbroadcastb, vpsllw $7, vpmovb2m, kmovq.
420*9880d681SAndroid Build Coastguard Workerdefine i64 @shuf64i1_zero(i64 %a) {
421*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: shuf64i1_zero:
422*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
423*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    pushq %rbp
424*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:  .Ltmp0:
425*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    .cfi_def_cfa_offset 16
426*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:  .Ltmp1:
427*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    .cfi_offset %rbp, -16
428*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    movq %rsp, %rbp
429*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:  .Ltmp2:
430*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    .cfi_def_cfa_register %rbp
431*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    andq $-32, %rsp
432*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    subq $96, %rsp
433*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
434*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
435*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
436*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
437*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
438*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
439*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
440*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
441*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
442*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
443*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
444*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
445*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
446*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
447*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    kmovw %k0, (%rsp)
448*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    movl (%rsp), %ecx
449*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    movq %rcx, %rax
450*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    shlq $32, %rax
451*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    orq %rcx, %rax
452*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    movq %rbp, %rsp
453*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    popq %rbp
454*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
455*9880d681SAndroid Build Coastguard Worker;
456*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-LABEL: shuf64i1_zero:
457*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ:       # BB#0:
458*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    kmovq %rdi, %k0
459*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    vpmovm2b %k0, %zmm0
460*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    vpbroadcastb %xmm0, %zmm0
461*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    vpsllw $7, %zmm0, %zmm0
462*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    vpmovb2m %zmm0, %k0
463*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    kmovq %k0, %rax
464*9880d681SAndroid Build Coastguard Worker; VL_BW_DQ-NEXT:    retq
  ; Reinterpret the 64 bits of %a as sixty-four i1 lanes.
465*9880d681SAndroid Build Coastguard Worker  %b = bitcast i64 %a to <64 x i1>
  ; zeroinitializer mask: every result lane selects element 0 of %b.
466*9880d681SAndroid Build Coastguard Worker  %c = shufflevector < 64 x i1> %b, <64 x i1> undef, <64 x i32> zeroinitializer
467*9880d681SAndroid Build Coastguard Worker  %d = bitcast <64 x i1> %c to i64
468*9880d681SAndroid Build Coastguard Worker  ret i64 %d
469*9880d681SAndroid Build Coastguard Worker}
470