; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Source: /aosp_15_r20/external/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
;
; Codegen tests for 16-element, 512-bit shufflevector patterns. The file is
; compiled twice (once with +avx512f, once with +avx512bw); checks shared by
; both runs use the common prefix, and run-specific prefixes are used only
; where the two feature sets produce different code.

target triple = "x86_64-unknown-unknown"
6*9880d681SAndroid Build Coastguard Worker
; Splat of element 0 of %a into all 16 float lanes (%b is not referenced by
; the mask). Expected lowering is a single vbroadcastss from xmm0.
define <16 x float> @shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x float> %shuffle
}
15*9880d681SAndroid Build Coastguard Worker
; Splat of element 8 of %a into all 16 float lanes. Element 8 is not in the
; low 128 bits, so the expected code first extracts 128-bit chunk 2
; (vextractf32x4 $2) and then broadcasts its low element.
define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
; ALL:       # BB#0:
; ALL-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x float> %shuffle
}
25*9880d681SAndroid Build Coastguard Worker
; Interleaves the two low elements of every 128-bit chunk of %a with the
; matching elements of %b (mask indices >= 16 select from %b).
; Expected lowering is a single vunpcklps.
define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x float> %shuffle
}
34*9880d681SAndroid Build Coastguard Worker
; Same unpack-low interleave as above, but the second shuffle operand is
; zeroinitializer: every odd result lane uses mask index 16 (element 0 of the
; zero vector). %b is unused. Expected code zeroes zmm1 with vpxord and then
; issues vunpcklps.
define <16 x float> @shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz:
; ALL:       # BB#0:
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32><i32 0, i32 16, i32 1, i32 16, i32 4, i32 16, i32 5, i32 16, i32 8, i32 16, i32 9, i32 16, i32 12, i32 16, i32 13, i32 16>
  ret <16 x float> %shuffle
}
44*9880d681SAndroid Build Coastguard Worker
; Unpack-low interleave with the operand roles swapped: elements of %b come
; first in each pair, followed by the matching element of %a. Expected
; lowering is still a single vunpcklps, with zmm1 as the first source.
define <16 x float> @shuffle_v16f32_vunpcklps_swap(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_vunpcklps_swap:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[12],zmm0[12],zmm1[13],zmm0[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 20, i32 4, i32 21, i32 5, i32 24, i32 8, i32 25, i32 9, i32 28, i32 12, i32 29, i32 13>
  ret <16 x float> %shuffle
}
53*9880d681SAndroid Build Coastguard Worker
; Integer (<16 x i32>) counterpart of the unpack-low interleave of %a and %b.
; Expected lowering is a single vpunpckldq.
define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
; ALL:       # BB#0:
; ALL-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x i32> %shuffle
}
62*9880d681SAndroid Build Coastguard Worker
; Unpack-low pattern whose first shuffle operand is zeroinitializer. The even
; result lanes pick assorted indices (15, 13, 11, ...) that all fall inside
; the zero vector, so they are zero regardless of the exact index; the odd
; lanes take elements 0,1,4,5,... of %b. %a is unused. Expected code zeroes
; zmm0 with vpxord and then issues vpunpckldq.
define <16 x i32> @shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d:
; ALL:       # BB#0:
; ALL-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; ALL-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %b, <16 x i32><i32 15, i32 16, i32 13, i32 17, i32 11, i32 20, i32 9, i32 21, i32 7, i32 24, i32 5, i32 25, i32 3, i32 28, i32 1, i32 29>
  ret <16 x i32> %shuffle
}
72*9880d681SAndroid Build Coastguard Worker
; Interleaves the two high elements of every 128-bit chunk of %a with the
; matching elements of %b. Expected lowering is a single vunpckhps.
define <16 x float> @shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  ret <16 x float> %shuffle
}
81*9880d681SAndroid Build Coastguard Worker
; Unpack-high pattern whose first shuffle operand is zeroinitializer. The even
; result lanes use indices (0, 0, 4, 4, 6, 6, 8, 8) that all select from the
; zero vector; the odd lanes take elements 2,3,6,7,... of %b. %a is unused.
; Expected code zeroes zmm0 with vpxord and then issues vunpckhps.
define <16 x float> @shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f:
; ALL:       # BB#0:
; ALL-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; ALL-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> zeroinitializer, <16 x float> %b, <16 x i32><i32 0, i32 18, i32 0, i32 19, i32 4, i32 22, i32 4, i32 23, i32 6, i32 26, i32 6, i32 27, i32 8, i32 30, i32 8, i32 31>
  ret <16 x float> %shuffle
}
91*9880d681SAndroid Build Coastguard Worker
; Duplicates every even-indexed element of %a into the following odd lane
; (indices in the function name are decimal). %b is not referenced by the
; mask. Expected lowering is a single vmovsldup.
define <16 x float> @shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x float> %shuffle
}
100*9880d681SAndroid Build Coastguard Worker
; Duplicates every odd-indexed element of %a into the preceding even lane
; (indices in the function name are decimal). %b is not referenced by the
; mask. Expected lowering is a single vmovshdup.
define <16 x float> @shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; ALL:       # BB#0:
; ALL-NEXT:    vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x float> %shuffle
}
109*9880d681SAndroid Build Coastguard Worker
; The mask always moves adjacent float pairs (2k, 2k+1) together, so the
; shuffle can be expressed on 64-bit elements. The expected lowering is a
; vpermilpd with the 64-bit element pattern [0,0,3,3,4,5,6,6]. %b is not
; referenced by the mask.
define <16 x float> @shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,3,3,4,5,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 12, i32 13>
  ret <16 x float> %shuffle
}
118*9880d681SAndroid Build Coastguard Worker
; Applies the same in-chunk pattern [0,0,2,0] to each group of four floats of
; %a (%b is not referenced by the mask). Expected lowering is a single
; vpermilps.
define <16 x float> @shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4,8,8,10,8,12,12,14,12]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4, i32 8, i32 8, i32 10, i32 8, i32 12, i32 12, i32 14, i32 12>
  ret <16 x float> %shuffle
}
127*9880d681SAndroid Build Coastguard Worker
; Mostly-undef mask: only result lanes 0, 5, 10 and 15 are defined (taking
; elements 3, 4, 11 and 12 of %a). The expected vpermilps fills the undef
; lanes so that every group of four uses the same [3,0,3,0] pattern, which is
; consistent with each defined lane.
define <16 x float> @shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[3,0,3,0,7,4,7,4,11,8,11,8,15,12,15,12]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12>
  ret <16 x float> %shuffle
}
136*9880d681SAndroid Build Coastguard Worker
; Splat of element 0 of %a into all 16 i32 lanes (%b is not referenced by the
; mask). Expected lowering is a single vpbroadcastd from xmm0.
define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; ALL:       # BB#0:
; ALL-NEXT:    vpbroadcastd %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i32> %shuffle
}
145*9880d681SAndroid Build Coastguard Worker
; Splat of element 4 of %a into all 16 i32 lanes. Element 4 is the first
; element of 128-bit chunk 1, so the expected code extracts that chunk
; (vextracti32x4 $1) and then broadcasts its low element with vpbroadcastd.
define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04:
; ALL:       # BB#0:
; ALL-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; ALL-NEXT:    vpbroadcastd %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <16 x i32> %shuffle
}
155*9880d681SAndroid Build Coastguard Worker
; Integer (<16 x i32>) counterpart of the unpack-high interleave of %a and %b.
; Expected lowering is a single vpunpckhdq.
define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL:       # BB#0:
; ALL-NEXT:    vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i32> %shuffle
}
164*9880d681SAndroid Build Coastguard Worker
; Unpack-high pattern whose second shuffle operand is zeroinitializer. The odd
; result lanes use assorted indices (30, 28, ..., 16) that all select from the
; zero vector; the even lanes take elements 2,3,6,7,... of %a. %b is unused.
; Expected code zeroes zmm1 with vpxord and then issues vpunpckhdq.
define <16 x i32> @shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz:
; ALL:       # BB#0:
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32><i32 2, i32 30, i32 3, i32 28, i32 6, i32 26, i32 7, i32 24, i32 10, i32 22, i32 11, i32 20, i32 14, i32 18, i32 15, i32 16>
  ret <16 x i32> %shuffle
}
174*9880d681SAndroid Build Coastguard Worker
; Per group of four i32 lanes: elements 1..3 of %a followed by element 0 of
; the matching group of %b (indices in the function name are decimal).
; The two runs are expected to differ: with only +avx512f the shuffle becomes
; a vpermt2d driven by a constant index vector, whereas with +avx512bw it is
; matched as a byte-granular vpalignr.
define <16 x i32> @shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-LABEL: shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [1,2,3,16,5,6,7,20,9,10,11,24,13,14,15,28]
; AVX512F-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1,2,3],zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zmm1[16,17,18,19],zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zmm1[32,33,34,35],zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zmm1[48,49,50,51]
; AVX512BW-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 1, i32 2, i32 3, i32 16, i32 5, i32 6, i32 7, i32 20, i32 9, i32 10, i32 11, i32 24, i32 13, i32 14, i32 15, i32 28>
  ret <16 x i32> %shuffle
}
189*9880d681SAndroid Build Coastguard Worker
; Single-input shuffle of %a (second operand is undef) whose defined lanes
; pull elements from arbitrary positions across the whole vector; several
; lanes are undef. Expected lowering loads the index vector with vmovdqa32
; and performs a variable permute with vpermps.
define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x float> %a)  {
; ALL-LABEL: shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
  ret <16 x float> %c
}
199*9880d681SAndroid Build Coastguard Worker
; Integer (<16 x i32>) counterpart of the single-input, partially-undef
; permute of %a. Expected lowering loads the index vector with vmovdqa32 and
; performs a variable permute with vpermd.
define <16 x i32> @shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x i32> %a)  {
; ALL-LABEL: shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
  ret <16 x i32> %c
}
209*9880d681SAndroid Build Coastguard Worker
; Two-input shuffle mixing elements of %a and %b from arbitrary positions.
; Expected lowering loads a constant index vector and uses the two-source
; variable permute vpermt2d.
; NOTE(review): the function name spells position 7 as 1e, but the mask (and
; the checked index vector) use 28 (0x1c) there; the name is left unchanged.
define <16 x i32> @shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32> %b)  {
; ALL-LABEL: shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x i32> %c
}
219*9880d681SAndroid Build Coastguard Worker
; Float counterpart of the arbitrary two-input shuffle of %a and %b.
; Expected lowering loads a constant index vector and uses vpermt2ps.
; NOTE(review): the function name spells position 7 as 1e, but the mask (and
; the checked index vector) use 28 (0x1c) there; the name is left unchanged.
define <16 x float> @shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float> %b)  {
; ALL-LABEL: shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x float> %c
}
229*9880d681SAndroid Build Coastguard Worker
; Same arbitrary two-input float shuffle, but the second operand is loaded
; from the pointer %b. The checks expect the load to be folded into the
; permute as a memory operand: vpermt2ps (%rdi).
; NOTE(review): the function name spells position 7 as 1e, but the mask (and
; the checked index vector) use 28 (0x1c) there; the name is left unchanged.
define <16 x float> @shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float>* %b)  {
; ALL-LABEL: shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT:    vpermt2ps (%rdi), %zmm1, %zmm0
; ALL-NEXT:    retq
  %c = load <16 x float>, <16 x float>* %b
  %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x float> %d
}
240*9880d681SAndroid Build Coastguard Worker
; Integer counterpart of the load-folding test: the second shuffle operand is
; loaded from the pointer %b and the checks expect it to be folded into the
; permute as a memory operand: vpermt2d (%rdi).
; NOTE(review): the function name spells position 7 as 1e, but the mask (and
; the checked index vector) use 28 (0x1c) there; the name is left unchanged.
define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32>* %b)  {
; ALL-LABEL: shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT:    vpermt2d (%rdi), %zmm1, %zmm0
; ALL-NEXT:    retq
  %c = load <16 x i32>, <16 x i32>* %b
  %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x i32> %d
}
251*9880d681SAndroid Build Coastguard Worker
; Only the low four result lanes are defined: elements 0..2 of %a followed by
; mask index 19 (element 3 of %b); the remaining twelve lanes are undef.
; Expected lowering is a vpermt2d whose index vector keeps the undef lanes.
define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b)  {
; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,1,2,19,u,u,u,u,u,u,u,u,u,u,u,u>
; ALL-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i32> %c
}
261*9880d681SAndroid Build Coastguard Worker
; Loads sixteen floats through %a (only 4-byte alignment is promised, hence
; the unaligned vmovups in the checks) and returns the upper eight elements
; as an <8 x float>. Expected lowering extracts the high 256 bits with
; vextractf64x4 $1. The %RET parameter is not used by the body.
define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups (%rsi), %zmm0
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    retq
  %ptr_a = bitcast float* %a to <16 x float>*
  %v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
  %v2 = shufflevector <16 x float> %v_a, <16 x float> undef, <8 x i32>  <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %v2
}
273*9880d681SAndroid Build Coastguard Worker
; Applies the in-chunk pattern [1,0,0,0] to each group of four i32 lanes of
; %a (%b is not referenced by the mask). Expected lowering is a single
; vpshufd.
; NOTE(review): the name says v16i16, but the function operates on
; <16 x i32>; the name is left unchanged.
define <16 x i32> @shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12(<16 x i32> %a, <16 x i32> %b)  {
; ALL-LABEL: shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12:
; ALL:       # BB#0:
; ALL-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
  ret <16 x i32> %c
}
282*9880d681SAndroid Build Coastguard Worker
; Swaps the two 64-bit halves inside each group of four i32 lanes of %a
; (in-chunk pattern [2,3,0,1]); %b is not referenced by the mask. Expected
; lowering is a single vpshufd.
; NOTE(review): the function name advertises the pattern 3,3,0,0,... and the
; type v16i16, but the actual mask is 2,3,0,1,... on <16 x i32>. The mask and
; the checks agree with each other; the name is left unchanged.
define <16 x i32> @shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12(<16 x i32> %a, <16 x i32> %b)  {
; ALL-LABEL: shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12:
; ALL:       # BB#0:
; ALL-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  ret <16 x i32> %c
}
291*9880d681SAndroid Build Coastguard Worker
; Per group of four float lanes: elements 0 and 1 of %a followed by element 0
; of the matching group of %b, twice. Expected lowering is a single
; two-source vshufps.
define <16 x float> @shuffle_v16f32_00_01_10_10_04_05_14_14_08_09_18_18_0c_0d_1c_1c(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_01_10_10_04_05_14_14_08_09_18_18_0c_0d_1c_1c:
; ALL:       # BB#0:
; ALL-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[0,1],zmm1[0,0],zmm0[4,5],zmm1[4,4],zmm0[8,9],zmm1[8,8],zmm0[12,13],zmm1[12,12]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
  ret <16 x float> %shuffle
}
300