xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
4*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: takes an i8* destination, a <64 x i8> value and an i64 mask.
5*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)
6*9880d681SAndroid Build Coastguard Worker
; Stores %x1 twice through the intrinsic: to %ptr1 under the variable mask %x2,
; then to %ptr2 under the constant mask -1. The expected assembly is a
; {%k1}-masked vmovdqu8 followed by a vmovdqu8 with no mask operand.
7*9880d681SAndroid Build Coastguard Workerdefine void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
8*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_b_512:
9*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
10*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %rdx, %k1
11*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 %zmm0, (%rdi) {%k1}
12*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 %zmm0, (%rsi)
13*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
14*9880d681SAndroid Build Coastguard Worker;
15*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_b_512:
16*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
17*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
18*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
19*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
20*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu8 %zmm0, (%ecx) {%k1}
21*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu8 %zmm0, (%eax)
22*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
23*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
24*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1)
25*9880d681SAndroid Build Coastguard Worker  ret void
26*9880d681SAndroid Build Coastguard Worker}
27*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: takes an i8* destination, a <32 x i16> value and an i32 mask.
28*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32)
29*9880d681SAndroid Build Coastguard Worker
; Stores %x1 twice through the intrinsic: to %ptr1 under the variable mask %x2,
; then to %ptr2 under the constant mask -1. The expected assembly is a
; {%k1}-masked vmovdqu16 followed by a vmovdqu16 with no mask operand.
30*9880d681SAndroid Build Coastguard Workerdefine void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) {
31*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_w_512:
32*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
33*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %edx, %k1
34*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu16 %zmm0, (%rdi) {%k1}
35*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu16 %zmm0, (%rsi)
36*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
37*9880d681SAndroid Build Coastguard Worker;
38*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_w_512:
39*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
40*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
41*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
42*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
43*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu16 %zmm0, (%ecx) {%k1}
44*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu16 %zmm0, (%eax)
45*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
46*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
47*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1)
48*9880d681SAndroid Build Coastguard Worker  ret void
49*9880d681SAndroid Build Coastguard Worker}
50*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: takes an i8* source, a <32 x i16> pass-through operand
; and an i32 mask, and returns <32 x i16>.
51*9880d681SAndroid Build Coastguard Workerdeclare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32)
52*9880d681SAndroid Build Coastguard Worker
; Issues three loads through the intrinsic and returns the sum of the last two:
;   %res0 - from %ptr, second operand %x1, constant mask -1
;   %res  - from %ptr2, second operand %res0, variable mask %mask
;   %res1 - from %ptr, second operand zeroinitializer, variable mask %mask
; The expected assembly is a vmovdqu16 with no mask, one with {%k1}, one with
; {%k1} {z}, and a vpaddw combining the results.
53*9880d681SAndroid Build Coastguard Workerdefine <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) {
54*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_w_512:
55*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
56*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu16 (%rdi), %zmm0
57*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %edx, %k1
58*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu16 (%rsi), %zmm0 {%k1}
59*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu16 (%rdi), %zmm1 {%k1} {z}
60*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
61*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
62*9880d681SAndroid Build Coastguard Worker;
63*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_loadu_w_512:
64*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
65*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
66*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
67*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu16 (%ecx), %zmm0
68*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
69*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu16 (%eax), %zmm0 {%k1}
70*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu16 (%ecx), %zmm1 {%k1} {z}
71*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
72*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
73*9880d681SAndroid Build Coastguard Worker  %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1)
74*9880d681SAndroid Build Coastguard Worker  %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask)
75*9880d681SAndroid Build Coastguard Worker  %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask)
76*9880d681SAndroid Build Coastguard Worker  %res2 = add <32 x i16> %res, %res1
77*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %res2
78*9880d681SAndroid Build Coastguard Worker}
79*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: takes an i8* source, a <64 x i8> pass-through operand
; and an i64 mask, and returns <64 x i8>.
80*9880d681SAndroid Build Coastguard Workerdeclare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64)
81*9880d681SAndroid Build Coastguard Worker
; Issues three loads through the intrinsic and returns the sum of the last two:
;   %res0 - from %ptr, second operand %x1, constant mask -1
;   %res  - from %ptr2, second operand %res0, variable mask %mask
;   %res1 - from %ptr, second operand zeroinitializer, variable mask %mask
; The expected assembly is a vmovdqu8 with no mask, one with {%k1}, one with
; {%k1} {z}, and a vpaddb combining the results.
82*9880d681SAndroid Build Coastguard Workerdefine <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) {
83*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_b_512:
84*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
85*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 (%rdi), %zmm0
86*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %rdx, %k1
87*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 (%rsi), %zmm0 {%k1}
88*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 (%rdi), %zmm1 {%k1} {z}
89*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
90*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
91*9880d681SAndroid Build Coastguard Worker;
92*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_loadu_b_512:
93*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
94*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
95*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
96*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu8 (%ecx), %zmm0
97*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
98*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu8 (%eax), %zmm0 {%k1}
99*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vmovdqu8 (%ecx), %zmm1 {%k1} {z}
100*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
101*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
102*9880d681SAndroid Build Coastguard Worker  %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1)
103*9880d681SAndroid Build Coastguard Worker  %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask)
104*9880d681SAndroid Build Coastguard Worker  %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask)
105*9880d681SAndroid Build Coastguard Worker  %res2 = add <64 x i8> %res, %res1
106*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %res2
107*9880d681SAndroid Build Coastguard Worker}
108*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: takes an <8 x i64> vector and an i32 operand, and
; returns <8 x i64>. This declaration is also used by the psll_load_dq test.
109*9880d681SAndroid Build Coastguard Workerdeclare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
110*9880d681SAndroid Build Coastguard Worker
; Calls the intrinsic on %x0 with constants 8 and 4 and returns the sum of the
; two results. The expected assembly is two vpslldq instructions whose decoded
; shuffle comments show 8 and 4 zero bytes at the start of each 16-byte group,
; followed by a vpaddq.
111*9880d681SAndroid Build Coastguard Workerdefine <8 x i64>@test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) {
112*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_psll_dq_512:
113*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
114*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpslldq {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55]
115*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59]
116*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
117*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
118*9880d681SAndroid Build Coastguard Worker;
119*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_psll_dq_512:
120*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
121*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpslldq {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55]
122*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59]
123*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
124*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
125*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
126*9880d681SAndroid Build Coastguard Worker  %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
127*9880d681SAndroid Build Coastguard Worker  %res2 = add <8 x i64> %res, %res1
128*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %res2
129*9880d681SAndroid Build Coastguard Worker}
130*9880d681SAndroid Build Coastguard Worker
; Loads an <8 x i64> from %p0 and passes it to llvm.x86.avx512.psll.dq.512 with
; the constant 4. The expected assembly is a single vpslldq whose decoded
; shuffle comment reads its source as "mem", i.e. no separate vector load
; instruction is expected before it.
131*9880d681SAndroid Build Coastguard Workerdefine <8 x i64>@test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) {
132*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_psll_load_dq_512:
133*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
134*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
135*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
136*9880d681SAndroid Build Coastguard Worker;
137*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_psll_load_dq_512:
138*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
139*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
140*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
141*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
142*9880d681SAndroid Build Coastguard Worker  %x0 = load <8 x i64>, <8 x i64> *%p0
143*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
144*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %res
145*9880d681SAndroid Build Coastguard Worker}
146*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: takes an <8 x i64> vector and an i32 operand, and
; returns <8 x i64>. This declaration is also used by the psrl_load_dq test.
147*9880d681SAndroid Build Coastguard Workerdeclare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
148*9880d681SAndroid Build Coastguard Worker
; Calls the intrinsic on %x0 with constants 8 and 4 and returns the sum of the
; two results. The expected assembly is two vpsrldq instructions whose decoded
; shuffle comments show 8 and 4 zero bytes at the end of each 16-byte group,
; followed by a vpaddq.
149*9880d681SAndroid Build Coastguard Workerdefine <8 x i64>@test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) {
150*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_psrl_dq_512:
151*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
152*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpsrldq {{.*#+}} zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero
153*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
154*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
155*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
156*9880d681SAndroid Build Coastguard Worker;
157*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_psrl_dq_512:
158*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
159*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpsrldq {{.*#+}} zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero
160*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
161*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
162*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
163*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
164*9880d681SAndroid Build Coastguard Worker  %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
165*9880d681SAndroid Build Coastguard Worker  %res2 = add <8 x i64> %res, %res1
166*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %res2
167*9880d681SAndroid Build Coastguard Worker}
168*9880d681SAndroid Build Coastguard Worker
; Loads an <8 x i64> from %p0 and passes it to llvm.x86.avx512.psrl.dq.512 with
; the constant 4. The expected assembly is a single vpsrldq whose decoded
; shuffle comment reads its source as "mem", i.e. no separate vector load
; instruction is expected before it.
169*9880d681SAndroid Build Coastguard Workerdefine <8 x i64>@test_int_x86_avx512_psrl_load_dq_512(<8 x i64>* %p0) {
170*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_psrl_load_dq_512:
171*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
172*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpsrldq {{.*#+}} zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
173*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
174*9880d681SAndroid Build Coastguard Worker;
175*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_psrl_load_dq_512:
176*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
177*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
178*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpsrldq {{.*#+}} zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
179*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
180*9880d681SAndroid Build Coastguard Worker  %x0 = load <8 x i64>, <8 x i64> *%p0
181*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
182*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %res
183*9880d681SAndroid Build Coastguard Worker}
184*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: takes two <64 x i8> inputs, an i32 operand, a <64 x i8>
; pass-through operand and an i64 mask, and returns <64 x i8>.
185*9880d681SAndroid Build Coastguard Workerdeclare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
186*9880d681SAndroid Build Coastguard Worker
; Calls the intrinsic three times on %x0/%x1 with the constant 2 and returns
; the sum of all three results:
;   %res  - fourth operand %x3, variable mask %x4
;   %res1 - fourth operand zeroinitializer, variable mask %x4
;   %res2 - fourth operand %x3, constant mask -1
; The expected assembly is three vpalignr instructions (no mask, {%k1}, and
; {%k1} {z}) followed by two vpaddb instructions.
187*9880d681SAndroid Build Coastguard Workerdefine <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
188*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_palignr_512:
189*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
190*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
191*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %rdi, %k1
192*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
193*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
194*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
195*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
196*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
197*9880d681SAndroid Build Coastguard Worker;
198*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_palignr_512:
199*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
200*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpalignr {{.*#+}} zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
201*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
202*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpalignr {{.*#+}} zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
203*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpalignr {{.*#+}} zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
204*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
205*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
206*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
207*9880d681SAndroid Build Coastguard Worker  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
208*9880d681SAndroid Build Coastguard Worker  %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
209*9880d681SAndroid Build Coastguard Worker  %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
210*9880d681SAndroid Build Coastguard Worker  %res3 = add <64 x i8> %res, %res1
211*9880d681SAndroid Build Coastguard Worker  %res4 = add <64 x i8> %res3, %res2
212*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %res4
213*9880d681SAndroid Build Coastguard Worker}
214*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: takes a <32 x i16> input, an i32 operand, a <32 x i16>
; pass-through operand and an i32 mask, and returns <32 x i16>.
215*9880d681SAndroid Build Coastguard Workerdeclare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i32, <32 x i16>, i32)
216*9880d681SAndroid Build Coastguard Worker
; Calls the intrinsic three times on %x0 with the constant 3 and returns the
; sum of all three results:
;   %res  - third operand %x2, variable mask %x3
;   %res1 - third operand zeroinitializer, variable mask %x3
;   %res2 - third operand %x2, constant mask -1
; The %x1 parameter is not referenced by any call in the body. The expected
; assembly is three vpshufhw instructions (no mask, {%k1}, and {%k1} {z})
; followed by two vpaddw instructions.
217*9880d681SAndroid Build Coastguard Workerdefine <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
218*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
219*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
220*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpshufhw {{.*#+}} zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
221*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %esi, %k1
222*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
223*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
224*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
225*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddw %zmm2, %zmm0, %zmm0
226*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
227*9880d681SAndroid Build Coastguard Worker;
228*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
229*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
230*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpshufhw {{.*#+}} zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
231*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
232*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
233*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
234*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
235*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm0, %zmm0
236*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
237*9880d681SAndroid Build Coastguard Worker  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
238*9880d681SAndroid Build Coastguard Worker  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
239*9880d681SAndroid Build Coastguard Worker  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
240*9880d681SAndroid Build Coastguard Worker  %res3 = add <32 x i16> %res, %res1
241*9880d681SAndroid Build Coastguard Worker  %res4 = add <32 x i16> %res3, %res2
242*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %res4
243*9880d681SAndroid Build Coastguard Worker}
244*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: takes a <32 x i16> input, an i32 operand, a <32 x i16>
; pass-through operand and an i32 mask, and returns <32 x i16>.
245*9880d681SAndroid Build Coastguard Workerdeclare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i32, <32 x i16>, i32)
246*9880d681SAndroid Build Coastguard Worker
; Calls the intrinsic three times on %x0 with the constant 3 and returns the
; sum of all three results:
;   %res  - third operand %x2, variable mask %x3
;   %res1 - third operand zeroinitializer, variable mask %x3
;   %res2 - third operand %x2, constant mask -1
; The %x1 parameter is not referenced by any call in the body. The expected
; assembly is three vpshuflw instructions (no mask, {%k1}, and {%k1} {z})
; followed by two vpaddw instructions.
247*9880d681SAndroid Build Coastguard Workerdefine <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
248*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
249*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
250*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpshuflw {{.*#+}} zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
251*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %esi, %k1
252*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
253*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
254*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
255*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddw %zmm2, %zmm0, %zmm0
256*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
257*9880d681SAndroid Build Coastguard Worker;
258*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
259*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
260*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpshuflw {{.*#+}} zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
261*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
262*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
263*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
264*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
265*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm0, %zmm0
266*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
267*9880d681SAndroid Build Coastguard Worker  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
268*9880d681SAndroid Build Coastguard Worker  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
269*9880d681SAndroid Build Coastguard Worker  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
270*9880d681SAndroid Build Coastguard Worker  %res3 = add <32 x i16> %res, %res1
271*9880d681SAndroid Build Coastguard Worker  %res4 = add <32 x i16> %res3, %res2
272*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %res4
273*9880d681SAndroid Build Coastguard Worker}
274*9880d681SAndroid Build Coastguard Worker
; Calls llvm.x86.avx512.mask.pcmpeq.b.512 on %a and %b with the constant mask
; -1 and returns the i64 result. For the x86_64 run the expected assembly is a
; vpcmpeqb into %k0 followed by a kmovq into %rax. For the i386 run the expected
; assembly stores %k0 to the stack with kmovq and then reads it back with two
; movl instructions into %eax and %edx.
275*9880d681SAndroid Build Coastguard Workerdefine i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
276*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_pcmpeq_b:
277*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
278*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
279*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %k0, %rax
280*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
281*9880d681SAndroid Build Coastguard Worker;
282*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_pcmpeq_b:
283*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
284*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    subl $12, %esp
285*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:  .Ltmp0:
286*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
287*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
288*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq %k0, (%esp)
289*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl (%esp), %eax
290*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
291*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    addl $12, %esp
292*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
293*9880d681SAndroid Build Coastguard Worker  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
294*9880d681SAndroid Build Coastguard Worker  ret i64 %res
295*9880d681SAndroid Build Coastguard Worker}
296*9880d681SAndroid Build Coastguard Worker
; Calls llvm.x86.avx512.mask.pcmpeq.b.512 on %a and %b with the variable mask
; %mask and returns the i64 result. The expected assembly first moves the mask
; into %k1 with kmovq, then issues a vpcmpeqb into %k0 under {%k1}. The result
; is returned in %rax for the x86_64 run, and through the stack into %eax and
; %edx for the i386 run.
297*9880d681SAndroid Build Coastguard Workerdefine i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
298*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_mask_pcmpeq_b:
299*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
300*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %rdi, %k1
301*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
302*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %k0, %rax
303*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
304*9880d681SAndroid Build Coastguard Worker;
305*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_mask_pcmpeq_b:
306*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
307*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    subl $12, %esp
308*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:  .Ltmp1:
309*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
310*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
311*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
312*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq %k0, (%esp)
313*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl (%esp), %eax
314*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
315*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    addl $12, %esp
316*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
317*9880d681SAndroid Build Coastguard Worker  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
318*9880d681SAndroid Build Coastguard Worker  ret i64 %res
319*9880d681SAndroid Build Coastguard Worker}
320*9880d681SAndroid Build Coastguard Worker
; Intrinsic called by test_pcmpeq_b and test_mask_pcmpeq_b: two <64 x i8>
; operands plus an i64 mask operand, returning an i64.
; NOTE(review): presumably one result bit per byte lane -- confirm.
321*9880d681SAndroid Build Coastguard Workerdeclare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
322*9880d681SAndroid Build Coastguard Worker
; Word-equality compare with a constant all-ones mask (i32 -1) passed to
; @llvm.x86.avx512.mask.pcmpeq.w.512; the i32 result is returned directly.
; Both runs expect an unmasked vpcmpeqw (no {%k1} operand) followed by a
; kmovd of %k0 into %eax; only the return mnemonic (retq vs. retl) differs.
323*9880d681SAndroid Build Coastguard Workerdefine i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
324*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_pcmpeq_w:
325*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
326*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
327*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %k0, %eax
328*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
329*9880d681SAndroid Build Coastguard Worker;
330*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_pcmpeq_w:
331*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
332*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
333*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd %k0, %eax
334*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
335*9880d681SAndroid Build Coastguard Worker  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
336*9880d681SAndroid Build Coastguard Worker  ret i32 %res
337*9880d681SAndroid Build Coastguard Worker}
338*9880d681SAndroid Build Coastguard Worker
; Masked word-equality compare: forwards the caller-supplied i32 %mask to
; @llvm.x86.avx512.mask.pcmpeq.w.512 and returns the i32 result.
; x86-64 checks expect the mask moved from %edi into %k1 with kmovd;
; i386 checks expect kmovd to load it straight from a stack slot.
; Both then expect vpcmpeqw with the {%k1} write-mask and kmovd into %eax.
339*9880d681SAndroid Build Coastguard Workerdefine i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
340*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_mask_pcmpeq_w:
341*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
342*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %edi, %k1
343*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
344*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %k0, %eax
345*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
346*9880d681SAndroid Build Coastguard Worker;
347*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_mask_pcmpeq_w:
348*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
349*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
350*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
351*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd %k0, %eax
352*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
353*9880d681SAndroid Build Coastguard Worker  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
354*9880d681SAndroid Build Coastguard Worker  ret i32 %res
355*9880d681SAndroid Build Coastguard Worker}
356*9880d681SAndroid Build Coastguard Worker
; Intrinsic called by test_pcmpeq_w and test_mask_pcmpeq_w: two <32 x i16>
; operands plus an i32 mask operand, returning an i32.
; NOTE(review): presumably one result bit per word lane -- confirm.
357*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
358*9880d681SAndroid Build Coastguard Worker
; Byte greater-than compare with a constant all-ones mask (i64 -1) passed to
; @llvm.x86.avx512.mask.pcmpgt.b.512; the i64 result is returned directly.
; x86-64 checks expect an unmasked vpcmpgtb and a kmovq of %k0 into %rax.
; i386 checks expect 12 bytes of stack reserved, the 64-bit result stored to
; (%esp) with kmovq, and two movl reloads into %eax and %edx.
359*9880d681SAndroid Build Coastguard Workerdefine i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
360*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_pcmpgt_b:
361*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
362*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0
363*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %k0, %rax
364*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
365*9880d681SAndroid Build Coastguard Worker;
366*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_pcmpgt_b:
367*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
368*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    subl $12, %esp
369*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:  .Ltmp2:
370*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
371*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0
372*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq %k0, (%esp)
373*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl (%esp), %eax
374*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
375*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    addl $12, %esp
376*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
377*9880d681SAndroid Build Coastguard Worker  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
378*9880d681SAndroid Build Coastguard Worker  ret i64 %res
379*9880d681SAndroid Build Coastguard Worker}
380*9880d681SAndroid Build Coastguard Worker
; Masked byte greater-than compare: forwards the caller-supplied i64 %mask to
; @llvm.x86.avx512.mask.pcmpgt.b.512 and returns the i64 result.
; x86-64 checks expect %rdi moved into %k1, a vpcmpgtb carrying the {%k1}
; write-mask, and the result leaving through kmovq into %rax.
; i386 checks expect the mask loaded from the stack with kmovq, the 64-bit
; result stored to (%esp), and two movl reloads into %eax and %edx.
381*9880d681SAndroid Build Coastguard Workerdefine i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
382*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_mask_pcmpgt_b:
383*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
384*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %rdi, %k1
385*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
386*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %k0, %rax
387*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
388*9880d681SAndroid Build Coastguard Worker;
389*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_mask_pcmpgt_b:
390*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
391*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    subl $12, %esp
392*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:  .Ltmp3:
393*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
394*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
395*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
396*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq %k0, (%esp)
397*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl (%esp), %eax
398*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
399*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    addl $12, %esp
400*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
401*9880d681SAndroid Build Coastguard Worker  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
402*9880d681SAndroid Build Coastguard Worker  ret i64 %res
403*9880d681SAndroid Build Coastguard Worker}
404*9880d681SAndroid Build Coastguard Worker
; Intrinsic called by test_pcmpgt_b and test_mask_pcmpgt_b: two <64 x i8>
; operands plus an i64 mask operand, returning an i64.
; NOTE(review): presumably one result bit per byte lane -- confirm.
405*9880d681SAndroid Build Coastguard Workerdeclare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
406*9880d681SAndroid Build Coastguard Worker
; Word greater-than compare with a constant all-ones mask (i32 -1) passed to
; @llvm.x86.avx512.mask.pcmpgt.w.512; the i32 result is returned directly.
; Both runs expect an unmasked vpcmpgtw (no {%k1} operand) followed by a
; kmovd of %k0 into %eax; only the return mnemonic (retq vs. retl) differs.
407*9880d681SAndroid Build Coastguard Workerdefine i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
408*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_pcmpgt_w:
409*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
410*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
411*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %k0, %eax
412*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
413*9880d681SAndroid Build Coastguard Worker;
414*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_pcmpgt_w:
415*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
416*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
417*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd %k0, %eax
418*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
419*9880d681SAndroid Build Coastguard Worker  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
420*9880d681SAndroid Build Coastguard Worker  ret i32 %res
421*9880d681SAndroid Build Coastguard Worker}
422*9880d681SAndroid Build Coastguard Worker
; Masked word greater-than compare: forwards the caller-supplied i32 %mask to
; @llvm.x86.avx512.mask.pcmpgt.w.512 and returns the i32 result.
; x86-64 checks expect the mask moved from %edi into %k1 with kmovd;
; i386 checks expect kmovd to load it straight from a stack slot.
; Both then expect vpcmpgtw with the {%k1} write-mask and kmovd into %eax.
423*9880d681SAndroid Build Coastguard Workerdefine i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
424*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_mask_pcmpgt_w:
425*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
426*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %edi, %k1
427*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
428*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %k0, %eax
429*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
430*9880d681SAndroid Build Coastguard Worker;
431*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_mask_pcmpgt_w:
432*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
433*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
434*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
435*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd %k0, %eax
436*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
437*9880d681SAndroid Build Coastguard Worker  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
438*9880d681SAndroid Build Coastguard Worker  ret i32 %res
439*9880d681SAndroid Build Coastguard Worker}
440*9880d681SAndroid Build Coastguard Worker
; Intrinsic called by test_pcmpgt_w and test_mask_pcmpgt_w: two <32 x i16>
; operands plus an i32 mask operand, returning an i32.
; NOTE(review): presumably one result bit per word lane -- confirm.
441*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
442*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: three <64 x i8> operands plus an i64 mask operand,
; returning <64 x i8>.
443*9880d681SAndroid Build Coastguard Workerdeclare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
444*9880d681SAndroid Build Coastguard Worker
; Calls the intrinsic twice on the same vectors -- once with the caller's
; mask %x3 and once with an all-ones mask (i64 -1) -- and returns the sum, so
; a single function covers both the masked and the unmasked lowering.
; Expected code: an unmasked vpunpckhbw into zmm3, the mask moved into %k1
; with kmovq, a {%k1}-masked vpunpckhbw into zmm2, then vpaddb of the two.
; The checked shuffle interleaves zmm0/zmm1 bytes 8-15, 24-31, 40-47, 56-63.
; NOTE(review): zmm2 is presumably where %x2 arrives, making it the
; pass-through value for masked-off lanes -- confirm.
; NOTE(review): {{.*#+}} appears to skip the operands and match llc's
; shuffle-decode comment -- confirm.
445*9880d681SAndroid Build Coastguard Workerdefine <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
446*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
447*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
448*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
449*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %rdi, %k1
450*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
451*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddb %zmm3, %zmm2, %zmm0
452*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
453*9880d681SAndroid Build Coastguard Worker;
454*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
455*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
456*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
457*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
458*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpunpckhbw {{.*#+}} zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
459*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddb %zmm3, %zmm2, %zmm0
460*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
461*9880d681SAndroid Build Coastguard Worker  %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
462*9880d681SAndroid Build Coastguard Worker  %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
463*9880d681SAndroid Build Coastguard Worker  %res2 = add <64 x i8> %res, %res1
464*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %res2
465*9880d681SAndroid Build Coastguard Worker}
466*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: three <64 x i8> operands plus an i64 mask operand,
; returning <64 x i8>.
467*9880d681SAndroid Build Coastguard Workerdeclare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
468*9880d681SAndroid Build Coastguard Worker
; Calls the intrinsic twice on the same vectors -- once with the caller's
; mask %x3 and once with an all-ones mask (i64 -1) -- and returns the sum, so
; a single function covers both the masked and the unmasked lowering.
; Expected code: an unmasked vpunpcklbw into zmm3, the mask moved into %k1
; with kmovq, a {%k1}-masked vpunpcklbw into zmm2, then vpaddb of the two.
; The checked shuffle interleaves zmm0/zmm1 bytes 0-7, 16-23, 32-39, 48-55.
; NOTE(review): zmm2 is presumably where %x2 arrives, making it the
; pass-through value for masked-off lanes -- confirm.
469*9880d681SAndroid Build Coastguard Workerdefine <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
470*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
471*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
472*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
473*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovq %rdi, %k1
474*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
475*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddb %zmm3, %zmm2, %zmm0
476*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
477*9880d681SAndroid Build Coastguard Worker;
478*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
479*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
480*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
481*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
482*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpunpcklbw {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
483*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddb %zmm3, %zmm2, %zmm0
484*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
485*9880d681SAndroid Build Coastguard Worker  %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
486*9880d681SAndroid Build Coastguard Worker  %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
487*9880d681SAndroid Build Coastguard Worker  %res2 = add <64 x i8> %res, %res1
488*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %res2
489*9880d681SAndroid Build Coastguard Worker}
490*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: three <32 x i16> operands plus an i32 mask operand,
; returning <32 x i16>.
491*9880d681SAndroid Build Coastguard Workerdeclare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
492*9880d681SAndroid Build Coastguard Worker
; Calls the intrinsic twice on the same vectors -- once with the caller's
; mask %x3 and once with an all-ones mask (i32 -1) -- and returns the sum, so
; a single function covers both the masked and the unmasked lowering.
; Expected code: an unmasked vpunpckhwd into zmm3, the mask moved into %k1
; with kmovd, a {%k1}-masked vpunpckhwd into zmm2, then vpaddw of the two.
; The checked shuffle interleaves zmm0/zmm1 words 4-7, 12-15, 20-23, 28-31.
; NOTE(review): zmm2 is presumably where %x2 arrives, making it the
; pass-through value for masked-off lanes -- confirm.
493*9880d681SAndroid Build Coastguard Workerdefine <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
494*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
495*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
496*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpunpckhwd {{.*#+}} zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
497*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %edi, %k1
498*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpunpckhwd {{.*#+}} zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
499*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm0
500*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
501*9880d681SAndroid Build Coastguard Worker;
502*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
503*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
504*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpunpckhwd {{.*#+}} zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
505*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
506*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpunpckhwd {{.*#+}} zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
507*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm0
508*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
509*9880d681SAndroid Build Coastguard Worker  %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
510*9880d681SAndroid Build Coastguard Worker  %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
511*9880d681SAndroid Build Coastguard Worker  %res2 = add <32 x i16> %res, %res1
512*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %res2
513*9880d681SAndroid Build Coastguard Worker}
514*9880d681SAndroid Build Coastguard Worker
; Intrinsic under test: three <32 x i16> operands plus an i32 mask operand,
; returning <32 x i16>.
515*9880d681SAndroid Build Coastguard Workerdeclare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
516*9880d681SAndroid Build Coastguard Worker
; Calls the intrinsic twice on the same vectors -- once with the caller's
; mask %x3 and once with an all-ones mask (i32 -1) -- and returns the sum, so
; a single function covers both the masked and the unmasked lowering.
; Expected code: an unmasked vpunpcklwd into zmm3, the mask moved into %k1
; with kmovd, a {%k1}-masked vpunpcklwd into zmm2, then vpaddw of the two.
; The checked shuffle interleaves zmm0/zmm1 words 0-3, 8-11, 16-19, 24-27.
; NOTE(review): zmm2 is presumably where %x2 arrives, making it the
; pass-through value for masked-off lanes -- confirm.
517*9880d681SAndroid Build Coastguard Workerdefine <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
518*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
519*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
520*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpunpcklwd {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
521*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    kmovd %edi, %k1
522*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpunpcklwd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
523*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm0
524*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
525*9880d681SAndroid Build Coastguard Worker;
526*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
527*9880d681SAndroid Build Coastguard Worker; AVX512F-32:       # BB#0:
528*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpunpcklwd {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
529*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
530*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpunpcklwd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
531*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm0
532*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT:    retl
533*9880d681SAndroid Build Coastguard Worker  %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
534*9880d681SAndroid Build Coastguard Worker  %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
535*9880d681SAndroid Build Coastguard Worker  %res2 = add <32 x i16> %res, %res1
536*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %res2
537*9880d681SAndroid Build Coastguard Worker}
538*9880d681SAndroid Build Coastguard Worker
539