; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32

; Every function below is compiled twice (64-bit Darwin and 32-bit Linux, both
; with -mcpu=knl -mattr=+avx512bw) and its assembly is matched against the
; autogenerated assertion lines that sit inside the function body.

declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)

; Unaligned byte store intrinsic, called once with a variable i64 mask and once
; with an all-ones mask. The expected assembly uses a {%k1}-masked vmovdqu8 for
; the first call and an unmasked vmovdqu8 for the second.
define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdx, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rsi)
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1}
; AVX512F-32-NEXT: vmovdqu8 %zmm0, (%eax)
; AVX512F-32-NEXT: retl
  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32)

; Word-element counterpart of the store test above: variable i32 mask, then an
; all-ones mask. Expected assembly is a masked and then an unmasked vmovdqu16.
define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edx, %k1
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rsi)
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovdqu16 %zmm0, (%ecx) {%k1}
; AVX512F-32-NEXT: vmovdqu16 %zmm0, (%eax)
; AVX512F-32-NEXT: retl
  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1)
  ret void
}

declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32)

; Three word loads whose results are summed: an all-ones-mask load, a load
; merged into that result under %mask, and a load with a zeroinitializer
; pass-through under %mask. Expected assembly: plain, {%k1} and {%k1} {z}
; forms of vmovdqu16 followed by vpaddw.
define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
; AVX512BW-NEXT: kmovd %edx, %k1
; AVX512BW-NEXT: vmovdqu16 (%rsi), %zmm0 {%k1}
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1} {z}
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_loadu_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: vmovdqu16 (%ecx), %zmm0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovdqu16 (%eax), %zmm0 {%k1}
; AVX512F-32-NEXT: vmovdqu16 (%ecx), %zmm1 {%k1} {z}
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}

declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64)

; Byte-element counterpart of the load test above (i64 mask, vmovdqu8, vpaddb).
define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0
; AVX512BW-NEXT: kmovq %rdx, %k1
; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0 {%k1}
; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1} {z}
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_loadu_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: vmovdqu8 (%ecx), %zmm0
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovdqu8 (%eax), %zmm0 {%k1}
; AVX512F-32-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z}
; AVX512F-32-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1)
  %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask)
  %res2 = add <64 x i8> %res, %res1
  ret <64 x i8> %res2
}

declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)

; Calls the psll.dq intrinsic with immediates 8 and 4 on the same register
; operand and adds the two results. Expected assembly is two vpslldq
; instructions (matched by their shuffle comments) and a vpaddq.
define <8 x i64>@test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) {
; AVX512BW-LABEL: test_int_x86_avx512_psll_dq_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpslldq {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55]
; AVX512BW-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59]
; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_psll_dq_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpslldq {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55]
; AVX512F-32-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59]
; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

; Same intrinsic with immediate 4, but the vector operand is loaded from %p0.
; The expected vpslldq reads its source as mem[...], i.e. there is no separate
; vector load instruction in the expected output.
define <8 x i64>@test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) {
; AVX512BW-LABEL: test_int_x86_avx512_psll_load_dq_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_psll_load_dq_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
; AVX512F-32-NEXT: retl
  %x0 = load <8 x i64>, <8 x i64> *%p0
  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)

; Calls the psrl.dq intrinsic with immediates 8 and 4 on the same register
; operand and adds the two results. Expected assembly is two vpsrldq
; instructions (matched by their shuffle comments) and a vpaddq.
define <8 x i64>@test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) {
; AVX512BW-LABEL: test_int_x86_avx512_psrl_dq_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsrldq {{.*#+}} zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_psrl_dq_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpsrldq {{.*#+}} zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-32-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

; Same intrinsic with immediate 4, but the vector operand is loaded from %p0.
; The expected vpsrldq reads its source as mem[...], i.e. there is no separate
; vector load instruction in the expected output.
define <8 x i64>@test_int_x86_avx512_psrl_load_dq_512(<8 x i64>* %p0) {
; AVX512BW-LABEL: test_int_x86_avx512_psrl_load_dq_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsrldq {{.*#+}} zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_psrl_load_dq_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpsrldq {{.*#+}} zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
; AVX512F-32-NEXT: retl
  %x0 = load <8 x i64>, <8 x i64> *%p0
  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
  ret <8 x i64> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)

; palignr with immediate 2 in three flavours whose results are summed:
; merge-masked into %x3, zero-masked (zeroinitializer pass-through), and
; all-ones mask. Expected assembly: unmasked, {%k1} and {%k1} {z} vpalignr
; followed by two vpaddb.
define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_palignr_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpalignr {{.*#+}} zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpalignr {{.*#+}} zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_palignr_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpalignr {{.*#+}} zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpalignr {{.*#+}} zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; AVX512F-32-NEXT: vpalignr {{.*#+}} zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm3, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
  %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
  %res3 = add <64 x i8> %res, %res1
  %res4 = add <64 x i8> %res3, %res2
  ret <64 x i8> %res4
}

declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i32, <32 x i16>, i32)

; pshufh.w with immediate 3 in merge-masked, zero-masked and all-ones-mask
; form; the three results are summed. The %x1 argument is not referenced by
; the body (the calls pass the constant 3).
define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpshufhw {{.*#+}} zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; AVX512BW-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpshufhw {{.*#+}} zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; AVX512F-32-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}

declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i32, <32 x i16>, i32)

; pshufl.w counterpart of the test above: immediate 3, merge-masked,
; zero-masked and all-ones-mask calls, results summed. %x1 is likewise not
; referenced by the body.
define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpshuflw {{.*#+}} zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; AVX512BW-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpshuflw {{.*#+}} zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; AVX512F-32-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}

; Byte equality compare with an all-ones mask; the i64 result is returned
; directly. On the 32-bit run the expected output stores the mask register to
; the stack with kmovq and reloads it as two 32-bit halves into eax and edx.
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp0:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

; Byte equality compare under a caller-supplied i64 mask; the expected
; vpcmpeqb carries a {%k1} write-mask.
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp1:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)

; Word equality compare with an all-ones mask; the i32 result is moved out of
; the mask register with kmovd on both runs.
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

; Word equality compare under a caller-supplied i32 mask; the expected
; vpcmpeqw carries a {%k1} write-mask.
define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)

; Byte greater-than compare with an all-ones mask; same return-value handling
; as test_pcmpeq_b, with vpcmpgtb as the compare instruction.
define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpgt_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpgt_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp2:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

381*9880d681SAndroid Build Coastguard Workerdefine i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { 382*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_mask_pcmpgt_b: 383*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 384*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: kmovq %rdi, %k1 385*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} 386*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: kmovq %k0, 
%rax 387*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 388*9880d681SAndroid Build Coastguard Worker; 389*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_mask_pcmpgt_b: 390*9880d681SAndroid Build Coastguard Worker; AVX512F-32: # BB#0: 391*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: subl $12, %esp 392*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: .Ltmp3: 393*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 394*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 395*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} 396*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: kmovq %k0, (%esp) 397*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: movl (%esp), %eax 398*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 399*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: addl $12, %esp 400*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: retl 401*9880d681SAndroid Build Coastguard Worker %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) 402*9880d681SAndroid Build Coastguard Worker ret i64 %res 403*9880d681SAndroid Build Coastguard Worker} 404*9880d681SAndroid Build Coastguard Worker 405*9880d681SAndroid Build Coastguard Workerdeclare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64) 406*9880d681SAndroid Build Coastguard Worker 407*9880d681SAndroid Build Coastguard Workerdefine i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) { 408*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_pcmpgt_w: 409*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 410*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 411*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: kmovd %k0, %eax 412*9880d681SAndroid Build Coastguard Worker; 
AVX512BW-NEXT: retq 413*9880d681SAndroid Build Coastguard Worker; 414*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_pcmpgt_w: 415*9880d681SAndroid Build Coastguard Worker; AVX512F-32: # BB#0: 416*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 417*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: kmovd %k0, %eax 418*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: retl 419*9880d681SAndroid Build Coastguard Worker %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) 420*9880d681SAndroid Build Coastguard Worker ret i32 %res 421*9880d681SAndroid Build Coastguard Worker} 422*9880d681SAndroid Build Coastguard Worker 423*9880d681SAndroid Build Coastguard Workerdefine i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 424*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_mask_pcmpgt_w: 425*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 426*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: kmovd %edi, %k1 427*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} 428*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: kmovd %k0, %eax 429*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 430*9880d681SAndroid Build Coastguard Worker; 431*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_mask_pcmpgt_w: 432*9880d681SAndroid Build Coastguard Worker; AVX512F-32: # BB#0: 433*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 434*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} 435*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: kmovd %k0, %eax 436*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: retl 437*9880d681SAndroid Build Coastguard Worker %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) 438*9880d681SAndroid 
Build Coastguard Worker ret i32 %res 439*9880d681SAndroid Build Coastguard Worker} 440*9880d681SAndroid Build Coastguard Worker 441*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32) 442*9880d681SAndroid Build Coastguard Worker 443*9880d681SAndroid Build Coastguard Workerdeclare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 444*9880d681SAndroid Build Coastguard Worker 445*9880d681SAndroid Build Coastguard Workerdefine <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 446*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhb_w_512: 447*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 448*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 449*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: kmovq %rdi, %k1 450*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 {%k1} = 
zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 451*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm0 452*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 453*9880d681SAndroid Build Coastguard Worker; 454*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhb_w_512: 455*9880d681SAndroid Build Coastguard Worker; AVX512F-32: # BB#0: 456*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 457*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 458*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm2 {%k1} = 
zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 459*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpaddb %zmm3, %zmm2, %zmm0 460*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: retl 461*9880d681SAndroid Build Coastguard Worker %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 462*9880d681SAndroid Build Coastguard Worker %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 463*9880d681SAndroid Build Coastguard Worker %res2 = add <64 x i8> %res, %res1 464*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %res2 465*9880d681SAndroid Build Coastguard Worker} 466*9880d681SAndroid Build Coastguard Worker 467*9880d681SAndroid Build Coastguard Workerdeclare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 468*9880d681SAndroid Build Coastguard Worker 469*9880d681SAndroid Build Coastguard Workerdefine <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 470*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklb_w_512: 471*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 472*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 473*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: kmovq %rdi, %k1 474*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 475*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm0 476*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 477*9880d681SAndroid Build Coastguard Worker; 478*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklb_w_512: 479*9880d681SAndroid Build Coastguard Worker; AVX512F-32: # BB#0: 480*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm3 = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 481*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 482*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 483*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpaddb %zmm3, %zmm2, %zmm0 484*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: retl 485*9880d681SAndroid Build Coastguard Worker %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 486*9880d681SAndroid Build Coastguard Worker %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 487*9880d681SAndroid Build Coastguard Worker %res2 = add <64 x i8> %res, %res1 488*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %res2 489*9880d681SAndroid Build Coastguard Worker} 490*9880d681SAndroid Build 
Coastguard Worker 491*9880d681SAndroid Build Coastguard Workerdeclare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 492*9880d681SAndroid Build Coastguard Worker 493*9880d681SAndroid Build Coastguard Workerdefine <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 494*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhw_d_512: 495*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 496*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 497*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: kmovd %edi, %k1 498*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 499*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0 500*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 501*9880d681SAndroid Build Coastguard Worker; 502*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhw_d_512: 503*9880d681SAndroid Build Coastguard Worker; AVX512F-32: # BB#0: 504*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm3 = 
zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 505*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 506*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 507*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 508*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: retl 509*9880d681SAndroid Build Coastguard Worker %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 510*9880d681SAndroid Build Coastguard Worker %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 511*9880d681SAndroid Build Coastguard Worker %res2 = add <32 x i16> %res, %res1 512*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %res2 513*9880d681SAndroid Build Coastguard Worker} 514*9880d681SAndroid Build Coastguard Worker 515*9880d681SAndroid Build Coastguard Workerdeclare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 516*9880d681SAndroid Build Coastguard Worker 517*9880d681SAndroid Build Coastguard Workerdefine <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 518*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklw_d_512: 519*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 
520*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 521*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: kmovd %edi, %k1 522*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 523*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0 524*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 525*9880d681SAndroid Build Coastguard Worker; 526*9880d681SAndroid Build Coastguard Worker; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklw_d_512: 527*9880d681SAndroid Build Coastguard Worker; AVX512F-32: # BB#0: 528*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 529*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 530*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm2 {%k1} = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 531*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 532*9880d681SAndroid Build Coastguard Worker; AVX512F-32-NEXT: retl 533*9880d681SAndroid Build Coastguard Worker %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 534*9880d681SAndroid Build Coastguard Worker %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 535*9880d681SAndroid Build Coastguard Worker %res2 = add <32 x i16> %res, %res1 536*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %res2 537*9880d681SAndroid Build Coastguard Worker} 538*9880d681SAndroid Build Coastguard Worker 539