; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s

; Codegen tests for byte/word vector moves with AVX-512BW enabled:
;   * test1..test4  - <64 x i8>  unaligned load, store, merge-masked load and
;                     zero-masked load.
;   * test5..test8  - <32 x i16> versions of the same four patterns.
;   * test_mask_load_*  / test_mask_store_* - llvm.masked.load / llvm.masked.store
;                     on 128-bit and 256-bit byte/word vectors.
;
; In the 128/256-bit masked tests the expected output uses 512-bit (zmm)
; instructions, and the mask register is shifted left and then right by the
; same amount so that only the low lanes covered by the original vector stay
; set. NOTE(review): presumably this widening happens because the RUN line
; enables AVX512BW without AVX512VL - confirm against the X86 backend.

; Unaligned (align 1) load of a full 64 x i8 vector.
define <64 x i8> @test1(i8 * %addr) {
; CHECK-LABEL: test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %res = load <64 x i8>, <64 x i8>* %vaddr, align 1
  ret <64 x i8>%res
}

; Unaligned (align 1) store of a full 64 x i8 vector.
define void @test2(i8 * %addr, <64 x i8> %data) {
; CHECK-LABEL: test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu8 %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  store <64 x i8>%data, <64 x i8>* %vaddr, align 1
  ret void
}

; Load + select against %old: lanes where %mask1 != 0 take the loaded value,
; the remaining lanes keep %old (merge masking).
define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
; CHECK-LABEL: test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqb %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpblendmb (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
  ret <64 x i8>%res
}

; Load + select against zero: lanes where %mask1 != 0 take the loaded value,
; the remaining lanes are zero (zero masking).
define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
; CHECK-LABEL: test4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT:    vpcmpneqb %zmm1, %zmm0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
  ret <64 x i8>%res
}

; Unaligned (align 1) load of a full 32 x i16 vector.
define <32 x i16> @test5(i8 * %addr) {
; CHECK-LABEL: test5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %res = load <32 x i16>, <32 x i16>* %vaddr, align 1
  ret <32 x i16>%res
}

; Unaligned (align 1) store of a full 32 x i16 vector.
define void @test6(i8 * %addr, <32 x i16> %data) {
; CHECK-LABEL: test6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu16 %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  store <32 x i16>%data, <32 x i16>* %vaddr, align 1
  ret void
}

; 16-bit-element counterpart of test3: merge-masked load against %old.
define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
; CHECK-LABEL: test7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqw %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpblendmw (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
  ret <32 x i16>%res
}

; 16-bit-element counterpart of test4: zero-masked load.
define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
; CHECK-LABEL: test8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT:    vpcmpneqw %zmm1, %zmm0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
  ret <32 x i16>%res
}

; llvm.masked.load of 16 x i8 with an undef passthru.
; NOTE(review): %val is not referenced in the body.
define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_load_16xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $48, %k0, %k0
; CHECK-NEXT:    kshiftrq $48, %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)

; llvm.masked.load of 32 x i8 with a zeroinitializer passthru.
; NOTE(review): %val is not referenced in the body.
define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_load_32xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $32, %k0, %k0
; CHECK-NEXT:    kshiftrq $32, %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)

; llvm.masked.load of 8 x i16 with an undef passthru.
; NOTE(review): %val is not referenced in the body.
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_load_8xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)

; llvm.masked.load of 16 x i16 with a zeroinitializer passthru.
; NOTE(review): %val is not referenced in the body.
define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_load_16xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $16, %k0, %k0
; CHECK-NEXT:    kshiftrd $16, %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)

; llvm.masked.store of 16 x i8 (%val written to %addr under %mask).
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_store_16xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $48, %k0, %k0
; CHECK-NEXT:    kshiftrq $48, %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)

; llvm.masked.store of 32 x i8 (%val written to %addr under %mask).
define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_store_32xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $32, %k0, %k0
; CHECK-NEXT:    kshiftrq $32, %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)

; llvm.masked.store of 8 x i16 (%val written to %addr under %mask).
define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_store_8xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)

; llvm.masked.store of 16 x i16 (%val written to %addr under %mask).
define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_store_16xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $16, %k0, %k0
; CHECK-NEXT:    kshiftrd $16, %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)