; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx2 < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f < %s | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512 --check-prefix=SKX

; To test the case where masked load/store is not legal, we should add a run with a target
; that does not have AVX, but that case should probably be a separate test file with fewer
; tests, because it takes over 1.2 seconds to codegen these tests on a 4 GHz Haswell if
; there's no maskmov.

define <16 x i32> @test1(<16 x i32> %trigger, <16 x i32>* %addr) {
; AVX1-LABEL: test1:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: vmaskmovps 32(%rdi), %ymm1, %ymm1
; AVX1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test1:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpmaskmovd 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test1:
; AVX512: ## BB#0:
; AVX512-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>undef)
  ret <16 x i32> %res
}

define <16 x i32> @test2(<16 x i32> %trigger, <16 x i32>* %addr) {
; AVX1-LABEL: test2:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: vmaskmovps 32(%rdi), %ymm1, %ymm1
; AVX1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test2:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpmaskmovd 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test2:
; AVX512: ## BB#0:
; AVX512-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>zeroinitializer)
  ret <16 x i32> %res
}

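; A reminder of the intrinsic operand order used throughout this file:
; llvm.masked.load takes (pointer, alignment, mask, passthru) and yields the
; passthru lane wherever the mask lane is false; llvm.masked.store takes
; (value, pointer, alignment, mask) and writes only the lanes whose mask is true.
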
define void @test3(<16 x i32> %trigger, <16 x i32>* %addr, <16 x i32> %val) {
; AVX1-LABEL: test3:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT: vmaskmovps %ymm3, %ymm1, 32(%rdi)
; AVX1-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test3:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm1, %ymm1
; AVX2-NEXT: vpmaskmovd %ymm3, %ymm1, 32(%rdi)
; AVX2-NEXT: vpmaskmovd %ymm2, %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test3:
; AVX512: ## BB#0:
; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1}
; AVX512-NEXT: retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>%val, <16 x i32>* %addr, i32 4, <16 x i1>%mask)
  ret void
}

define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %dst) {
; AVX1-LABEL: test4:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm4
; AVX1-NEXT: vblendvps %ymm0, %ymm4, %ymm2, %ymm0
; AVX1-NEXT: vmaskmovps 32(%rdi), %ymm1, %ymm2
; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm3, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test4:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm4
; AVX2-NEXT: vblendvps %ymm0, %ymm4, %ymm2, %ymm0
; AVX2-NEXT: vmaskmovps 32(%rdi), %ymm1, %ymm2
; AVX2-NEXT: vblendvps %ymm1, %ymm2, %ymm3, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test4:
; AVX512: ## BB#0:
; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT: vmovups (%rdi), %zmm1 {%k1}
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %addr, i32 4, <16 x i1>%mask, <16 x float> %dst)
  ret <16 x float> %res
}

define <8 x double> @test5(<8 x i32> %trigger, <8 x double>* %addr, <8 x double> %dst) {
; AVX1-LABEL: test5:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm4
; AVX1-NEXT: vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
; AVX1-NEXT: vmaskmovpd 32(%rdi), %ymm3, %ymm1
; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test5:
; AVX2: ## BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqd %xmm4, %xmm3, %xmm3
; AVX2-NEXT: vpmovsxdq %xmm3, %ymm3
; AVX2-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm4
; AVX2-NEXT: vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
; AVX2-NEXT: vmaskmovpd 32(%rdi), %ymm3, %ymm1
; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test5:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test5:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1>%mask, <8 x double>%dst)
  ret <8 x double> %res
}

define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
; AVX-LABEL: test6:
; AVX: ## BB#0:
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2
; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: test6:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2
; AVX512F-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test6:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vmovupd (%rdi), %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mask = icmp eq <2 x i64> %trigger, zeroinitializer
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
  ret <2 x double> %res
}

define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %dst) {
; AVX-LABEL: test7:
; AVX: ## BB#0:
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: test7:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test7:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst)
  ret <4 x float> %res
}

define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
; AVX1-LABEL: test8:
; AVX1: ## BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test8:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test8:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
  ret <4 x i32> %res
}

define void @test9(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
; AVX1-LABEL: test9:
; AVX1: ## BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: test9:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test9:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT: retq
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
  ret void
}

define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double> %dst) {
; AVX1-LABEL: test10:
; AVX1: ## BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2
; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test10:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test10:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2
; AVX512F-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test10:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovapd (%rdi), %ymm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>%dst)
  ret <4 x double> %res
}

define <4 x double> @test10b(<4 x i32> %trigger, <4 x double>* %addr, <4 x double> %dst) {
; AVX1-LABEL: test10b:
; AVX1: ## BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test10b:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test10b:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test10b:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>zeroinitializer)
  ret <4 x double> %res
}

define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
; AVX1-LABEL: test11a:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2
; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test11a:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test11a:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovups (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test11a:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT: vmovaps (%rdi), %ymm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst)
  ret <8 x float> %res
}

define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
; AVX1-LABEL: test11b:
; AVX1: ## BB#0:
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2
; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test11b:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test11b:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test11b:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst)
  ret <8 x i32> %res
}

define <8 x float> @test11c(<8 x i1> %mask, <8 x float>* %addr) {
; AVX1-LABEL: test11c:
; AVX1: ## BB#0:
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test11c:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test11c:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
;
; SKX-LABEL: test11c:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1> %mask, <8 x float> zeroinitializer)
  ret <8 x float> %res
}

define <8 x i32> @test11d(<8 x i1> %mask, <8 x i32>* %addr) {
; AVX1-LABEL: test11d:
; AVX1: ## BB#0:
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test11d:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test11d:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
;
; SKX-LABEL: test11d:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1> %mask, <8 x i32> zeroinitializer)
  ret <8 x i32> %res
}

define void @test12(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %val) {
; AVX1-LABEL: test12:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test12:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test12:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1}
; AVX512F-NEXT: retq
;
; SKX-LABEL: test12:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT: vmovdqu32 %ymm1, (%rdi) {%k1}
; SKX-NEXT: retq
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>%val, <8 x i32>* %addr, i32 4, <8 x i1>%mask)
  ret void
}

define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val) {
; AVX1-LABEL: test13:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT: vmaskmovps %ymm3, %ymm1, 32(%rdi)
; AVX1-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test13:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm1, %ymm1
; AVX2-NEXT: vmaskmovps %ymm3, %ymm1, 32(%rdi)
; AVX2-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test13:
; AVX512: ## BB#0:
; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT: vmovups %zmm1, (%rdi) {%k1}
; AVX512-NEXT: retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v16f32.p0v16f32(<16 x float>%val, <16 x float>* %addr, i32 4, <16 x i1>%mask)
  ret void
}

define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; AVX1-LABEL: test14:
; AVX1: ## BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: test14:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test14:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT: retq
;
; SKX-LABEL: test14:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
; SKX-NEXT: kshiftrw $14, %k0, %k1
; SKX-NEXT: vmovups %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
  ret void
}

define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
; AVX1-LABEL: test15:
; AVX1: ## BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: test15:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test15:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT: retq
;
; SKX-LABEL: test15:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
  ret void
}

define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
; AVX1-LABEL: test16:
; AVX1: ## BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test16:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test16:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
; SKX-NEXT: kshiftrw $14, %k0, %k1
; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
  ret <2 x float> %res
}

define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
; AVX1-LABEL: test17:
; AVX1: ## BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test17:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test17:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test17:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
; SKX-NEXT: kshiftrw $14, %k0, %k1
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SKX-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0
; SKX-NEXT: retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
  ret <2 x i32> %res
}

define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
; AVX1-LABEL: test18:
; AVX1: ## BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test18:
; AVX2: ## BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test18:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512F-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test18:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
; SKX-NEXT: kshiftrw $14, %k0, %k1
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>undef)
  ret <2 x float> %res
}

define <4 x float> @load_all(<4 x i32> %trigger, <4 x float>* %addr) {
; AVX-LABEL: load_all:
; AVX: ## BB#0:
; AVX-NEXT: vmovups (%rdi), %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: load_all:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: load_all:
; SKX: ## BB#0:
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>, <4 x float>undef)
  ret <4 x float> %res
}

;;; Loads with Constant Masks - these should be optimized to use something other than a variable blend.

; 128-bit FP vectors are supported with AVX.

define <4 x float> @mload_constmask_v4f32(<4 x float>* %addr, <4 x float> %dst) {
; AVX-LABEL: mload_constmask_v4f32:
; AVX: ## BB#0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = mem[0],xmm0[1],mem[2,3]
; AVX-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v4f32:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = [4294967295,0,4294967295,4294967295]
; AVX512F-NEXT: vmaskmovps (%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: mload_constmask_v4f32:
; SKX: ## BB#0:
; SKX-NEXT: movb $13, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1}
; SKX-NEXT: retq
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x float> %dst)
  ret <4 x float> %res
}

; 128-bit integer vectors are supported with AVX2.

define <4 x i32> @mload_constmask_v4i32(<4 x i32>* %addr, <4 x i32> %dst) {
; AVX1-LABEL: mload_constmask_v4i32:
; AVX1: ## BB#0:
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX1-NEXT: vmaskmovps (%rdi), %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mload_constmask_v4i32:
; AVX2: ## BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX2-NEXT: vpmaskmovd (%rdi), %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v4i32:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: mload_constmask_v4i32:
; SKX: ## BB#0:
; SKX-NEXT: movb $14, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
; SKX-NEXT: retq
  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x i32> %dst)
  ret <4 x i32> %res
}
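
; In the SKX blocks above, the constant <4 x i1> masks are materialized as
; immediates: <1,0,1,1> becomes movb $13 (0b1101) and <0,1,1,1> becomes
; movb $14 (0b1110), with bit i of the immediate holding mask element i.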

; 256-bit FP vectors are supported with AVX.

define <8 x float> @mload_constmask_v8f32(<8 x float>* %addr, <8 x float> %dst) {
; AVX-LABEL: mload_constmask_v8f32:
; AVX: ## BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,0,0,0,0,0]
; AVX-NEXT: vmaskmovps (%rdi), %ymm1, %ymm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v8f32:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: movw $7, %ax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1}
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
;
; SKX-LABEL: mload_constmask_v8f32:
; SKX: ## BB#0:
; SKX-NEXT: movb $7, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vmovups (%rdi), %ymm0 {%k1}
; SKX-NEXT: retq
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x float> %dst)
  ret <8 x float> %res
}

define <4 x double> @mload_constmask_v4f64(<4 x double>* %addr, <4 x double> %dst) {
; AVX-LABEL: mload_constmask_v4f64:
; AVX: ## BB#0:
; AVX-NEXT: vmovapd {{.*#+}} ymm1 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX-NEXT: vmaskmovpd (%rdi), %ymm1, %ymm1
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v4f64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vmovapd {{.*#+}} ymm1 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm1, %ymm2
; AVX512F-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: mload_constmask_v4f64:
; SKX: ## BB#0:
; SKX-NEXT: movb $7, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vmovupd (%rdi), %ymm0 {%k1}
; SKX-NEXT: retq
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> %dst)
  ret <4 x double> %res
}

; 256-bit integer vectors are supported with AVX2.
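; Note that with a constant mask the AVX1/AVX2 checks below show the whole
; masked load folded into a single blend with a memory operand, with no
; vpmaskmov at all.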

define <8 x i32> @mload_constmask_v8i32(<8 x i32>* %addr, <8 x i32> %dst) {
; AVX1-LABEL: mload_constmask_v8i32:
; AVX1: ## BB#0:
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2],ymm0[3,4,5,6],mem[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mload_constmask_v8i32:
; AVX2: ## BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3,4,5,6],mem[7]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v8i32:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: movw $135, %ax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1}
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
;
; SKX-LABEL: mload_constmask_v8i32:
; SKX: ## BB#0:
; SKX-NEXT: movb $-121, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1}
; SKX-NEXT: retq
  %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i32> %dst)
  ret <8 x i32> %res
}

define <4 x i64> @mload_constmask_v4i64(<4 x i64>* %addr, <4 x i64> %dst) {
; AVX1-LABEL: mload_constmask_v4i64:
; AVX1: ## BB#0:
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = mem[0],ymm0[1,2],mem[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mload_constmask_v4i64:
; AVX2: ## BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1],ymm0[2,3,4,5],mem[6,7]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v4i64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,0,0,18446744073709551615]
; AVX512F-NEXT: vpmaskmovq (%rdi), %ymm1, %ymm2
; AVX512F-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: mload_constmask_v4i64:
; SKX: ## BB#0:
; SKX-NEXT: movb $9, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1}
; SKX-NEXT: retq
  %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %dst)
  ret <4 x i64> %res
}

; 512-bit FP vectors are supported with AVX512.

define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %dst) {
; AVX-LABEL: mload_constmask_v8f64:
; AVX: ## BB#0:
; AVX-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0,1,2],mem[3]
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3]
; AVX-NEXT: retq
;
; AVX512-LABEL: mload_constmask_v8f64:
; AVX512: ## BB#0:
; AVX512-NEXT: movb $-121, %al
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: vmovupd (%rdi), %zmm0 {%k1}
; AVX512-NEXT: retq
  %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x double> %dst)
  ret <8 x double> %res
}

; If the pass-through operand is undef, no blend is needed.
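; (Compare mload_constmask_v4f64 above, where the same load needs a vblendvpd
; to merge the loaded lanes into %dst.)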

define <4 x double> @mload_constmask_v4f64_undef_passthrough(<4 x double>* %addr) {
; AVX-LABEL: mload_constmask_v4f64_undef_passthrough:
; AVX: ## BB#0:
; AVX-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v4f64_undef_passthrough:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: mload_constmask_v4f64_undef_passthrough:
; SKX: ## BB#0:
; SKX-NEXT: movb $7, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vmovupd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> undef)
  ret <4 x double> %res
}

define <4 x i64> @mload_constmask_v4i64_undef_passthrough(<4 x i64>* %addr) {
; AVX1-LABEL: mload_constmask_v4i64_undef_passthrough:
; AVX1: ## BB#0:
; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0]
; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: mload_constmask_v4i64_undef_passthrough:
; AVX2: ## BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0]
; AVX2-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v4i64_undef_passthrough:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0]
; AVX512F-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: mload_constmask_v4i64_undef_passthrough:
; SKX: ## BB#0:
; SKX-NEXT: movb $6, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 0>, <4 x i64> undef)
  ret <4 x i64> %res
}

define void @test21(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
; AVX1-LABEL: test21:
; AVX1: ## BB#0:
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: test21:
; AVX2: ## BB#0:
; AVX2-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test21:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT: retq
;
; SKX-LABEL: test21:
; SKX: ## BB#0:
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; When only one element of the mask is set, reduce to a scalar store.
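; Element i of a <4 x i32> vector lives at byte offset 4 * i, so storing only
; element 0 below is a plain vmovd to (%rdi).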

define void @one_mask_bit_set1(<4 x i32>* %addr, <4 x i32> %val) {
; AVX-LABEL: one_mask_bit_set1:
; AVX: ## BB#0:
; AVX-NEXT: vmovd %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: one_mask_bit_set1:
; AVX512: ## BB#0:
; AVX512-NEXT: vmovd %xmm0, (%rdi)
; AVX512-NEXT: retq
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>)
  ret void
}

; Choose a different element to show that the correct address offset is produced.

define void @one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
; AVX-LABEL: one_mask_bit_set2:
; AVX: ## BB#0:
; AVX-NEXT: vextractps $2, %xmm0, 8(%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: one_mask_bit_set2:
; AVX512: ## BB#0:
; AVX512-NEXT: vextractps $2, %xmm0, 8(%rdi)
; AVX512-NEXT: retq
  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
  ret void
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.

define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
; AVX-LABEL: one_mask_bit_set3:
; AVX: ## BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmovlps %xmm0, 16(%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512F-LABEL: one_mask_bit_set3:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, 16(%rdi)
; AVX512F-NEXT: retq
;
; SKX-LABEL: one_mask_bit_set3:
; SKX: ## BB#0:
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT: vmovq %xmm0, 16(%rdi)
; SKX-NEXT: retq
  call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
  ret void
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.

define void @one_mask_bit_set4(<4 x double>* %addr, <4 x double> %val) {
; AVX-LABEL: one_mask_bit_set4:
; AVX: ## BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmovhpd %xmm0, 24(%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512F-LABEL: one_mask_bit_set4:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovhpd %xmm0, 24(%rdi)
; AVX512F-NEXT: retq
;
; SKX-LABEL: one_mask_bit_set4:
; SKX: ## BB#0:
; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0
; SKX-NEXT: vmovhpd %xmm0, 24(%rdi)
; SKX-NEXT: retq
  call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %val, <4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>)
  ret void
}

; Try a 512-bit vector to make sure AVX doesn't die and AVX512 works as expected.
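; The set element here is number 6 of <8 x double>, i.e. byte offset
; 6 * 8 = 48, which is why both lowerings below store to 48(%rdi).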

define void @one_mask_bit_set5(<8 x double>* %addr, <8 x double> %val) {
; AVX-LABEL: one_mask_bit_set5:
; AVX: ## BB#0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX-NEXT: vmovlps %xmm0, 48(%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: one_mask_bit_set5:
; AVX512: ## BB#0:
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT: vmovlpd %xmm0, 48(%rdi)
; AVX512-NEXT: retq
  call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %val, <8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false>)
  ret void
}

; When only one element of the mask is set, reduce to a scalar load.

define <4 x i32> @load_one_mask_bit_set1(<4 x i32>* %addr, <4 x i32> %val) {
; AVX-LABEL: load_one_mask_bit_set1:
; AVX: ## BB#0:
; AVX-NEXT: vpinsrd $0, (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: load_one_mask_bit_set1:
; AVX512: ## BB#0:
; AVX512-NEXT: vpinsrd $0, (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>, <4 x i32> %val)
  ret <4 x i32> %res
}

; Choose a different element to show that the correct address offset is produced.

define <4 x float> @load_one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
; AVX-LABEL: load_one_mask_bit_set2:
; AVX: ## BB#0:
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: retq
;
; AVX512-LABEL: load_one_mask_bit_set2:
; AVX512: ## BB#0:
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX512-NEXT: retq
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x float> %val)
  ret <4 x float> %res
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.

define <4 x i64> @load_one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
; AVX1-LABEL: load_one_mask_bit_set3:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_one_mask_bit_set3:
; AVX2: ## BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: load_one_mask_bit_set3:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: load_one_mask_bit_set3:
; SKX: ## BB#0:
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
  %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x i64> %val)
  ret <4 x i64> %res
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.

define <4 x double> @load_one_mask_bit_set4(<4 x double>* %addr, <4 x double> %val) {
; AVX-LABEL: load_one_mask_bit_set4:
; AVX: ## BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: load_one_mask_bit_set4:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: load_one_mask_bit_set4:
; SKX: ## BB#0:
; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm1
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; SKX-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>, <4 x double> %val)
  ret <4 x double> %res
}

; Try a 512-bit vector to make sure AVX doesn't die and AVX512 works as expected.

define <8 x double> @load_one_mask_bit_set5(<8 x double>* %addr, <8 x double> %val) {
; AVX-LABEL: load_one_mask_bit_set5:
; AVX: ## BB#0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: retq
;
; AVX512-LABEL: load_one_mask_bit_set5:
; AVX512: ## BB#0:
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; AVX512-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; AVX512-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x double> %val)
  ret <8 x double> %res
}

declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>)
declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
float>*, i32, <2 x i1>, <2 x float>) 1454declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>) 1455declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>) 1456declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>) 1457declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>) 1458declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>) 1459declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>) 1460declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>) 1461 1462declare <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>) 1463 1464define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) { 1465; AVX1-LABEL: test23: 1466; AVX1: ## BB#0: 1467; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1468; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 1469; AVX1-NEXT: vpcmpeqq %xmm5, %xmm4, %xmm4 1470; AVX1-NEXT: vpcmpeqq %xmm5, %xmm0, %xmm0 1471; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1472; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 1473; AVX1-NEXT: vpcmpeqq %xmm5, %xmm4, %xmm4 1474; AVX1-NEXT: vpcmpeqq %xmm5, %xmm1, %xmm1 1475; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1 1476; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 1477; AVX1-NEXT: vpcmpeqq %xmm5, %xmm4, %xmm4 1478; AVX1-NEXT: vpcmpeqq %xmm5, %xmm2, %xmm2 1479; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 1480; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 1481; AVX1-NEXT: vpcmpeqq %xmm5, %xmm4, %xmm4 1482; AVX1-NEXT: vpcmpeqq %xmm5, %xmm3, %xmm3 1483; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 1484; AVX1-NEXT: vmaskmovpd 96(%rdi), %ymm3, %ymm3 1485; AVX1-NEXT: vmaskmovpd 64(%rdi), %ymm2, %ymm2 1486; AVX1-NEXT: vmaskmovpd 32(%rdi), %ymm1, %ymm1 1487; AVX1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 1488; AVX1-NEXT: retq 1489; 1490; AVX2-LABEL: test23: 1491; AVX2: ## BB#0: 1492; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4 1493; AVX2-NEXT: vpcmpeqq %ymm4, %ymm0, %ymm0 1494; AVX2-NEXT: vpcmpeqq %ymm4, %ymm1, %ymm1 1495; AVX2-NEXT: vpcmpeqq %ymm4, %ymm2, %ymm2 1496; AVX2-NEXT: vpcmpeqq %ymm4, %ymm3, %ymm3 1497; AVX2-NEXT: vpmaskmovq 96(%rdi), %ymm3, %ymm3 1498; AVX2-NEXT: vpmaskmovq 64(%rdi), %ymm2, %ymm2 1499; AVX2-NEXT: vpmaskmovq 32(%rdi), %ymm1, %ymm1 1500; AVX2-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0 1501; AVX2-NEXT: retq 1502; 1503; AVX512-LABEL: test23: 1504; AVX512: ## BB#0: 1505; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2 1506; AVX512-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 1507; AVX512-NEXT: vpcmpeqq %zmm2, %zmm1, %k2 1508; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k2} {z} 1509; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} 1510; AVX512-NEXT: retq 1511 %mask = icmp eq <16 x i32*> %trigger, zeroinitializer 1512 %res = call <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer) 1513 ret <16 x i32*> %res 1514} 1515 1516%mystruct = type { i16, i16, [1 x i8*] } 1517 1518declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct.p0v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>) 1519 1520define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) { 1521; AVX1-LABEL: test24: 1522; AVX1: ## BB#0: 1523; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1524; AVX1-NEXT: vpslld $31, %xmm1, %xmm1 1525; 
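
; Here the mask arrives as a <16 x i1> argument rather than a compare result, so
; on AVX1/AVX2 it must be widened lane by lane: zero-extend the byte lanes, then
; shift left by 31 and arithmetic-shift right by 31 to get all-ones/all-zeros
; dwords, then sign-extend to the 64-bit element width for vmaskmovpd/vpmaskmovq.
; AVX512 keeps the mask in a k-register instead (vptestmd on AVX512F, vpmovb2m on SKX).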

define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
; AVX1-LABEL: test24:
; AVX1: ## BB#0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vmaskmovpd (%rdi), %ymm1, %ymm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vmaskmovpd 96(%rdi), %ymm1, %ymm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vmaskmovpd 64(%rdi), %ymm1, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmaskmovpd 32(%rdi), %ymm0, %ymm1
; AVX1-NEXT: vmovapd %ymm4, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test24:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vpmaskmovq (%rdi), %ymm1, %ymm4
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vpmaskmovq 96(%rdi), %ymm1, %ymm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vpmaskmovq 64(%rdi), %ymm1, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vpmaskmovq 32(%rdi), %ymm0, %ymm1
; AVX2-NEXT: vmovdqa %ymm4, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test24:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
; AVX512F-NEXT: retq
;
; SKX-LABEL: test24:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT: retq
  %res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct.p0v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
  ret <16 x %mystruct*> %res
}
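
; A 1024-bit masked store is split into four 256-bit maskmov stores on AVX1/AVX2,
; widening the i1 mask chunk for each quarter as above. AVX512F/SKX split it into
; two 512-bit masked stores and move the upper mask half down with kshiftrw $8.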

define void @test_store_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
; AVX1-LABEL: test_store_16i64:
; AVX1: ## BB#0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm5, %xmm5
; AVX1-NEXT: vpsrad $31, %xmm5, %xmm5
; AVX1-NEXT: vpmovsxdq %xmm5, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT: vmaskmovpd %ymm1, %ymm5, (%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm5, %ymm1
; AVX1-NEXT: vmaskmovpd %ymm4, %ymm1, 96(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm4, %ymm1
; AVX1-NEXT: vmaskmovpd %ymm3, %ymm1, 64(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmaskmovpd %ymm2, %ymm0, 32(%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_store_16i64:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
; AVX2-NEXT: vpmaskmovq %ymm1, %ymm5, (%rdi)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vpmaskmovq %ymm4, %ymm1, 96(%rdi)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vpmaskmovq %ymm3, %ymm1, 64(%rdi)
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vpmaskmovq %ymm2, %ymm0, 32(%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_store_16i64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqu64 %zmm1, (%rdi) {%k1}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovdqu64 %zmm2, 64(%rdi) {%k1}
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_store_16i64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vmovdqu64 %zmm1, (%rdi) {%k1}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovdqu64 %zmm2, 64(%rdi) {%k1}
; SKX-NEXT: retq
  call void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32 4, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32, <16 x i1> %mask)

define void @test_store_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
; AVX1-LABEL: test_store_16f64:
; AVX1: ## BB#0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm5, %xmm5
; AVX1-NEXT: vpsrad $31, %xmm5, %xmm5
; AVX1-NEXT: vpmovsxdq %xmm5, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT: vmaskmovpd %ymm1, %ymm5, (%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm5, %ymm1
; AVX1-NEXT: vmaskmovpd %ymm4, %ymm1, 96(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm4, %ymm1
; AVX1-NEXT: vmaskmovpd %ymm3, %ymm1, 64(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmaskmovpd %ymm2, %ymm0, 32(%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_store_16f64:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
; AVX2-NEXT: vmaskmovpd %ymm1, %ymm5, (%rdi)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vmaskmovpd %ymm4, %ymm1, 96(%rdi)
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vmaskmovpd %ymm3, %ymm1, 64(%rdi)
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vmaskmovpd %ymm2, %ymm0, 32(%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_store_16f64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovupd %zmm1, (%rdi) {%k1}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovupd %zmm2, 64(%rdi) {%k1}
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_store_16f64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vmovupd %zmm1, (%rdi) {%k1}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovupd %zmm2, 64(%rdi) {%k1}
; SKX-NEXT: retq
  call void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32 4, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32, <16 x i1> %mask)
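
; The corresponding 1024-bit masked loads also have to honor the passthru value:
; on AVX1/AVX2 each quarter's maskmov result is merged with %src0 via vblendvpd,
; while AVX512 merges in-register by using {%k1} without the {z} zeroing modifier.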

define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
; AVX1-LABEL: test_load_16i64:
; AVX1: ## BB#0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm5, %xmm5
; AVX1-NEXT: vpsrad $31, %xmm5, %xmm5
; AVX1-NEXT: vpmovsxdq %xmm5, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT: vmaskmovpd (%rdi), %ymm5, %ymm6
; AVX1-NEXT: vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm6, %ymm1
; AVX1-NEXT: vmaskmovpd 32(%rdi), %ymm1, %ymm6
; AVX1-NEXT: vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm6, %ymm2
; AVX1-NEXT: vmaskmovpd 64(%rdi), %ymm2, %ymm6
; AVX1-NEXT: vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vmaskmovpd 96(%rdi), %ymm0, %ymm3
; AVX1-NEXT: vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
; AVX1-NEXT: vmovapd %ymm5, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_16i64:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
; AVX2-NEXT: vpmaskmovq (%rdi), %ymm5, %ymm6
; AVX2-NEXT: vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vpmaskmovq 32(%rdi), %ymm1, %ymm6
; AVX2-NEXT: vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm2, %xmm2
; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT: vpmaskmovq 64(%rdi), %ymm2, %ymm6
; AVX2-NEXT: vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vpmaskmovq 96(%rdi), %ymm0, %ymm3
; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vmovapd %ymm5, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_load_16i64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
; AVX512F-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-NEXT: vmovaps %zmm2, %zmm1
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_load_16i64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: vmovaps %zmm2, %zmm1
; SKX-NEXT: retq
  %res = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
  ret <16 x i64> %res
}
declare <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)

define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
; AVX1-LABEL: test_load_16f64:
; AVX1: ## BB#0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm5, %xmm5
; AVX1-NEXT: vpsrad $31, %xmm5, %xmm5
; AVX1-NEXT: vpmovsxdq %xmm5, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm5, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT: vmaskmovpd (%rdi), %ymm5, %ymm6
; AVX1-NEXT: vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm6, %ymm1
; AVX1-NEXT: vmaskmovpd 32(%rdi), %ymm1, %ymm6
; AVX1-NEXT: vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm6, %ymm2
; AVX1-NEXT: vmaskmovpd 64(%rdi), %ymm2, %ymm6
; AVX1-NEXT: vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vmaskmovpd 96(%rdi), %ymm0, %ymm3
; AVX1-NEXT: vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
; AVX1-NEXT: vmovapd %ymm5, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_16f64:
; AVX2: ## BB#0:
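; A 2048-bit load: the <32 x i1> mask arrives in a ymm register, the AVX1/AVX2
; versions return indirectly (sret pointer in %rdi, data pointer in %rsi) and
; must fetch the eighth ymm argument from the stack, which is what forces the
; frame setup and 32-byte stack realignment seen below.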
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
; AVX2-NEXT: vmaskmovpd (%rdi), %ymm5, %ymm6
; AVX2-NEXT: vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vmaskmovpd 32(%rdi), %ymm1, %ymm6
; AVX2-NEXT: vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm2, %xmm2
; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT: vmaskmovpd 64(%rdi), %ymm2, %ymm6
; AVX2-NEXT: vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vmaskmovpd 96(%rdi), %ymm0, %ymm3
; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vmovapd %ymm5, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_load_16f64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; AVX512F-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-NEXT: vmovaps %zmm2, %zmm1
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_load_16f64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: vmovaps %zmm2, %zmm1
; SKX-NEXT: retq
  %res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
  ret <16 x double> %res
}
declare <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)

define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) {
; AVX1-LABEL: test_load_32f64:
; AVX1: ## BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: Ltmp0:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: Ltmp1:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: Ltmp2:
; AVX1-NEXT: .cfi_def_cfa_register %rbp
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $32, %rsp
; AVX1-NEXT: vmovapd 16(%rbp), %ymm8
; AVX1-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm9, %xmm9
; AVX1-NEXT: vpsrad $31, %xmm9, %xmm9
; AVX1-NEXT: vpmovsxdq %xmm9, %xmm10
; AVX1-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm9, %xmm9
; AVX1-NEXT: vinsertf128 $1, %xmm9, %ymm10, %ymm9
; AVX1-NEXT: vmaskmovpd 32(%rsi), %ymm9, %ymm10
; AVX1-NEXT: vblendvpd %ymm9, %ymm10, %ymm2, %ymm9
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm10
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm10, %ymm2
; AVX1-NEXT: vmaskmovpd 64(%rsi), %ymm2, %ymm10
; AVX1-NEXT: vblendvpd %ymm2, %ymm10, %ymm3, %ymm11
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm10
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm10, %ymm2
; AVX1-NEXT: vmaskmovpd 96(%rsi), %ymm2, %ymm10
; AVX1-NEXT: vblendvpd %ymm2, %ymm10, %ymm4, %ymm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm3, %xmm3
; AVX1-NEXT: vpsrad $31, %xmm3, %xmm3
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm10
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm10, %ymm3
; AVX1-NEXT: vmaskmovpd 160(%rsi), %ymm3, %ymm10
; AVX1-NEXT: vblendvpd %ymm3, %ymm10, %ymm6, %ymm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm3, %xmm3
; AVX1-NEXT: vpsrad $31, %xmm3, %xmm3
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm10
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm10, %ymm3
; AVX1-NEXT: vmaskmovpd 192(%rsi), %ymm3, %ymm10
; AVX1-NEXT: vblendvpd %ymm3, %ymm10, %ymm7, %ymm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[3,1,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm3, %xmm3
; AVX1-NEXT: vpsrad $31, %xmm3, %xmm3
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm10
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm10, %ymm3
; AVX1-NEXT: vmaskmovpd 224(%rsi), %ymm3, %ymm10
; AVX1-NEXT: vblendvpd %ymm3, %ymm10, %ymm8, %ymm3
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm8
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm8, %ymm0
; AVX1-NEXT: vmaskmovpd (%rsi), %ymm0, %ymm8
; AVX1-NEXT: vblendvpd %ymm0, %ymm8, %ymm1, %ymm0
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vmaskmovpd 128(%rsi), %ymm1, %ymm2
; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm5, %ymm1
; AVX1-NEXT: vmovapd %ymm1, 128(%rdi)
; AVX1-NEXT: vmovapd %ymm0, (%rdi)
; AVX1-NEXT: vmovapd %ymm3, 224(%rdi)
; AVX1-NEXT: vmovapd %ymm7, 192(%rdi)
; AVX1-NEXT: vmovapd %ymm6, 160(%rdi)
; AVX1-NEXT: vmovapd %ymm4, 96(%rdi)
; AVX1-NEXT: vmovapd %ymm11, 64(%rdi)
; AVX1-NEXT: vmovapd %ymm9, 32(%rdi)
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_32f64:
; AVX2: ## BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: Ltmp0:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: Ltmp1:
; AVX2-NEXT: .cfi_offset %rbp, -16
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: Ltmp2:
; AVX2-NEXT: .cfi_def_cfa_register %rbp
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $32, %rsp
; AVX2-NEXT: vmovapd 16(%rbp), %ymm8
; AVX2-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm9, %xmm9
; AVX2-NEXT: vpsrad $31, %xmm9, %xmm9
; AVX2-NEXT: vpmovsxdq %xmm9, %ymm9
; AVX2-NEXT: vmaskmovpd 32(%rsi), %ymm9, %ymm10
; AVX2-NEXT: vblendvpd %ymm9, %ymm10, %ymm2, %ymm9
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm2, %xmm2
; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT: vmaskmovpd 64(%rsi), %ymm2, %ymm10
; AVX2-NEXT: vblendvpd %ymm2, %ymm10, %ymm3, %ymm11
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm2, %xmm2
; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT: vmaskmovpd 96(%rsi), %ymm2, %ymm10
; AVX2-NEXT: vblendvpd %ymm2, %ymm10, %ymm4, %ymm4
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm3, %xmm3
; AVX2-NEXT: vpsrad $31, %xmm3, %xmm3
; AVX2-NEXT: vpmovsxdq %xmm3, %ymm3
; AVX2-NEXT: vmaskmovpd 160(%rsi), %ymm3, %ymm10
; AVX2-NEXT: vblendvpd %ymm3, %ymm10, %ymm6, %ymm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm6, %xmm6
; AVX2-NEXT: vpsrad $31, %xmm6, %xmm6
; AVX2-NEXT: vpmovsxdq %xmm6, %ymm6
; AVX2-NEXT: vmaskmovpd 192(%rsi), %ymm6, %ymm10
; AVX2-NEXT: vblendvpd %ymm6, %ymm10, %ymm7, %ymm6
; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm2[3,1,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm7, %xmm7
; AVX2-NEXT: vpsrad $31, %xmm7, %xmm7
; AVX2-NEXT: vpmovsxdq %xmm7, %ymm7
; AVX2-NEXT: vmaskmovpd 224(%rsi), %ymm7, %ymm10
; AVX2-NEXT: vblendvpd %ymm7, %ymm10, %ymm8, %ymm7
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vmaskmovpd (%rsi), %ymm0, %ymm8
; AVX2-NEXT: vblendvpd %ymm0, %ymm8, %ymm1, %ymm0
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vmaskmovpd 128(%rsi), %ymm1, %ymm2
; AVX2-NEXT: vblendvpd %ymm1, %ymm2, %ymm5, %ymm1
; AVX2-NEXT: vmovapd %ymm1, 128(%rdi)
; AVX2-NEXT: vmovapd %ymm0, (%rdi)
; AVX2-NEXT: vmovapd %ymm7, 224(%rdi)
; AVX2-NEXT: vmovapd %ymm6, 192(%rdi)
; AVX2-NEXT: vmovapd %ymm3, 160(%rdi)
; AVX2-NEXT: vmovapd %ymm4, 96(%rdi)
; AVX2-NEXT: vmovapd %ymm11, 64(%rdi)
; AVX2-NEXT: vmovapd %ymm9, 32(%rdi)
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_load_32f64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX512F-NEXT: vpmovsxbd %xmm5, %zmm5
; AVX512F-NEXT: vpslld $31, %zmm5, %zmm5
; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k1
; AVX512F-NEXT: vmovupd 128(%rdi), %zmm3 {%k1}
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT: vmovupd (%rdi), %zmm1 {%k2}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k2, %k1
; AVX512F-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; AVX512F-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-NEXT: vmovaps %zmm2, %zmm1
; AVX512F-NEXT: vmovaps %zmm3, %zmm2
; AVX512F-NEXT: vmovaps %zmm4, %zmm3
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_load_32f64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k1
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT: kshiftrd $16, %k1, %k2
; SKX-NEXT: vmovupd 128(%rdi), %zmm3 {%k2}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; SKX-NEXT: kshiftrw $8, %k2, %k1
; SKX-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: vmovaps %zmm2, %zmm1
; SKX-NEXT: vmovaps %zmm3, %zmm2
; SKX-NEXT: vmovaps %zmm4, %zmm3
; SKX-NEXT: retq
  %res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
  ret <32 x double> %res
}

declare <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
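
; Byte-element masked loads have no maskmov instruction before AVX512BW, so the
; AVX and AVX512F versions are scalarized: test each mask bit and conditionally
; vpinsrb the corresponding byte. Only SKX (avx512bw,avx512vl) selects vmovdqu8.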
i1> %mask, <32 x double> %src0) 2234 2235define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) { 2236; AVX-LABEL: test_mask_load_16xi8: 2237; AVX: ## BB#0: 2238; AVX-NEXT: vpextrb $0, %xmm0, %eax 2239; AVX-NEXT: ## implicit-def: %XMM1 2240; AVX-NEXT: testb $1, %al 2241; AVX-NEXT: je LBB50_2 2242; AVX-NEXT: ## BB#1: ## %cond.load 2243; AVX-NEXT: movzbl (%rdi), %eax 2244; AVX-NEXT: vmovd %eax, %xmm1 2245; AVX-NEXT: LBB50_2: ## %else 2246; AVX-NEXT: vpextrb $1, %xmm0, %eax 2247; AVX-NEXT: testb $1, %al 2248; AVX-NEXT: je LBB50_4 2249; AVX-NEXT: ## BB#3: ## %cond.load1 2250; AVX-NEXT: vpinsrb $1, 1(%rdi), %xmm1, %xmm1 2251; AVX-NEXT: LBB50_4: ## %else2 2252; AVX-NEXT: vpextrb $2, %xmm0, %eax 2253; AVX-NEXT: testb $1, %al 2254; AVX-NEXT: je LBB50_6 2255; AVX-NEXT: ## BB#5: ## %cond.load4 2256; AVX-NEXT: vpinsrb $2, 2(%rdi), %xmm1, %xmm1 2257; AVX-NEXT: LBB50_6: ## %else5 2258; AVX-NEXT: vpextrb $3, %xmm0, %eax 2259; AVX-NEXT: testb $1, %al 2260; AVX-NEXT: je LBB50_8 2261; AVX-NEXT: ## BB#7: ## %cond.load7 2262; AVX-NEXT: vpinsrb $3, 3(%rdi), %xmm1, %xmm1 2263; AVX-NEXT: LBB50_8: ## %else8 2264; AVX-NEXT: vpextrb $4, %xmm0, %eax 2265; AVX-NEXT: testb $1, %al 2266; AVX-NEXT: je LBB50_10 2267; AVX-NEXT: ## BB#9: ## %cond.load10 2268; AVX-NEXT: vpinsrb $4, 4(%rdi), %xmm1, %xmm1 2269; AVX-NEXT: LBB50_10: ## %else11 2270; AVX-NEXT: vpextrb $5, %xmm0, %eax 2271; AVX-NEXT: testb $1, %al 2272; AVX-NEXT: je LBB50_12 2273; AVX-NEXT: ## BB#11: ## %cond.load13 2274; AVX-NEXT: vpinsrb $5, 5(%rdi), %xmm1, %xmm1 2275; AVX-NEXT: LBB50_12: ## %else14 2276; AVX-NEXT: vpextrb $6, %xmm0, %eax 2277; AVX-NEXT: testb $1, %al 2278; AVX-NEXT: je LBB50_14 2279; AVX-NEXT: ## BB#13: ## %cond.load16 2280; AVX-NEXT: vpinsrb $6, 6(%rdi), %xmm1, %xmm1 2281; AVX-NEXT: LBB50_14: ## %else17 2282; AVX-NEXT: vpextrb $7, %xmm0, %eax 2283; AVX-NEXT: testb $1, %al 2284; AVX-NEXT: je LBB50_16 2285; AVX-NEXT: ## BB#15: ## %cond.load19 2286; AVX-NEXT: vpinsrb $7, 7(%rdi), %xmm1, %xmm1 2287; AVX-NEXT: LBB50_16: ## %else20 2288; AVX-NEXT: vpextrb $8, %xmm0, %eax 2289; AVX-NEXT: testb $1, %al 2290; AVX-NEXT: je LBB50_18 2291; AVX-NEXT: ## BB#17: ## %cond.load22 2292; AVX-NEXT: vpinsrb $8, 8(%rdi), %xmm1, %xmm1 2293; AVX-NEXT: LBB50_18: ## %else23 2294; AVX-NEXT: vpextrb $9, %xmm0, %eax 2295; AVX-NEXT: testb $1, %al 2296; AVX-NEXT: je LBB50_20 2297; AVX-NEXT: ## BB#19: ## %cond.load25 2298; AVX-NEXT: vpinsrb $9, 9(%rdi), %xmm1, %xmm1 2299; AVX-NEXT: LBB50_20: ## %else26 2300; AVX-NEXT: vpextrb $10, %xmm0, %eax 2301; AVX-NEXT: testb $1, %al 2302; AVX-NEXT: je LBB50_22 2303; AVX-NEXT: ## BB#21: ## %cond.load28 2304; AVX-NEXT: vpinsrb $10, 10(%rdi), %xmm1, %xmm1 2305; AVX-NEXT: LBB50_22: ## %else29 2306; AVX-NEXT: vpextrb $11, %xmm0, %eax 2307; AVX-NEXT: testb $1, %al 2308; AVX-NEXT: je LBB50_24 2309; AVX-NEXT: ## BB#23: ## %cond.load31 2310; AVX-NEXT: vpinsrb $11, 11(%rdi), %xmm1, %xmm1 2311; AVX-NEXT: LBB50_24: ## %else32 2312; AVX-NEXT: vpextrb $12, %xmm0, %eax 2313; AVX-NEXT: testb $1, %al 2314; AVX-NEXT: je LBB50_26 2315; AVX-NEXT: ## BB#25: ## %cond.load34 2316; AVX-NEXT: vpinsrb $12, 12(%rdi), %xmm1, %xmm1 2317; AVX-NEXT: LBB50_26: ## %else35 2318; AVX-NEXT: vpextrb $13, %xmm0, %eax 2319; AVX-NEXT: testb $1, %al 2320; AVX-NEXT: je LBB50_28 2321; AVX-NEXT: ## BB#27: ## %cond.load37 2322; AVX-NEXT: vpinsrb $13, 13(%rdi), %xmm1, %xmm1 2323; AVX-NEXT: LBB50_28: ## %else38 2324; AVX-NEXT: vpextrb $14, %xmm0, %eax 2325; AVX-NEXT: testb $1, %al 2326; AVX-NEXT: je LBB50_30 2327; AVX-NEXT: ## BB#29: ## 
%cond.load40 2328; AVX-NEXT: vpinsrb $14, 14(%rdi), %xmm1, %xmm1 2329; AVX-NEXT: LBB50_30: ## %else41 2330; AVX-NEXT: vpextrb $15, %xmm0, %eax 2331; AVX-NEXT: testb $1, %al 2332; AVX-NEXT: je LBB50_32 2333; AVX-NEXT: ## BB#31: ## %cond.load43 2334; AVX-NEXT: vpinsrb $15, 15(%rdi), %xmm1, %xmm1 2335; AVX-NEXT: LBB50_32: ## %else44 2336; AVX-NEXT: vpsllw $7, %xmm0, %xmm0 2337; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 2338; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm0, %xmm0 2339; AVX-NEXT: retq 2340; 2341; AVX512F-LABEL: test_mask_load_16xi8: 2342; AVX512F: ## BB#0: 2343; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 2344; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 2345; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 2346; AVX512F-NEXT: kshiftlw $15, %k1, %k0 2347; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2348; AVX512F-NEXT: kmovw %k0, %eax 2349; AVX512F-NEXT: ## implicit-def: %XMM0 2350; AVX512F-NEXT: testb %al, %al 2351; AVX512F-NEXT: je LBB50_2 2352; AVX512F-NEXT: ## BB#1: ## %cond.load 2353; AVX512F-NEXT: movzbl (%rdi), %eax 2354; AVX512F-NEXT: vmovd %eax, %xmm0 2355; AVX512F-NEXT: LBB50_2: ## %else 2356; AVX512F-NEXT: kshiftlw $14, %k1, %k0 2357; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2358; AVX512F-NEXT: kmovw %k0, %eax 2359; AVX512F-NEXT: testb %al, %al 2360; AVX512F-NEXT: je LBB50_4 2361; AVX512F-NEXT: ## BB#3: ## %cond.load1 2362; AVX512F-NEXT: vpinsrb $1, 1(%rdi), %xmm0, %xmm0 2363; AVX512F-NEXT: LBB50_4: ## %else2 2364; AVX512F-NEXT: kshiftlw $13, %k1, %k0 2365; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2366; AVX512F-NEXT: kmovw %k0, %eax 2367; AVX512F-NEXT: testb %al, %al 2368; AVX512F-NEXT: je LBB50_6 2369; AVX512F-NEXT: ## BB#5: ## %cond.load4 2370; AVX512F-NEXT: vpinsrb $2, 2(%rdi), %xmm0, %xmm0 2371; AVX512F-NEXT: LBB50_6: ## %else5 2372; AVX512F-NEXT: kshiftlw $12, %k1, %k0 2373; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2374; AVX512F-NEXT: kmovw %k0, %eax 2375; AVX512F-NEXT: testb %al, %al 2376; AVX512F-NEXT: je LBB50_8 2377; AVX512F-NEXT: ## BB#7: ## %cond.load7 2378; AVX512F-NEXT: vpinsrb $3, 3(%rdi), %xmm0, %xmm0 2379; AVX512F-NEXT: LBB50_8: ## %else8 2380; AVX512F-NEXT: kshiftlw $11, %k1, %k0 2381; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2382; AVX512F-NEXT: kmovw %k0, %eax 2383; AVX512F-NEXT: testb %al, %al 2384; AVX512F-NEXT: je LBB50_10 2385; AVX512F-NEXT: ## BB#9: ## %cond.load10 2386; AVX512F-NEXT: vpinsrb $4, 4(%rdi), %xmm0, %xmm0 2387; AVX512F-NEXT: LBB50_10: ## %else11 2388; AVX512F-NEXT: kshiftlw $10, %k1, %k0 2389; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2390; AVX512F-NEXT: kmovw %k0, %eax 2391; AVX512F-NEXT: testb %al, %al 2392; AVX512F-NEXT: je LBB50_12 2393; AVX512F-NEXT: ## BB#11: ## %cond.load13 2394; AVX512F-NEXT: vpinsrb $5, 5(%rdi), %xmm0, %xmm0 2395; AVX512F-NEXT: LBB50_12: ## %else14 2396; AVX512F-NEXT: kshiftlw $9, %k1, %k0 2397; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2398; AVX512F-NEXT: kmovw %k0, %eax 2399; AVX512F-NEXT: testb %al, %al 2400; AVX512F-NEXT: je LBB50_14 2401; AVX512F-NEXT: ## BB#13: ## %cond.load16 2402; AVX512F-NEXT: vpinsrb $6, 6(%rdi), %xmm0, %xmm0 2403; AVX512F-NEXT: LBB50_14: ## %else17 2404; AVX512F-NEXT: kshiftlw $8, %k1, %k0 2405; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2406; AVX512F-NEXT: kmovw %k0, %eax 2407; AVX512F-NEXT: testb %al, %al 2408; AVX512F-NEXT: je LBB50_16 2409; AVX512F-NEXT: ## BB#15: ## %cond.load19 2410; AVX512F-NEXT: vpinsrb $7, 7(%rdi), %xmm0, %xmm0 2411; AVX512F-NEXT: LBB50_16: ## %else20 2412; AVX512F-NEXT: kshiftlw $7, %k1, %k0 2413; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2414; AVX512F-NEXT: kmovw %k0, %eax 2415; AVX512F-NEXT: testb %al, %al 2416; 
AVX512F-NEXT: je LBB50_18 2417; AVX512F-NEXT: ## BB#17: ## %cond.load22 2418; AVX512F-NEXT: vpinsrb $8, 8(%rdi), %xmm0, %xmm0 2419; AVX512F-NEXT: LBB50_18: ## %else23 2420; AVX512F-NEXT: kshiftlw $6, %k1, %k0 2421; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2422; AVX512F-NEXT: kmovw %k0, %eax 2423; AVX512F-NEXT: testb %al, %al 2424; AVX512F-NEXT: je LBB50_20 2425; AVX512F-NEXT: ## BB#19: ## %cond.load25 2426; AVX512F-NEXT: vpinsrb $9, 9(%rdi), %xmm0, %xmm0 2427; AVX512F-NEXT: LBB50_20: ## %else26 2428; AVX512F-NEXT: kshiftlw $5, %k1, %k0 2429; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2430; AVX512F-NEXT: kmovw %k0, %eax 2431; AVX512F-NEXT: testb %al, %al 2432; AVX512F-NEXT: je LBB50_22 2433; AVX512F-NEXT: ## BB#21: ## %cond.load28 2434; AVX512F-NEXT: vpinsrb $10, 10(%rdi), %xmm0, %xmm0 2435; AVX512F-NEXT: LBB50_22: ## %else29 2436; AVX512F-NEXT: kshiftlw $4, %k1, %k0 2437; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2438; AVX512F-NEXT: kmovw %k0, %eax 2439; AVX512F-NEXT: testb %al, %al 2440; AVX512F-NEXT: je LBB50_24 2441; AVX512F-NEXT: ## BB#23: ## %cond.load31 2442; AVX512F-NEXT: vpinsrb $11, 11(%rdi), %xmm0, %xmm0 2443; AVX512F-NEXT: LBB50_24: ## %else32 2444; AVX512F-NEXT: kshiftlw $3, %k1, %k0 2445; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2446; AVX512F-NEXT: kmovw %k0, %eax 2447; AVX512F-NEXT: testb %al, %al 2448; AVX512F-NEXT: je LBB50_26 2449; AVX512F-NEXT: ## BB#25: ## %cond.load34 2450; AVX512F-NEXT: vpinsrb $12, 12(%rdi), %xmm0, %xmm0 2451; AVX512F-NEXT: LBB50_26: ## %else35 2452; AVX512F-NEXT: kshiftlw $2, %k1, %k0 2453; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2454; AVX512F-NEXT: kmovw %k0, %eax 2455; AVX512F-NEXT: testb %al, %al 2456; AVX512F-NEXT: je LBB50_28 2457; AVX512F-NEXT: ## BB#27: ## %cond.load37 2458; AVX512F-NEXT: vpinsrb $13, 13(%rdi), %xmm0, %xmm0 2459; AVX512F-NEXT: LBB50_28: ## %else38 2460; AVX512F-NEXT: kshiftlw $1, %k1, %k0 2461; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2462; AVX512F-NEXT: kmovw %k0, %eax 2463; AVX512F-NEXT: testb %al, %al 2464; AVX512F-NEXT: je LBB50_30 2465; AVX512F-NEXT: ## BB#29: ## %cond.load40 2466; AVX512F-NEXT: vpinsrb $14, 14(%rdi), %xmm0, %xmm0 2467; AVX512F-NEXT: LBB50_30: ## %else41 2468; AVX512F-NEXT: kshiftlw $0, %k1, %k0 2469; AVX512F-NEXT: kshiftrw $15, %k0, %k0 2470; AVX512F-NEXT: kmovw %k0, %eax 2471; AVX512F-NEXT: testb %al, %al 2472; AVX512F-NEXT: je LBB50_32 2473; AVX512F-NEXT: ## BB#31: ## %cond.load43 2474; AVX512F-NEXT: vpinsrb $15, 15(%rdi), %xmm0, %xmm0 2475; AVX512F-NEXT: LBB50_32: ## %else44 2476; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 2477; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} 2478; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 2479; AVX512F-NEXT: vpblendvb %xmm1, %xmm0, %xmm0, %xmm0 2480; AVX512F-NEXT: retq 2481; 2482; SKX-LABEL: test_mask_load_16xi8: 2483; SKX: ## BB#0: 2484; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 2485; SKX-NEXT: vpmovb2m %xmm0, %k1 2486; SKX-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} {z} 2487; SKX-NEXT: retq 2488 %res = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef) 2489 ret <16 x i8> %res 2490} 2491declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) 2492 2493define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) { 2494; AVX1-LABEL: test_mask_load_32xi8: 2495; AVX1: ## BB#0: 2496; AVX1-NEXT: vpextrb $0, %xmm0, %eax 2497; AVX1-NEXT: ## implicit-def: %YMM1 2498; AVX1-NEXT: testb $1, %al 2499; AVX1-NEXT: je LBB51_2 2500; AVX1-NEXT: ## BB#1: ## %cond.load 2501; AVX1-NEXT: movzbl (%rdi), %eax 2502; 
AVX1-NEXT: vmovd %eax, %xmm1 2503; AVX1-NEXT: LBB51_2: ## %else 2504; AVX1-NEXT: vpextrb $1, %xmm0, %eax 2505; AVX1-NEXT: testb $1, %al 2506; AVX1-NEXT: je LBB51_4 2507; AVX1-NEXT: ## BB#3: ## %cond.load1 2508; AVX1-NEXT: vpinsrb $1, 1(%rdi), %xmm1, %xmm2 2509; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2510; AVX1-NEXT: LBB51_4: ## %else2 2511; AVX1-NEXT: vpextrb $2, %xmm0, %eax 2512; AVX1-NEXT: testb $1, %al 2513; AVX1-NEXT: je LBB51_6 2514; AVX1-NEXT: ## BB#5: ## %cond.load4 2515; AVX1-NEXT: vpinsrb $2, 2(%rdi), %xmm1, %xmm2 2516; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2517; AVX1-NEXT: LBB51_6: ## %else5 2518; AVX1-NEXT: vpextrb $3, %xmm0, %eax 2519; AVX1-NEXT: testb $1, %al 2520; AVX1-NEXT: je LBB51_8 2521; AVX1-NEXT: ## BB#7: ## %cond.load7 2522; AVX1-NEXT: vpinsrb $3, 3(%rdi), %xmm1, %xmm2 2523; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2524; AVX1-NEXT: LBB51_8: ## %else8 2525; AVX1-NEXT: vpextrb $4, %xmm0, %eax 2526; AVX1-NEXT: testb $1, %al 2527; AVX1-NEXT: je LBB51_10 2528; AVX1-NEXT: ## BB#9: ## %cond.load10 2529; AVX1-NEXT: vpinsrb $4, 4(%rdi), %xmm1, %xmm2 2530; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2531; AVX1-NEXT: LBB51_10: ## %else11 2532; AVX1-NEXT: vpextrb $5, %xmm0, %eax 2533; AVX1-NEXT: testb $1, %al 2534; AVX1-NEXT: je LBB51_12 2535; AVX1-NEXT: ## BB#11: ## %cond.load13 2536; AVX1-NEXT: vpinsrb $5, 5(%rdi), %xmm1, %xmm2 2537; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2538; AVX1-NEXT: LBB51_12: ## %else14 2539; AVX1-NEXT: vpextrb $6, %xmm0, %eax 2540; AVX1-NEXT: testb $1, %al 2541; AVX1-NEXT: je LBB51_14 2542; AVX1-NEXT: ## BB#13: ## %cond.load16 2543; AVX1-NEXT: vpinsrb $6, 6(%rdi), %xmm1, %xmm2 2544; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2545; AVX1-NEXT: LBB51_14: ## %else17 2546; AVX1-NEXT: vpextrb $7, %xmm0, %eax 2547; AVX1-NEXT: testb $1, %al 2548; AVX1-NEXT: je LBB51_16 2549; AVX1-NEXT: ## BB#15: ## %cond.load19 2550; AVX1-NEXT: vpinsrb $7, 7(%rdi), %xmm1, %xmm2 2551; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2552; AVX1-NEXT: LBB51_16: ## %else20 2553; AVX1-NEXT: vpextrb $8, %xmm0, %eax 2554; AVX1-NEXT: testb $1, %al 2555; AVX1-NEXT: je LBB51_18 2556; AVX1-NEXT: ## BB#17: ## %cond.load22 2557; AVX1-NEXT: vpinsrb $8, 8(%rdi), %xmm1, %xmm2 2558; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2559; AVX1-NEXT: LBB51_18: ## %else23 2560; AVX1-NEXT: vpextrb $9, %xmm0, %eax 2561; AVX1-NEXT: testb $1, %al 2562; AVX1-NEXT: je LBB51_20 2563; AVX1-NEXT: ## BB#19: ## %cond.load25 2564; AVX1-NEXT: vpinsrb $9, 9(%rdi), %xmm1, %xmm2 2565; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2566; AVX1-NEXT: LBB51_20: ## %else26 2567; AVX1-NEXT: vpextrb $10, %xmm0, %eax 2568; AVX1-NEXT: testb $1, %al 2569; AVX1-NEXT: je LBB51_22 2570; AVX1-NEXT: ## BB#21: ## %cond.load28 2571; AVX1-NEXT: vpinsrb $10, 10(%rdi), %xmm1, %xmm2 2572; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2573; AVX1-NEXT: LBB51_22: ## %else29 2574; AVX1-NEXT: vpextrb $11, %xmm0, %eax 2575; AVX1-NEXT: testb $1, %al 2576; AVX1-NEXT: je LBB51_24 2577; AVX1-NEXT: ## BB#23: ## %cond.load31 2578; AVX1-NEXT: vpinsrb $11, 11(%rdi), %xmm1, %xmm2 2579; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2580; AVX1-NEXT: LBB51_24: ## %else32 2581; AVX1-NEXT: vpextrb $12, %xmm0, %eax 2582; AVX1-NEXT: testb $1, %al 2583; AVX1-NEXT: je LBB51_26 2584; AVX1-NEXT: ## BB#25: ## %cond.load34 2585; AVX1-NEXT: 
vpinsrb $12, 12(%rdi), %xmm1, %xmm2 2586; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2587; AVX1-NEXT: LBB51_26: ## %else35 2588; AVX1-NEXT: vpextrb $13, %xmm0, %eax 2589; AVX1-NEXT: testb $1, %al 2590; AVX1-NEXT: je LBB51_28 2591; AVX1-NEXT: ## BB#27: ## %cond.load37 2592; AVX1-NEXT: vpinsrb $13, 13(%rdi), %xmm1, %xmm2 2593; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2594; AVX1-NEXT: LBB51_28: ## %else38 2595; AVX1-NEXT: vpextrb $14, %xmm0, %eax 2596; AVX1-NEXT: testb $1, %al 2597; AVX1-NEXT: je LBB51_30 2598; AVX1-NEXT: ## BB#29: ## %cond.load40 2599; AVX1-NEXT: vpinsrb $14, 14(%rdi), %xmm1, %xmm2 2600; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2601; AVX1-NEXT: LBB51_30: ## %else41 2602; AVX1-NEXT: vpextrb $15, %xmm0, %eax 2603; AVX1-NEXT: testb $1, %al 2604; AVX1-NEXT: je LBB51_32 2605; AVX1-NEXT: ## BB#31: ## %cond.load43 2606; AVX1-NEXT: vpinsrb $15, 15(%rdi), %xmm1, %xmm2 2607; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 2608; AVX1-NEXT: LBB51_32: ## %else44 2609; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2610; AVX1-NEXT: vpextrb $0, %xmm2, %eax 2611; AVX1-NEXT: testb $1, %al 2612; AVX1-NEXT: je LBB51_34 2613; AVX1-NEXT: ## BB#33: ## %cond.load46 2614; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2615; AVX1-NEXT: vpinsrb $0, 16(%rdi), %xmm3, %xmm3 2616; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2617; AVX1-NEXT: LBB51_34: ## %else47 2618; AVX1-NEXT: vpextrb $1, %xmm2, %eax 2619; AVX1-NEXT: testb $1, %al 2620; AVX1-NEXT: je LBB51_36 2621; AVX1-NEXT: ## BB#35: ## %cond.load49 2622; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2623; AVX1-NEXT: vpinsrb $1, 17(%rdi), %xmm3, %xmm3 2624; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2625; AVX1-NEXT: LBB51_36: ## %else50 2626; AVX1-NEXT: vpextrb $2, %xmm2, %eax 2627; AVX1-NEXT: testb $1, %al 2628; AVX1-NEXT: je LBB51_38 2629; AVX1-NEXT: ## BB#37: ## %cond.load52 2630; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2631; AVX1-NEXT: vpinsrb $2, 18(%rdi), %xmm3, %xmm3 2632; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2633; AVX1-NEXT: LBB51_38: ## %else53 2634; AVX1-NEXT: vpextrb $3, %xmm2, %eax 2635; AVX1-NEXT: testb $1, %al 2636; AVX1-NEXT: je LBB51_40 2637; AVX1-NEXT: ## BB#39: ## %cond.load55 2638; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2639; AVX1-NEXT: vpinsrb $3, 19(%rdi), %xmm3, %xmm3 2640; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2641; AVX1-NEXT: LBB51_40: ## %else56 2642; AVX1-NEXT: vpextrb $4, %xmm2, %eax 2643; AVX1-NEXT: testb $1, %al 2644; AVX1-NEXT: je LBB51_42 2645; AVX1-NEXT: ## BB#41: ## %cond.load58 2646; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2647; AVX1-NEXT: vpinsrb $4, 20(%rdi), %xmm3, %xmm3 2648; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2649; AVX1-NEXT: LBB51_42: ## %else59 2650; AVX1-NEXT: vpextrb $5, %xmm2, %eax 2651; AVX1-NEXT: testb $1, %al 2652; AVX1-NEXT: je LBB51_44 2653; AVX1-NEXT: ## BB#43: ## %cond.load61 2654; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2655; AVX1-NEXT: vpinsrb $5, 21(%rdi), %xmm3, %xmm3 2656; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2657; AVX1-NEXT: LBB51_44: ## %else62 2658; AVX1-NEXT: vpextrb $6, %xmm2, %eax 2659; AVX1-NEXT: testb $1, %al 2660; AVX1-NEXT: je LBB51_46 2661; AVX1-NEXT: ## BB#45: ## %cond.load64 2662; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2663; AVX1-NEXT: vpinsrb $6, 22(%rdi), %xmm3, %xmm3 2664; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2665; AVX1-NEXT: LBB51_46: ## %else65 2666; AVX1-NEXT: vpextrb $7, %xmm2, %eax 2667; AVX1-NEXT: testb $1, %al 2668; AVX1-NEXT: je LBB51_48 2669; 
AVX1-NEXT: ## BB#47: ## %cond.load67 2670; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2671; AVX1-NEXT: vpinsrb $7, 23(%rdi), %xmm3, %xmm3 2672; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2673; AVX1-NEXT: LBB51_48: ## %else68 2674; AVX1-NEXT: vpextrb $8, %xmm2, %eax 2675; AVX1-NEXT: testb $1, %al 2676; AVX1-NEXT: je LBB51_50 2677; AVX1-NEXT: ## BB#49: ## %cond.load70 2678; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2679; AVX1-NEXT: vpinsrb $8, 24(%rdi), %xmm3, %xmm3 2680; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2681; AVX1-NEXT: LBB51_50: ## %else71 2682; AVX1-NEXT: vpextrb $9, %xmm2, %eax 2683; AVX1-NEXT: testb $1, %al 2684; AVX1-NEXT: je LBB51_52 2685; AVX1-NEXT: ## BB#51: ## %cond.load73 2686; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2687; AVX1-NEXT: vpinsrb $9, 25(%rdi), %xmm3, %xmm3 2688; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2689; AVX1-NEXT: LBB51_52: ## %else74 2690; AVX1-NEXT: vpextrb $10, %xmm2, %eax 2691; AVX1-NEXT: testb $1, %al 2692; AVX1-NEXT: je LBB51_54 2693; AVX1-NEXT: ## BB#53: ## %cond.load76 2694; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2695; AVX1-NEXT: vpinsrb $10, 26(%rdi), %xmm3, %xmm3 2696; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2697; AVX1-NEXT: LBB51_54: ## %else77 2698; AVX1-NEXT: vpextrb $11, %xmm2, %eax 2699; AVX1-NEXT: testb $1, %al 2700; AVX1-NEXT: je LBB51_56 2701; AVX1-NEXT: ## BB#55: ## %cond.load79 2702; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2703; AVX1-NEXT: vpinsrb $11, 27(%rdi), %xmm3, %xmm3 2704; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2705; AVX1-NEXT: LBB51_56: ## %else80 2706; AVX1-NEXT: vpextrb $12, %xmm2, %eax 2707; AVX1-NEXT: testb $1, %al 2708; AVX1-NEXT: je LBB51_58 2709; AVX1-NEXT: ## BB#57: ## %cond.load82 2710; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2711; AVX1-NEXT: vpinsrb $12, 28(%rdi), %xmm3, %xmm3 2712; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2713; AVX1-NEXT: LBB51_58: ## %else83 2714; AVX1-NEXT: vpextrb $13, %xmm2, %eax 2715; AVX1-NEXT: testb $1, %al 2716; AVX1-NEXT: je LBB51_60 2717; AVX1-NEXT: ## BB#59: ## %cond.load85 2718; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2719; AVX1-NEXT: vpinsrb $13, 29(%rdi), %xmm3, %xmm3 2720; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2721; AVX1-NEXT: LBB51_60: ## %else86 2722; AVX1-NEXT: vpextrb $14, %xmm2, %eax 2723; AVX1-NEXT: testb $1, %al 2724; AVX1-NEXT: je LBB51_62 2725; AVX1-NEXT: ## BB#61: ## %cond.load88 2726; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2727; AVX1-NEXT: vpinsrb $14, 30(%rdi), %xmm3, %xmm3 2728; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2729; AVX1-NEXT: LBB51_62: ## %else89 2730; AVX1-NEXT: vpextrb $15, %xmm2, %eax 2731; AVX1-NEXT: testb $1, %al 2732; AVX1-NEXT: je LBB51_64 2733; AVX1-NEXT: ## BB#63: ## %cond.load91 2734; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2735; AVX1-NEXT: vpinsrb $15, 31(%rdi), %xmm3, %xmm3 2736; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2737; AVX1-NEXT: LBB51_64: ## %else92 2738; AVX1-NEXT: vpsllw $7, %xmm2, %xmm2 2739; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] 2740; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 2741; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 2742; AVX1-NEXT: vpcmpgtb %xmm2, %xmm4, %xmm2 2743; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 2744; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 2745; AVX1-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0 2746; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2747; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 2748; AVX1-NEXT: retq 2749; 2750; AVX2-LABEL: test_mask_load_32xi8: 2751; AVX2: ## BB#0: 2752; AVX2-NEXT: vpextrb $0, %xmm0, %eax 2753; 
; AVX2-NEXT: ## implicit-def: %YMM1
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_2
; AVX2-NEXT: ## BB#1: ## %cond.load
; AVX2-NEXT: movzbl (%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: LBB51_2: ## %else
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_4
; AVX2-NEXT: ## BB#3: ## %cond.load1
; AVX2-NEXT: vpinsrb $1, 1(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_4: ## %else2
; AVX2-NEXT: vpextrb $2, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_6
; AVX2-NEXT: ## BB#5: ## %cond.load4
; AVX2-NEXT: vpinsrb $2, 2(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_6: ## %else5
; AVX2-NEXT: vpextrb $3, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_8
; AVX2-NEXT: ## BB#7: ## %cond.load7
; AVX2-NEXT: vpinsrb $3, 3(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_8: ## %else8
; AVX2-NEXT: vpextrb $4, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_10
; AVX2-NEXT: ## BB#9: ## %cond.load10
; AVX2-NEXT: vpinsrb $4, 4(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_10: ## %else11
; AVX2-NEXT: vpextrb $5, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_12
; AVX2-NEXT: ## BB#11: ## %cond.load13
; AVX2-NEXT: vpinsrb $5, 5(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_12: ## %else14
; AVX2-NEXT: vpextrb $6, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_14
; AVX2-NEXT: ## BB#13: ## %cond.load16
; AVX2-NEXT: vpinsrb $6, 6(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_14: ## %else17
; AVX2-NEXT: vpextrb $7, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_16
; AVX2-NEXT: ## BB#15: ## %cond.load19
; AVX2-NEXT: vpinsrb $7, 7(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_16: ## %else20
; AVX2-NEXT: vpextrb $8, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_18
; AVX2-NEXT: ## BB#17: ## %cond.load22
; AVX2-NEXT: vpinsrb $8, 8(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_18: ## %else23
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_20
; AVX2-NEXT: ## BB#19: ## %cond.load25
; AVX2-NEXT: vpinsrb $9, 9(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_20: ## %else26
; AVX2-NEXT: vpextrb $10, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_22
; AVX2-NEXT: ## BB#21: ## %cond.load28
; AVX2-NEXT: vpinsrb $10, 10(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_22: ## %else29
; AVX2-NEXT: vpextrb $11, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_24
; AVX2-NEXT: ## BB#23: ## %cond.load31
; AVX2-NEXT: vpinsrb $11, 11(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_24: ## %else32
; AVX2-NEXT: vpextrb $12, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_26
; AVX2-NEXT: ## BB#25: ## %cond.load34
; AVX2-NEXT: vpinsrb $12, 12(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_26: ## %else35
; AVX2-NEXT: vpextrb $13, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_28
; AVX2-NEXT: ## BB#27: ## %cond.load37
; AVX2-NEXT: vpinsrb $13, 13(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_28: ## %else38
; AVX2-NEXT: vpextrb $14, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_30
; AVX2-NEXT: ## BB#29: ## %cond.load40
; AVX2-NEXT: vpinsrb $14, 14(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_30: ## %else41
; AVX2-NEXT: vpextrb $15, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_32
; AVX2-NEXT: ## BB#31: ## %cond.load43
; AVX2-NEXT: vpinsrb $15, 15(%rdi), %xmm1, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: LBB51_32: ## %else44
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpextrb $0, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_34
; AVX2-NEXT: ## BB#33: ## %cond.load46
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $0, 16(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_34: ## %else47
; AVX2-NEXT: vpextrb $1, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_36
; AVX2-NEXT: ## BB#35: ## %cond.load49
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $1, 17(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_36: ## %else50
; AVX2-NEXT: vpextrb $2, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_38
; AVX2-NEXT: ## BB#37: ## %cond.load52
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $2, 18(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_38: ## %else53
; AVX2-NEXT: vpextrb $3, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_40
; AVX2-NEXT: ## BB#39: ## %cond.load55
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $3, 19(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_40: ## %else56
; AVX2-NEXT: vpextrb $4, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_42
; AVX2-NEXT: ## BB#41: ## %cond.load58
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $4, 20(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_42: ## %else59
; AVX2-NEXT: vpextrb $5, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_44
; AVX2-NEXT: ## BB#43: ## %cond.load61
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $5, 21(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_44: ## %else62
; AVX2-NEXT: vpextrb $6, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_46
; AVX2-NEXT: ## BB#45: ## %cond.load64
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $6, 22(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_46: ## %else65
; AVX2-NEXT: vpextrb $7, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_48
; AVX2-NEXT: ## BB#47: ## %cond.load67
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $7, 23(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_48: ## %else68
; AVX2-NEXT: vpextrb $8, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_50
; AVX2-NEXT: ## BB#49: ## %cond.load70
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $8, 24(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_50: ## %else71
; AVX2-NEXT: vpextrb $9, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_52
; AVX2-NEXT: ## BB#51: ## %cond.load73
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $9, 25(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_52: ## %else74
; AVX2-NEXT: vpextrb $10, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_54
; AVX2-NEXT: ## BB#53: ## %cond.load76
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $10, 26(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_54: ## %else77
; AVX2-NEXT: vpextrb $11, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_56
; AVX2-NEXT: ## BB#55: ## %cond.load79
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $11, 27(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_56: ## %else80
; AVX2-NEXT: vpextrb $12, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_58
; AVX2-NEXT: ## BB#57: ## %cond.load82
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $12, 28(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_58: ## %else83
; AVX2-NEXT: vpextrb $13, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_60
; AVX2-NEXT: ## BB#59: ## %cond.load85
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $13, 29(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_60: ## %else86
; AVX2-NEXT: vpextrb $14, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_62
; AVX2-NEXT: ## BB#61: ## %cond.load88
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpinsrb $14, 30(%rdi), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: LBB51_62: ## %else89
; AVX2-NEXT: vpextrb $15, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB51_64
; AVX2-NEXT: ## BB#63: ## %cond.load91
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpinsrb $15, 31(%rdi), %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT: LBB51_64: ## %else92
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_mask_load_32xi8:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpextrb $0, %xmm0, %eax
; AVX512F-NEXT: ## implicit-def: %YMM1
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_2
; AVX512F-NEXT: ## BB#1: ## %cond.load
; AVX512F-NEXT: movzbl (%rdi), %eax
; AVX512F-NEXT: vmovd %eax, %xmm1
; AVX512F-NEXT: LBB51_2: ## %else
; AVX512F-NEXT: vpextrb $1, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_4
; AVX512F-NEXT: ## BB#3: ## %cond.load1
; AVX512F-NEXT: vpinsrb $1, 1(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_4: ## %else2
; AVX512F-NEXT: vpextrb $2, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_6
; AVX512F-NEXT: ## BB#5: ## %cond.load4
; AVX512F-NEXT: vpinsrb $2, 2(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_6: ## %else5
; AVX512F-NEXT: vpextrb $3, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_8
; AVX512F-NEXT: ## BB#7: ## %cond.load7
; AVX512F-NEXT: vpinsrb $3, 3(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_8: ## %else8
; AVX512F-NEXT: vpextrb $4, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_10
; AVX512F-NEXT: ## BB#9: ## %cond.load10
; AVX512F-NEXT: vpinsrb $4, 4(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_10: ## %else11
; AVX512F-NEXT: vpextrb $5, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_12
; AVX512F-NEXT: ## BB#11: ## %cond.load13
; AVX512F-NEXT: vpinsrb $5, 5(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_12: ## %else14
; AVX512F-NEXT: vpextrb $6, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_14
; AVX512F-NEXT: ## BB#13: ## %cond.load16
; AVX512F-NEXT: vpinsrb $6, 6(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_14: ## %else17
; AVX512F-NEXT: vpextrb $7, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_16
; AVX512F-NEXT: ## BB#15: ## %cond.load19
; AVX512F-NEXT: vpinsrb $7, 7(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_16: ## %else20
; AVX512F-NEXT: vpextrb $8, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_18
; AVX512F-NEXT: ## BB#17: ## %cond.load22
; AVX512F-NEXT: vpinsrb $8, 8(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_18: ## %else23
; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_20
; AVX512F-NEXT: ## BB#19: ## %cond.load25
; AVX512F-NEXT: vpinsrb $9, 9(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_20: ## %else26
; AVX512F-NEXT: vpextrb $10, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_22
; AVX512F-NEXT: ## BB#21: ## %cond.load28
; AVX512F-NEXT: vpinsrb $10, 10(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_22: ## %else29
; AVX512F-NEXT: vpextrb $11, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_24
; AVX512F-NEXT: ## BB#23: ## %cond.load31
; AVX512F-NEXT: vpinsrb $11, 11(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_24: ## %else32
; AVX512F-NEXT: vpextrb $12, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_26
; AVX512F-NEXT: ## BB#25: ## %cond.load34
; AVX512F-NEXT: vpinsrb $12, 12(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_26: ## %else35
; AVX512F-NEXT: vpextrb $13, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_28
; AVX512F-NEXT: ## BB#27: ## %cond.load37
; AVX512F-NEXT: vpinsrb $13, 13(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_28: ## %else38
; AVX512F-NEXT: vpextrb $14, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_30
; AVX512F-NEXT: ## BB#29: ## %cond.load40
; AVX512F-NEXT: vpinsrb $14, 14(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_30: ## %else41
; AVX512F-NEXT: vpextrb $15, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_32
; AVX512F-NEXT: ## BB#31: ## %cond.load43
; AVX512F-NEXT: vpinsrb $15, 15(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB51_32: ## %else44
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT: vpextrb $0, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_34
; AVX512F-NEXT: ## BB#33: ## %cond.load46
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $0, 16(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_34: ## %else47
; AVX512F-NEXT: vpextrb $1, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_36
; AVX512F-NEXT: ## BB#35: ## %cond.load49
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $1, 17(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_36: ## %else50
; AVX512F-NEXT: vpextrb $2, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_38
; AVX512F-NEXT: ## BB#37: ## %cond.load52
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $2, 18(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_38: ## %else53
; AVX512F-NEXT: vpextrb $3, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_40
; AVX512F-NEXT: ## BB#39: ## %cond.load55
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $3, 19(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_40: ## %else56
; AVX512F-NEXT: vpextrb $4, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_42
; AVX512F-NEXT: ## BB#41: ## %cond.load58
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $4, 20(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_42: ## %else59
; AVX512F-NEXT: vpextrb $5, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_44
; AVX512F-NEXT: ## BB#43: ## %cond.load61
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $5, 21(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_44: ## %else62
; AVX512F-NEXT: vpextrb $6, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_46
; AVX512F-NEXT: ## BB#45: ## %cond.load64
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $6, 22(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_46: ## %else65
; AVX512F-NEXT: vpextrb $7, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_48
; AVX512F-NEXT: ## BB#47: ## %cond.load67
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $7, 23(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_48: ## %else68
; AVX512F-NEXT: vpextrb $8, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_50
; AVX512F-NEXT: ## BB#49: ## %cond.load70
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $8, 24(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_50: ## %else71
; AVX512F-NEXT: vpextrb $9, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_52
; AVX512F-NEXT: ## BB#51: ## %cond.load73
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $9, 25(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_52: ## %else74
; AVX512F-NEXT: vpextrb $10, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_54
; AVX512F-NEXT: ## BB#53: ## %cond.load76
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $10, 26(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_54: ## %else77
; AVX512F-NEXT: vpextrb $11, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_56
; AVX512F-NEXT: ## BB#55: ## %cond.load79
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $11, 27(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_56: ## %else80
; AVX512F-NEXT: vpextrb $12, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_58
; AVX512F-NEXT: ## BB#57: ## %cond.load82
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $12, 28(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_58: ## %else83
; AVX512F-NEXT: vpextrb $13, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_60
; AVX512F-NEXT: ## BB#59: ## %cond.load85
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $13, 29(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_60: ## %else86
; AVX512F-NEXT: vpextrb $14, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_62
; AVX512F-NEXT: ## BB#61: ## %cond.load88
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: vpinsrb $14, 30(%rdi), %xmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_62: ## %else89
; AVX512F-NEXT: vpextrb $15, %xmm2, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB51_64
; AVX512F-NEXT: ## BB#63: ## %cond.load91
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $15, 31(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB51_64: ## %else92
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_mask_load_32xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k1
; SKX-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %res = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)

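; The <64 x i1> mask below is passed per the x86-64 calling convention (the
; first lanes in %edi/%esi/%edx/%ecx/%r8/%r9, the remaining lanes on the
; stack), so without AVX512BW the lowerings scalarize into one
; test-and-branch block per byte lane, as the checks below show.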
define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
; AVX1-LABEL: test_mask_load_64xi8:
; AVX1: ## BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: Ltmp3:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: pushq %r15
; AVX1-NEXT: Ltmp4:
; AVX1-NEXT: .cfi_def_cfa_offset 24
; AVX1-NEXT: pushq %r14
; AVX1-NEXT: Ltmp5:
; AVX1-NEXT: .cfi_def_cfa_offset 32
; AVX1-NEXT: pushq %r13
; AVX1-NEXT: Ltmp6:
; AVX1-NEXT: .cfi_def_cfa_offset 40
; AVX1-NEXT: pushq %r12
; AVX1-NEXT: Ltmp7:
; AVX1-NEXT: .cfi_def_cfa_offset 48
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: Ltmp8:
; AVX1-NEXT: .cfi_def_cfa_offset 56
; AVX1-NEXT: pushq %rax
; AVX1-NEXT: Ltmp9:
; AVX1-NEXT: .cfi_def_cfa_offset 64
; AVX1-NEXT: Ltmp10:
; AVX1-NEXT: .cfi_offset %rbx, -56
; AVX1-NEXT: Ltmp11:
; AVX1-NEXT: .cfi_offset %r12, -48
; AVX1-NEXT: Ltmp12:
; AVX1-NEXT: .cfi_offset %r13, -40
; AVX1-NEXT: Ltmp13:
; AVX1-NEXT: .cfi_offset %r14, -32
; AVX1-NEXT: Ltmp14:
; AVX1-NEXT: .cfi_offset %r15, -24
; AVX1-NEXT: Ltmp15:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT: movl %edi, %r13d
; AVX1-NEXT: testb $1, %dil
; AVX1-NEXT: je LBB52_2
; AVX1-NEXT: ## BB#1: ## %cond.load
; AVX1-NEXT: movzbl (%rax), %ebp
; AVX1-NEXT: vmovd %ebp, %xmm9
; AVX1-NEXT: LBB52_2: ## %else
; AVX1-NEXT: testb $1, %sil
; AVX1-NEXT: je LBB52_4
; AVX1-NEXT: ## BB#3: ## %cond.load1
; AVX1-NEXT: vpinsrb $1, 1(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_4: ## %else2
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB52_6
; AVX1-NEXT: ## BB#5: ## %cond.load4
; AVX1-NEXT: vpinsrb $2, 2(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_6: ## %else5
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB52_8
; AVX1-NEXT: ## BB#7: ## %cond.load7
; AVX1-NEXT: vpinsrb $3, 3(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_8: ## %else8
; AVX1-NEXT: testb $1, %r8b
; AVX1-NEXT: je LBB52_10
; AVX1-NEXT: ## BB#9: ## %cond.load10
; AVX1-NEXT: vpinsrb $4, 4(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_10: ## %else11
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
; AVX1-NEXT: testb $1, %r9b
; AVX1-NEXT: je LBB52_12
; AVX1-NEXT: ## BB#11: ## %cond.load13
; AVX1-NEXT: vpinsrb $5, 5(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_12: ## %else14
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r11b
; AVX1-NEXT: testb $1, %r10b
; AVX1-NEXT: je LBB52_14
; AVX1-NEXT: ## BB#13: ## %cond.load16
; AVX1-NEXT: vpinsrb $6, 6(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_14: ## %else17
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r14b
; AVX1-NEXT: testb $1, %r11b
; AVX1-NEXT: je LBB52_16
; AVX1-NEXT: ## BB#15: ## %cond.load19
; AVX1-NEXT: vpinsrb $7, 7(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_16: ## %else20
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r15b
; AVX1-NEXT: testb $1, %r14b
; AVX1-NEXT: je LBB52_18
; AVX1-NEXT: ## BB#17: ## %cond.load22
; AVX1-NEXT: vpinsrb $8, 8(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_18: ## %else23
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r12b
; AVX1-NEXT: testb $1, %r15b
; AVX1-NEXT: je LBB52_20
; AVX1-NEXT: ## BB#19: ## %cond.load25
; AVX1-NEXT: vpinsrb $9, 9(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_20: ## %else26
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dil
; AVX1-NEXT: testb $1, %r12b
; AVX1-NEXT: je LBB52_22
; AVX1-NEXT: ## BB#21: ## %cond.load28
; AVX1-NEXT: vpinsrb $10, 10(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_22: ## %else29
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %bpl
; AVX1-NEXT: testb $1, %dil
; AVX1-NEXT: je LBB52_24
; AVX1-NEXT: ## BB#23: ## %cond.load31
; AVX1-NEXT: vpinsrb $11, 11(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_24: ## %else32
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %bl
; AVX1-NEXT: testb $1, %bpl
; AVX1-NEXT: je LBB52_26
; AVX1-NEXT: ## BB#25: ## %cond.load34
; AVX1-NEXT: vpinsrb $12, 12(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_26: ## %else35
; AVX1-NEXT: testb $1, %bl
; AVX1-NEXT: je LBB52_28
; AVX1-NEXT: ## BB#27: ## %cond.load37
; AVX1-NEXT: vpinsrb $13, 13(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_28: ## %else38
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_30
; AVX1-NEXT: ## BB#29: ## %cond.load40
; AVX1-NEXT: vpinsrb $14, 14(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_30: ## %else41
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_32
; AVX1-NEXT: ## BB#31: ## %cond.load43
; AVX1-NEXT: vpinsrb $15, 15(%rax), %xmm9, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT: LBB52_32: ## %else44
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_34
; AVX1-NEXT: ## BB#33: ## %cond.load46
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $0, 16(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_34: ## %else47
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_36
; AVX1-NEXT: ## BB#35: ## %cond.load49
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $1, 17(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_36: ## %else50
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_38
; AVX1-NEXT: ## BB#37: ## %cond.load52
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $2, 18(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_38: ## %else53
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_40
; AVX1-NEXT: ## BB#39: ## %cond.load55
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $3, 19(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_40: ## %else56
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_42
; AVX1-NEXT: ## BB#41: ## %cond.load58
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $4, 20(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_42: ## %else59
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_44
; AVX1-NEXT: ## BB#43: ## %cond.load61
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $5, 21(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_44: ## %else62
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_46
; AVX1-NEXT: ## BB#45: ## %cond.load64
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $6, 22(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_46: ## %else65
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_48
; AVX1-NEXT: ## BB#47: ## %cond.load67
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $7, 23(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_48: ## %else68
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_50
; AVX1-NEXT: ## BB#49: ## %cond.load70
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $8, 24(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_50: ## %else71
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_52
; AVX1-NEXT: ## BB#51: ## %cond.load73
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $9, 25(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_52: ## %else74
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_54
; AVX1-NEXT: ## BB#53: ## %cond.load76
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $10, 26(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_54: ## %else77
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_56
; AVX1-NEXT: ## BB#55: ## %cond.load79
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $11, 27(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_56: ## %else80
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_58
; AVX1-NEXT: ## BB#57: ## %cond.load82
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $12, 28(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_58: ## %else83
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_60
; AVX1-NEXT: ## BB#59: ## %cond.load85
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $13, 29(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_60: ## %else86
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_62
; AVX1-NEXT: ## BB#61: ## %cond.load88
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $14, 30(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_62: ## %else89
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_64
; AVX1-NEXT: ## BB#63: ## %cond.load91
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vpinsrb $15, 31(%rax), %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT: LBB52_64: ## %else92
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_66
; AVX1-NEXT: ## BB#65: ## %cond.load94
; AVX1-NEXT: vpinsrb $0, 32(%rax), %xmm0, %xmm3
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: LBB52_66: ## %else95
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_68
; AVX1-NEXT: ## BB#67: ## %cond.load97
; AVX1-NEXT: vpinsrb $1, 33(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_68: ## %else98
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_70
; AVX1-NEXT: ## BB#69: ## %cond.load100
; AVX1-NEXT: vpinsrb $2, 34(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_70: ## %else101
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_72
; AVX1-NEXT: ## BB#71: ## %cond.load103
; AVX1-NEXT: vpinsrb $3, 35(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_72: ## %else104
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_74
; AVX1-NEXT: ## BB#73: ## %cond.load106
; AVX1-NEXT: vpinsrb $4, 36(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_74: ## %else107
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_76
; AVX1-NEXT: ## BB#75: ## %cond.load109
; AVX1-NEXT: vpinsrb $5, 37(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_76: ## %else110
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_78
; AVX1-NEXT: ## BB#77: ## %cond.load112
; AVX1-NEXT: vpinsrb $6, 38(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_78: ## %else113
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_80
; AVX1-NEXT: ## BB#79: ## %cond.load115
; AVX1-NEXT: vpinsrb $7, 39(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_80: ## %else116
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_82
; AVX1-NEXT: ## BB#81: ## %cond.load118
; AVX1-NEXT: vpinsrb $8, 40(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_82: ## %else119
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_84
; AVX1-NEXT: ## BB#83: ## %cond.load121
; AVX1-NEXT: vpinsrb $9, 41(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_84: ## %else122
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_86
; AVX1-NEXT: ## BB#85: ## %cond.load124
; AVX1-NEXT: vpinsrb $10, 42(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_86: ## %else125
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_88
; AVX1-NEXT: ## BB#87: ## %cond.load127
; AVX1-NEXT: vpinsrb $11, 43(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_88: ## %else128
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_90
; AVX1-NEXT: ## BB#89: ## %cond.load130
; AVX1-NEXT: vpinsrb $12, 44(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_90: ## %else131
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_92
; AVX1-NEXT: ## BB#91: ## %cond.load133
; AVX1-NEXT: vpinsrb $13, 45(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_92: ## %else134
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_94
; AVX1-NEXT: ## BB#93: ## %cond.load136
; AVX1-NEXT: vpinsrb $14, 46(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_94: ## %else137
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_96
; AVX1-NEXT: ## BB#95: ## %cond.load139
; AVX1-NEXT: vpinsrb $15, 47(%rax), %xmm3, %xmm4
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: LBB52_96: ## %else140
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_98
; AVX1-NEXT: ## BB#97: ## %cond.load142
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $0, 48(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_98: ## %else143
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_100
; AVX1-NEXT: ## BB#99: ## %cond.load145
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $1, 49(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_100: ## %else146
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_102
; AVX1-NEXT: ## BB#101: ## %cond.load148
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $2, 50(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_102: ## %else149
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_104
; AVX1-NEXT: ## BB#103: ## %cond.load151
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $3, 51(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_104: ## %else152
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_106
; AVX1-NEXT: ## BB#105: ## %cond.load154
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $4, 52(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_106: ## %else155
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_108
; AVX1-NEXT: ## BB#107: ## %cond.load157
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $5, 53(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_108: ## %else158
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_110
; AVX1-NEXT: ## BB#109: ## %cond.load160
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $6, 54(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_110: ## %else161
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_112
; AVX1-NEXT: ## BB#111: ## %cond.load163
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $7, 55(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_112: ## %else164
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_114
; AVX1-NEXT: ## BB#113: ## %cond.load166
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $8, 56(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_114: ## %else167
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_116
; AVX1-NEXT: ## BB#115: ## %cond.load169
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $9, 57(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_116: ## %else170
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_118
; AVX1-NEXT: ## BB#117: ## %cond.load172
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $10, 58(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_118: ## %else173
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_120
; AVX1-NEXT: ## BB#119: ## %cond.load175
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $11, 59(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_120: ## %else176
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_122
; AVX1-NEXT: ## BB#121: ## %cond.load178
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $12, 60(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_122: ## %else179
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_124
; AVX1-NEXT: ## BB#123: ## %cond.load181
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $13, 61(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_124: ## %else182
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: je LBB52_126
; AVX1-NEXT: ## BB#125: ## %cond.load184
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $14, 62(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_126: ## %else185
; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT: movl %r9d, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movl %r8d, (%rsp) ## 4-byte Spill
; AVX1-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: je LBB52_128
; AVX1-NEXT: ## BB#127: ## %cond.load187
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpinsrb $15, 63(%rax), %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: LBB52_128: ## %else188
; AVX1-NEXT: movzbl %r10b, %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl %r11b, %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl %r14b, %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl %r15b, %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl %r12b, %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl %dil, %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl %bpl, %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl %bl, %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; AVX1-NEXT: movzbl %r13b, %r13d
; AVX1-NEXT: vmovd %r13d, %xmm4
; AVX1-NEXT: movl -{{[0-9]+}}(%rsp), %edi ## 4-byte Reload
; AVX1-NEXT: movzbl %dil, %ebp
; AVX1-NEXT: vpinsrb $1, %ebp, %xmm4, %xmm4
; AVX1-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX1-NEXT: movzbl %bpl, %ebp
; AVX1-NEXT: vpinsrb $2, %ebp, %xmm4, %xmm4
; AVX1-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX1-NEXT: movzbl %bpl, %ebp
; AVX1-NEXT: vpinsrb $3, %ebp, %xmm4, %xmm4
; AVX1-NEXT: movl (%rsp), %ebp ## 4-byte Reload
; AVX1-NEXT: movzbl %bpl, %ebp
; AVX1-NEXT: vpinsrb $4, %ebp, %xmm4, %xmm4
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX1-NEXT: movzbl %bpl, %ebp
; AVX1-NEXT: vpinsrb $5, %ebp, %xmm4, %xmm4
; AVX1-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX1-NEXT: vmovd -{{[0-9]+}}(%rsp), %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: ## xmm5 = mem[0],zero,zero,zero
; AVX1-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm5, %xmm8 ## 4-byte Folded Reload
; AVX1-NEXT: vmovd -{{[0-9]+}}(%rsp), %xmm6 ## 4-byte Folded Reload
; AVX1-NEXT: ## xmm6 = mem[0],zero,zero,zero
; AVX1-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX1-NEXT: vpinsrb $2, %r12d, %xmm6, %xmm6
; AVX1-NEXT: vpinsrb $3, %r15d, %xmm6, %xmm6
; AVX1-NEXT: vpinsrb $4, %r14d, %xmm6, %xmm6
; AVX1-NEXT: vpinsrb $5, %r11d, %xmm6, %xmm6
; AVX1-NEXT: vpinsrb $6, %r8d, %xmm6, %xmm6
; AVX1-NEXT: vpinsrb $7, %edx, %xmm6, %xmm6
; AVX1-NEXT: vpinsrb $8, %eax, %xmm6, %xmm6
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; AVX1-NEXT: vpinsrb $9, %ecx, %xmm6, %xmm6
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; AVX1-NEXT: vpinsrb $10, %esi, %xmm6, %xmm6
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; AVX1-NEXT: vpinsrb $11, %r9d, %xmm6, %xmm6
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX1-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; AVX1-NEXT: vpinsrb $13, %ebx, %xmm6, %xmm6
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: vpinsrb $14, %r13d, %xmm6, %xmm6
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; AVX1-NEXT: vpinsrb $15, %r14d, %xmm6, %xmm10
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; AVX1-NEXT: vmovd %edi, %xmm7
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; AVX1-NEXT: vpinsrb $1, %r11d, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $2, %r15d, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $3, %r12d, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $4, %r8d, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $5, %ecx, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $6, %r9d, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $7, %esi, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $8, %r10d, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $9, %eax, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $10, %r13d, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $11, %edx, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $12, %r14d, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $13, %ebx, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $14, %edi, %xmm7, %xmm7
; AVX1-NEXT: vpinsrb $15, %ebp, %xmm7, %xmm7
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vpsllw $7, %xmm8, %xmm6
; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtb %xmm6, %xmm2, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX1-NEXT: vandnps %ymm0, %ymm4, %ymm0
; AVX1-NEXT: vandps %ymm4, %ymm9, %ymm4
; AVX1-NEXT: vorps %ymm0, %ymm4, %ymm0
; AVX1-NEXT: vpsllw $7, %xmm10, %xmm4
; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtb %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vpsllw $7, %xmm7, %xmm6
; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpcmpgtb %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm3, %ymm2
; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: addq $8, %rsp
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r12
; AVX1-NEXT: popq %r13
; AVX1-NEXT: popq %r14
; AVX1-NEXT: popq %r15
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_mask_load_64xi8:
; AVX2: ## BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: Ltmp3:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: pushq %r15
; AVX2-NEXT: Ltmp4:
; AVX2-NEXT: .cfi_def_cfa_offset 24
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: Ltmp5:
; AVX2-NEXT: .cfi_def_cfa_offset 32
; AVX2-NEXT: pushq %r13
; AVX2-NEXT: Ltmp6:
; AVX2-NEXT: .cfi_def_cfa_offset 40
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: Ltmp7:
; AVX2-NEXT: .cfi_def_cfa_offset 48
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: Ltmp8:
; AVX2-NEXT: .cfi_def_cfa_offset 56
; AVX2-NEXT: pushq %rax
; AVX2-NEXT: Ltmp9:
; AVX2-NEXT: .cfi_def_cfa_offset 64
; AVX2-NEXT: Ltmp10:
; AVX2-NEXT: .cfi_offset %rbx, -56
; AVX2-NEXT: Ltmp11:
; AVX2-NEXT: .cfi_offset %r12, -48
; AVX2-NEXT: Ltmp12:
; AVX2-NEXT: .cfi_offset %r13, -40
; AVX2-NEXT: Ltmp13:
; AVX2-NEXT: .cfi_offset %r14, -32
; AVX2-NEXT: Ltmp14:
; AVX2-NEXT: .cfi_offset %r15, -24
; AVX2-NEXT: Ltmp15:
; AVX2-NEXT: .cfi_offset %rbp, -16
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: testb $1, %dil
; AVX2-NEXT: je LBB52_2
; AVX2-NEXT: ## BB#1: ## %cond.load
; AVX2-NEXT: movzbl (%rax), %ebp
; AVX2-NEXT: vmovd %ebp, %xmm2
; AVX2-NEXT: LBB52_2: ## %else
; AVX2-NEXT: testb $1, %sil
; AVX2-NEXT: je LBB52_4
; AVX2-NEXT: ## BB#3: ## %cond.load1
; AVX2-NEXT: vpinsrb $1, 1(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_4: ## %else2
; AVX2-NEXT: testb $1, %dl
; AVX2-NEXT: je LBB52_6
; AVX2-NEXT: ## BB#5: ## %cond.load4
; AVX2-NEXT: vpinsrb $2, 2(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_6: ## %else5
; AVX2-NEXT: testb $1, %cl
; AVX2-NEXT: je LBB52_8
; AVX2-NEXT: ## BB#7: ## %cond.load7
; AVX2-NEXT: vpinsrb $3, 3(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_8: ## %else8
; AVX2-NEXT: testb $1, %r8b
; AVX2-NEXT: je LBB52_10
; AVX2-NEXT: ## BB#9: ## %cond.load10
; AVX2-NEXT: vpinsrb $4, 4(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_10: ## %else11
; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r10b
; AVX2-NEXT: testb $1, %r9b
; AVX2-NEXT: je LBB52_12
; AVX2-NEXT: ## BB#11: ## %cond.load13
; AVX2-NEXT: vpinsrb $5, 5(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_12: ## %else14
; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r11b
; AVX2-NEXT: testb $1, %r10b
; AVX2-NEXT: je LBB52_14
; AVX2-NEXT: ## BB#13: ## %cond.load16
; AVX2-NEXT: vpinsrb $6, 6(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_14: ## %else17
; AVX2-NEXT: testb $1, %r11b
; AVX2-NEXT: je LBB52_16
; AVX2-NEXT: ## BB#15: ## %cond.load19
; AVX2-NEXT: vpinsrb $7, 7(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_16: ## %else20
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_18
; AVX2-NEXT: ## BB#17: ## %cond.load22
; AVX2-NEXT: vpinsrb $8, 8(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_18: ## %else23
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_20
; AVX2-NEXT: ## BB#19: ## %cond.load25
; AVX2-NEXT: vpinsrb $9, 9(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_20: ## %else26
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_22
; AVX2-NEXT: ## BB#21: ## %cond.load28
; AVX2-NEXT: vpinsrb $10, 10(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_22: ## %else29
; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %bpl
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_24
; AVX2-NEXT: ## BB#23: ## %cond.load31
; AVX2-NEXT: vpinsrb $11, 11(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_24: ## %else32
; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %bl
; AVX2-NEXT: testb $1, %bpl
; AVX2-NEXT: je LBB52_26
; AVX2-NEXT: ## BB#25: ## %cond.load34
; AVX2-NEXT: vpinsrb $12, 12(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_26: ## %else35
; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r14b
; AVX2-NEXT: testb $1, %bl
; AVX2-NEXT: je LBB52_28
; AVX2-NEXT: ## BB#27: ## %cond.load37
; AVX2-NEXT: vpinsrb $13, 13(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_28: ## %else38
; AVX2-NEXT: testb $1, %r14b
; AVX2-NEXT: je LBB52_30
; AVX2-NEXT: ## BB#29: ## %cond.load40
; AVX2-NEXT: vpinsrb $14, 14(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_30: ## %else41
; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r13b
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_32
; AVX2-NEXT: ## BB#31: ## %cond.load43
; AVX2-NEXT: vpinsrb $15, 15(%rax), %xmm2, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX2-NEXT: LBB52_32: ## %else44
; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r12b
; AVX2-NEXT: testb $1, %r13b
; AVX2-NEXT: je LBB52_34
; AVX2-NEXT: ## BB#33: ## %cond.load46
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $0, 16(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_34: ## %else47
; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r15b
; AVX2-NEXT: testb $1, %r12b
; AVX2-NEXT: je LBB52_36
; AVX2-NEXT: ## BB#35: ## %cond.load49
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $1, 17(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_36: ## %else50
; AVX2-NEXT: testb $1, %r15b
; AVX2-NEXT: je LBB52_38
; AVX2-NEXT: ## BB#37: ## %cond.load52
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $2, 18(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_38: ## %else53
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_40
; AVX2-NEXT: ## BB#39: ## %cond.load55
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $3, 19(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_40: ## %else56
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_42
; AVX2-NEXT: ## BB#41: ## %cond.load58
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $4, 20(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_42: ## %else59
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_44
; AVX2-NEXT: ## BB#43: ## %cond.load61
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $5, 21(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_44: ## %else62
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_46
; AVX2-NEXT: ## BB#45: ## %cond.load64
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $6, 22(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_46: ## %else65
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_48
; AVX2-NEXT: ## BB#47: ## %cond.load67
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $7, 23(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_48: ## %else68
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_50
; AVX2-NEXT: ## BB#49: ## %cond.load70
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $8, 24(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_50: ## %else71
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_52
; AVX2-NEXT: ## BB#51: ## %cond.load73
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $9, 25(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_52: ## %else74
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_54
; AVX2-NEXT: ## BB#53: ## %cond.load76
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $10, 26(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_54: ## %else77
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_56
; AVX2-NEXT: ## BB#55: ## %cond.load79
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $11, 27(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_56: ## %else80
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_58
; AVX2-NEXT: ## BB#57: ## %cond.load82
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $12, 28(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_58: ## %else83
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_60
; AVX2-NEXT: ## BB#59: ## %cond.load85
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $13, 29(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_60: ## %else86
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_62
; AVX2-NEXT: ## BB#61: ## %cond.load88
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $14, 30(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_62: ## %else89
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_64
; AVX2-NEXT: ## BB#63: ## %cond.load91
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpinsrb $15, 31(%rax), %xmm3, %xmm3
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: LBB52_64: ## %else92
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_66
; AVX2-NEXT: ## BB#65: ## %cond.load94
; AVX2-NEXT: vpinsrb $0, 32(%rax), %xmm0, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: LBB52_66: ## %else95
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_68
; AVX2-NEXT: ## BB#67: ## %cond.load97
; AVX2-NEXT: vpinsrb $1, 33(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_68: ## %else98
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_70
; AVX2-NEXT: ## BB#69: ## %cond.load100
; AVX2-NEXT: vpinsrb $2, 34(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_70: ## %else101
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_72
; AVX2-NEXT: ## BB#71: ## %cond.load103
; AVX2-NEXT: vpinsrb $3, 35(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_72: ## %else104
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_74
; AVX2-NEXT: ## BB#73: ## %cond.load106
; AVX2-NEXT: vpinsrb $4, 36(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_74: ## %else107
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_76
; AVX2-NEXT: ## BB#75: ## %cond.load109
; AVX2-NEXT: vpinsrb $5, 37(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_76: ## %else110
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_78
; AVX2-NEXT: ## BB#77: ## %cond.load112
; AVX2-NEXT: vpinsrb $6, 38(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_78: ## %else113
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_80
; AVX2-NEXT: ## BB#79: ## %cond.load115
; AVX2-NEXT: vpinsrb $7, 39(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_80: ## %else116
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_82
; AVX2-NEXT: ## BB#81: ## %cond.load118
; AVX2-NEXT: vpinsrb $8, 40(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_82: ## %else119
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_84
; AVX2-NEXT: ## BB#83: ## %cond.load121
; AVX2-NEXT: vpinsrb $9, 41(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_84: ## %else122
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_86
; AVX2-NEXT: ## BB#85: ## %cond.load124
; AVX2-NEXT: vpinsrb $10, 42(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_86: ## %else125
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_88
; AVX2-NEXT: ## BB#87: ## %cond.load127
; AVX2-NEXT: vpinsrb $11, 43(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_88: ## %else128
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_90
; AVX2-NEXT: ## BB#89: ## %cond.load130
; AVX2-NEXT: vpinsrb $12, 44(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_90: ## %else131
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_92
; AVX2-NEXT: ## BB#91: ## %cond.load133
; AVX2-NEXT: vpinsrb $13, 45(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_92: ## %else134
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_94
; AVX2-NEXT: ## BB#93: ## %cond.load136
; AVX2-NEXT: vpinsrb $14, 46(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_94: ## %else137
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_96
; AVX2-NEXT: ## BB#95: ## %cond.load139
; AVX2-NEXT: vpinsrb $15, 47(%rax), %xmm3, %xmm4
; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX2-NEXT: LBB52_96: ## %else140
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_98
; AVX2-NEXT: ## BB#97: ## %cond.load142
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $0, 48(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_98: ## %else143
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_100
; AVX2-NEXT: ## BB#99: ## %cond.load145
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $1, 49(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_100: ## %else146
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_102
; AVX2-NEXT: ## BB#101: ## %cond.load148
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $2, 50(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_102: ## %else149
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_104
; AVX2-NEXT: ## BB#103: ## %cond.load151
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $3, 51(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_104: ## %else152
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_106
; AVX2-NEXT: ## BB#105: ## %cond.load154
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $4, 52(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_106: ## %else155
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_108
; AVX2-NEXT: ## BB#107: ## %cond.load157
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $5, 53(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_108: ## %else158
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_110
; AVX2-NEXT: ## BB#109: ## %cond.load160
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $6, 54(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_110: ## %else161
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_112
; AVX2-NEXT: ## BB#111: ## %cond.load163
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $7, 55(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_112: ## %else164
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_114
; AVX2-NEXT: ## BB#113: ## %cond.load166
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $8, 56(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_114: ## %else167
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_116
; AVX2-NEXT: ## BB#115: ## %cond.load169
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $9, 57(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_116: ## %else170
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_118
; AVX2-NEXT: ## BB#117: ## %cond.load172
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $10, 58(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_118: ## %else173
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_120
; AVX2-NEXT: ## BB#119: ## %cond.load175
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $11, 59(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_120: ## %else176
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_122
; AVX2-NEXT: ## BB#121: ## %cond.load178
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $12, 60(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_122: ## %else179
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: je LBB52_124
; AVX2-NEXT: ## BB#123: ## %cond.load181
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $13, 61(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_124: ## %else182
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: jne LBB52_126
; AVX2-NEXT: ## BB#125:
; AVX2-NEXT: movq %rax, %rdi
; AVX2-NEXT: jmp LBB52_127
; AVX2-NEXT: LBB52_126: ## %cond.load184
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: movq %rax, %rdi
; AVX2-NEXT: vpinsrb $14, 62(%rax), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_127: ## %else185
; AVX2-NEXT: movl %ebp, %eax
; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %r9d, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movl %r8d, (%rsp) ## 4-byte Spill
; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movl %esi, %ebp
; AVX2-NEXT: je LBB52_129
; AVX2-NEXT: ## BB#128: ## %cond.load187
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpinsrb $15, 63(%rdi), %xmm4, %xmm4
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX2-NEXT: LBB52_129: ## %else188
; AVX2-NEXT: movzbl %r10b, %ecx
; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl %r11b, %ecx
; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl %bl, %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl %r14b, %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl %r12b, %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl %r13b, %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl %r15b, %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; AVX2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX2-NEXT: movl -{{[0-9]+}}(%rsp), %edi ## 4-byte Reload
; AVX2-NEXT: movzbl %dil, %r13d
; AVX2-NEXT: vmovd %r13d, %xmm4
; AVX2-NEXT: movzbl %bpl, %ebp
; AVX2-NEXT: vpinsrb $1, %ebp, %xmm4, %xmm4
; AVX2-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX2-NEXT: movzbl %bpl, %ebp
; AVX2-NEXT: vpinsrb $2, %ebp, %xmm4, %xmm4
; AVX2-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX2-NEXT: movzbl %bpl, %ebp
; AVX2-NEXT: vpinsrb $3, %ebp, %xmm4, %xmm4
; AVX2-NEXT: movl (%rsp), %ebp ## 4-byte Reload
; AVX2-NEXT: movzbl %bpl, %ebp
; AVX2-NEXT: vpinsrb $4, %ebp, %xmm4, %xmm4
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX2-NEXT: movzbl %bpl, %ebp
; AVX2-NEXT: vpinsrb $5, %ebp, %xmm4, %xmm4
; AVX2-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
; AVX2-NEXT: vmovd -{{[0-9]+}}(%rsp), %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: ## xmm5 = mem[0],zero,zero,zero
; AVX2-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
; AVX2-NEXT: vmovd %r12d, %xmm6
; AVX2-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX2-NEXT: vpinsrb $2, %r15d, %xmm6, %xmm6
; AVX2-NEXT: vpinsrb $3, %r14d, %xmm6, %xmm6
; AVX2-NEXT: vpinsrb $4, %ebx, %xmm6, %xmm6
; AVX2-NEXT: vpinsrb $5, %r11d, %xmm6, %xmm6
; AVX2-NEXT: vpinsrb $6, %r9d, %xmm6, %xmm6
; AVX2-NEXT: vpinsrb $7, %esi, %xmm6, %xmm6
; AVX2-NEXT: vpinsrb $8, %eax, %xmm6, %xmm6
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; AVX2-NEXT: vpinsrb $9, %ecx, %xmm6, %xmm6
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; AVX2-NEXT: vpinsrb $10, %edx, %xmm6, %xmm6
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; AVX2-NEXT: vpinsrb $11, %r8d, %xmm6, %xmm6
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX2-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; AVX2-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; AVX2-NEXT: vpinsrb $14, %eax, %xmm6, %xmm6
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; AVX2-NEXT: vpinsrb $15, %r15d, %xmm6, %xmm6
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; AVX2-NEXT: vmovd %r12d, %xmm7
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; AVX2-NEXT: vpinsrb $1, %r9d, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $2, %r11d, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $3, %r14d, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $4, %r13d, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $5, %ecx, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $6, %r8d, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $7, %ebx, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $8, %r10d, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $9, %ebp, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $10, %eax, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $11, %edi, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $12, %r15d, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $13, %esi, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $14, %r12d, %xmm7, %xmm7
; AVX2-NEXT: vpinsrb $15, %edx, %xmm7, %xmm7
; AVX2-NEXT: vinserti128 $1, %xmm5, %ymm4, %ymm4
; AVX2-NEXT: vpsllw $7, %ymm4, %ymm4
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vinserti128 $1, %xmm7, %ymm6, %ymm2
; AVX2-NEXT: vpsllw $7, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX2-NEXT: addq $8, %rsp
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r12
; AVX2-NEXT: popq %r13
; AVX2-NEXT: popq %r14
; AVX2-NEXT: popq %r15
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_mask_load_64xi8:
; AVX512F: ## BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: Ltmp0:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: pushq %r15
; AVX512F-NEXT: Ltmp1:
; AVX512F-NEXT: .cfi_def_cfa_offset 24
; AVX512F-NEXT: pushq %r14
; AVX512F-NEXT: Ltmp2:
; AVX512F-NEXT: .cfi_def_cfa_offset 32
; AVX512F-NEXT: pushq %r13
; AVX512F-NEXT: Ltmp3:
; AVX512F-NEXT: .cfi_def_cfa_offset 40
; AVX512F-NEXT: pushq %r12
; AVX512F-NEXT: Ltmp4:
; AVX512F-NEXT: .cfi_def_cfa_offset 48
; AVX512F-NEXT: pushq %rbx
; AVX512F-NEXT: Ltmp5:
; AVX512F-NEXT: .cfi_def_cfa_offset 56
; AVX512F-NEXT: subq $76, %rsp
; AVX512F-NEXT: Ltmp6:
; AVX512F-NEXT: .cfi_def_cfa_offset 132
; AVX512F-NEXT: Ltmp7:
; AVX512F-NEXT: .cfi_offset %rbx, -56
; AVX512F-NEXT: Ltmp8:
; AVX512F-NEXT: .cfi_offset %r12, -48
; AVX512F-NEXT: Ltmp9:
; AVX512F-NEXT: .cfi_offset %r13, -40
; AVX512F-NEXT: Ltmp10:
; AVX512F-NEXT: .cfi_offset %r14, -32
; AVX512F-NEXT: Ltmp11:
; AVX512F-NEXT: .cfi_offset %r15, -24
; AVX512F-NEXT: Ltmp12:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_2
; AVX512F-NEXT: ## BB#1: ## %cond.load
; AVX512F-NEXT: movzbl (%rdi), %eax
; AVX512F-NEXT: vmovd %eax, %xmm0
; AVX512F-NEXT: LBB52_2: ## %else
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_4
; AVX512F-NEXT: ## BB#3: ## %cond.load1
; AVX512F-NEXT: vpinsrb $1, 1(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_4: ## %else2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_6
; AVX512F-NEXT: ## BB#5: ## %cond.load4
; AVX512F-NEXT: vpinsrb $2, 2(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_6: ## %else5
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_8
; AVX512F-NEXT: ## BB#7: ## %cond.load7
; AVX512F-NEXT: vpinsrb $3, 3(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_8: ## %else8
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_10
; AVX512F-NEXT: ## BB#9: ## %cond.load10
; AVX512F-NEXT: vpinsrb $4, 4(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_10: ## %else11
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_12
; AVX512F-NEXT: ## BB#11: ## %cond.load13
; AVX512F-NEXT: vpinsrb $5, 5(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_12: ## %else14
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_14
; AVX512F-NEXT: ## BB#13: ## %cond.load16
; AVX512F-NEXT: vpinsrb $6, 6(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_14: ## %else17
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_16
; AVX512F-NEXT: ## BB#15: ## %cond.load19
; AVX512F-NEXT: vpinsrb $7, 7(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_16: ## %else20
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_18
; AVX512F-NEXT: ## BB#17: ## %cond.load22
; AVX512F-NEXT: vpinsrb $8, 8(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_18: ## %else23
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, (%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_20
; AVX512F-NEXT: ## BB#19: ## %cond.load25
; AVX512F-NEXT: vpinsrb $9, 9(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_20: ## %else26
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_22
; AVX512F-NEXT: ## BB#21: ## %cond.load28
; AVX512F-NEXT: vpinsrb $10, 10(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_22: ## %else29
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_24
; AVX512F-NEXT: ## BB#23: ## %cond.load31
; AVX512F-NEXT: vpinsrb $11, 11(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_24: ## %else32
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_26
; AVX512F-NEXT: ## BB#25: ## %cond.load34
; AVX512F-NEXT: vpinsrb $12, 12(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_26: ## %else35
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_28
; AVX512F-NEXT: ## BB#27: ## %cond.load37
; AVX512F-NEXT: vpinsrb $13, 13(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_28: ## %else38
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_30
; AVX512F-NEXT: ## BB#29: ## %cond.load40
; AVX512F-NEXT: vpinsrb $14, 14(%rdi), %xmm0, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_30: ## %else41
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT: kshiftlw $0, %k0, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_32
; AVX512F-NEXT: ## BB#31: ## %cond.load43
; AVX512F-NEXT: vpinsrb $15, 15(%rdi), %xmm0, %xmm1
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_32: ## %else44
; AVX512F-NEXT: kshiftlw $15, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_34
; AVX512F-NEXT: ## BB#33: ## %cond.load46
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $0, 16(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_34: ## %else47
; AVX512F-NEXT: kshiftlw $14, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_36
; AVX512F-NEXT: ## BB#35: ## %cond.load49
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $1, 17(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_36: ## %else50
; AVX512F-NEXT: kshiftlw $13, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_38
; AVX512F-NEXT: ## BB#37: ## %cond.load52
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $2, 18(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_38: ## %else53
; AVX512F-NEXT: kshiftlw $12, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_40
; AVX512F-NEXT: ## BB#39: ## %cond.load55
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $3, 19(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_40: ## %else56
; AVX512F-NEXT: kshiftlw $11, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_42
; AVX512F-NEXT: ## BB#41: ## %cond.load58
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $4, 20(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_42: ## %else59
; AVX512F-NEXT: kshiftlw $10, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_44
; AVX512F-NEXT: ## BB#43: ## %cond.load61
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $5, 21(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_44: ## %else62
; AVX512F-NEXT: kshiftlw $9, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_46
; AVX512F-NEXT: ## BB#45: ## %cond.load64
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $6, 22(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_46: ## %else65
; AVX512F-NEXT: kshiftlw $8, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_48
; AVX512F-NEXT: ## BB#47: ## %cond.load67
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $7, 23(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_48: ## %else68
; AVX512F-NEXT: kshiftlw $7, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_50
; AVX512F-NEXT: ## BB#49: ## %cond.load70
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $8, 24(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_50: ## %else71
; AVX512F-NEXT: kshiftlw $6, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_52
; AVX512F-NEXT: ## BB#51: ## %cond.load73
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $9, 25(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_52: ## %else74
; AVX512F-NEXT: kshiftlw $5, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_54
; AVX512F-NEXT: ## BB#53: ## %cond.load76
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $10, 26(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_54: ## %else77
; AVX512F-NEXT: kshiftlw $4, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_56
; AVX512F-NEXT: ## BB#55: ## %cond.load79
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $11, 27(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_56: ## %else80
; AVX512F-NEXT: kshiftlw $3, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_58
; AVX512F-NEXT: ## BB#57: ## %cond.load82
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $12, 28(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_58: ## %else83
; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm1
; AVX512F-NEXT: kshiftlw $2, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_60
; AVX512F-NEXT: ## BB#59: ## %cond.load85
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT: vpinsrb $13, 29(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_60: ## %else86
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: kshiftlw $1, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_62
; AVX512F-NEXT: ## BB#61: ## %cond.load88
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT: vpinsrb $14, 30(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_62: ## %else89
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $0, %k1, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_64
; AVX512F-NEXT: ## BB#63: ## %cond.load91
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpinsrb $15, 31(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: LBB52_64: ## %else92
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_66
; AVX512F-NEXT: ## BB#65: ## %cond.load94
; AVX512F-NEXT: vpinsrb $0, 32(%rdi), %xmm0, %xmm1
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB52_66: ## %else95
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_68
; AVX512F-NEXT: ## BB#67: ## %cond.load97
; AVX512F-NEXT: vpinsrb $1, 33(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_68: ## %else98
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_70
; AVX512F-NEXT: ## BB#69: ## %cond.load100
; AVX512F-NEXT: vpinsrb $2, 34(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_70: ## %else101
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_72
; AVX512F-NEXT: ## BB#71: ## %cond.load103
; AVX512F-NEXT: vpinsrb $3, 35(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_72: ## %else104
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_74
; AVX512F-NEXT: ## BB#73: ## %cond.load106
; AVX512F-NEXT: vpinsrb $4, 36(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_74: ## %else107
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_76
; AVX512F-NEXT: ## BB#75: ## %cond.load109
; AVX512F-NEXT: vpinsrb $5, 37(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_76: ## %else110
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_78
; AVX512F-NEXT: ## BB#77: ## %cond.load112
; AVX512F-NEXT: vpinsrb $6, 38(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_78: ## %else113
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_80
; AVX512F-NEXT: ## BB#79: ## %cond.load115
; AVX512F-NEXT: vpinsrb $7, 39(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_80: ## %else116
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_82
; AVX512F-NEXT: ## BB#81: ## %cond.load118
; AVX512F-NEXT: vpinsrb $8, 40(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_82: ## %else119
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_84
; AVX512F-NEXT: ## BB#83: ## %cond.load121
; AVX512F-NEXT: vpinsrb $9, 41(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_84: ## %else122
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_86
; AVX512F-NEXT: ## BB#85: ## %cond.load124
; AVX512F-NEXT: vpinsrb $10, 42(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_86: ## %else125
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_88
; AVX512F-NEXT: ## BB#87: ## %cond.load127
; AVX512F-NEXT: vpinsrb $11, 43(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_88: ## %else128
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_90
; AVX512F-NEXT: ## BB#89: ## %cond.load130
; AVX512F-NEXT: vpinsrb $12, 44(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_90: ## %else131
; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm2
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_92
; AVX512F-NEXT: ## BB#91: ## %cond.load133
; AVX512F-NEXT: vpinsrb $13, 45(%rdi), %xmm1, %xmm3
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_92: ## %else134
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_94
; AVX512F-NEXT: ## BB#93: ## %cond.load136
; AVX512F-NEXT: vpinsrb $14, 46(%rdi), %xmm1, %xmm3
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_94: ## %else137
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
; AVX512F-NEXT: kshiftlw $0, %k0, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_96
; AVX512F-NEXT: ## BB#95: ## %cond.load139
; AVX512F-NEXT: vpinsrb $15, 47(%rdi), %xmm1, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: LBB52_96: ## %else140
; AVX512F-NEXT: kshiftlw $15, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_98
; AVX512F-NEXT: ## BB#97: ## %cond.load142
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $0, 48(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_98: ## %else143
; AVX512F-NEXT: kshiftlw $14, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_100
; AVX512F-NEXT: ## BB#99: ## %cond.load145
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $1, 49(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_100: ## %else146
; AVX512F-NEXT: kshiftlw $13, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_102
; AVX512F-NEXT: ## BB#101: ## %cond.load148
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $2, 50(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_102: ## %else149
; AVX512F-NEXT: kshiftlw $12, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_104
; AVX512F-NEXT: ## BB#103: ## %cond.load151
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $3, 51(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_104: ## %else152
; AVX512F-NEXT: kshiftlw $11, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_106
; AVX512F-NEXT: ## BB#105: ## %cond.load154
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $4, 52(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_106: ## %else155
; AVX512F-NEXT: kshiftlw $10, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_108
; AVX512F-NEXT: ## BB#107: ## %cond.load157
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $5, 53(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_108: ## %else158
; AVX512F-NEXT: kshiftlw $9, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_110
; AVX512F-NEXT: ## BB#109: ## %cond.load160
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $6, 54(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_110: ## %else161
; AVX512F-NEXT: kshiftlw $8, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_112
; AVX512F-NEXT: ## BB#111: ## %cond.load163
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $7, 55(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_112: ## %else164
; AVX512F-NEXT: kshiftlw $7, %k1, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_114
; AVX512F-NEXT: ## BB#113: ## %cond.load166
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $8, 56(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_114: ## %else167
; AVX512F-NEXT: kshiftlw $6, %k1, %k2
; AVX512F-NEXT: kshiftrw $15, %k2, %k2
; AVX512F-NEXT: kmovw %k2, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_116
; AVX512F-NEXT: ## BB#115: ## %cond.load169
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $9, 57(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_116: ## %else170
; AVX512F-NEXT: kshiftlw $5, %k1, %k3
; AVX512F-NEXT: kshiftrw $15, %k3, %k3
; AVX512F-NEXT: kmovw %k3, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_118
; AVX512F-NEXT: ## BB#117: ## %cond.load172
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $10, 58(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_118: ## %else173
; AVX512F-NEXT: kshiftlw $4, %k1, %k4
; AVX512F-NEXT: kshiftrw $15, %k4, %k4
; AVX512F-NEXT: kmovw %k4, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_120
; AVX512F-NEXT: ## BB#119: ## %cond.load175
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $11, 59(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_120: ## %else176
; AVX512F-NEXT: kshiftlw $3, %k1, %k5
; AVX512F-NEXT: kshiftrw $15, %k5, %k5
; AVX512F-NEXT: kmovw %k5, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_122
; AVX512F-NEXT: ## BB#121: ## %cond.load178
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $12, 60(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_122: ## %else179
; AVX512F-NEXT: kshiftlw $2, %k1, %k6
; AVX512F-NEXT: kshiftrw $15, %k6, %k6
; AVX512F-NEXT: kmovw %k6, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_124
; AVX512F-NEXT: ## BB#123: ## %cond.load181
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $13, 61(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_124: ## %else182
; AVX512F-NEXT: kshiftlw $1, %k1, %k7
; AVX512F-NEXT: kshiftrw $15, %k7, %k7
; AVX512F-NEXT: kmovw %k7, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_126
; AVX512F-NEXT: ## BB#125: ## %cond.load184
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $14, 62(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_126: ## %else185
; AVX512F-NEXT: kshiftlw $0, %k1, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB52_128
; AVX512F-NEXT: ## BB#127: ## %cond.load187
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpinsrb $15, 63(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: LBB52_128: ## %else188
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw (%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, (%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
AVX512F-NEXT: kmovw %k0, %eax 5370; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5371; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5372; AVX512F-NEXT: kmovw %k0, %eax 5373; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5374; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5375; AVX512F-NEXT: kmovw %k0, %eax 5376; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5377; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5378; AVX512F-NEXT: kmovw %k0, %eax 5379; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5380; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5381; AVX512F-NEXT: kmovw %k0, %eax 5382; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5383; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5384; AVX512F-NEXT: kmovw %k0, %eax 5385; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5386; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5387; AVX512F-NEXT: kmovw %k0, %eax 5388; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5389; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5390; AVX512F-NEXT: kmovw %k0, %eax 5391; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5392; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5393; AVX512F-NEXT: kmovw %k0, %eax 5394; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5395; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5396; AVX512F-NEXT: kmovw %k0, %eax 5397; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5398; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5399; AVX512F-NEXT: kmovw %k0, %eax 5400; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5401; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5402; AVX512F-NEXT: kmovw %k0, %eax 5403; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5404; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5405; AVX512F-NEXT: kmovw %k0, %eax 5406; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5407; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5408; AVX512F-NEXT: kmovw %k0, %eax 5409; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5410; AVX512F-NEXT: kmovw %k2, %eax 5411; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill 5412; AVX512F-NEXT: kmovw %k3, %r12d 5413; AVX512F-NEXT: kmovw %k4, %r15d 5414; AVX512F-NEXT: kmovw %k5, %r14d 5415; AVX512F-NEXT: kmovw %k6, %ebx 5416; AVX512F-NEXT: kmovw %k7, %r11d 5417; AVX512F-NEXT: kmovw %k1, %r10d 5418; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5419; AVX512F-NEXT: kmovw %k0, %eax 5420; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill 5421; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5422; AVX512F-NEXT: kmovw %k0, %r8d 5423; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5424; AVX512F-NEXT: kmovw %k0, %r9d 5425; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5426; AVX512F-NEXT: kmovw %k0, %edi 5427; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5428; AVX512F-NEXT: kmovw %k0, %esi 5429; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5430; AVX512F-NEXT: kmovw %k0, %edx 5431; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5432; AVX512F-NEXT: kmovw %k0, %ecx 5433; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 
2-byte Folded Reload 5434; AVX512F-NEXT: kmovw %k0, %eax 5435; AVX512F-NEXT: movl -{{[0-9]+}}(%rsp), %r13d ## 4-byte Reload 5436; AVX512F-NEXT: vmovd %r13d, %xmm2 5437; AVX512F-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5438; AVX512F-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5439; AVX512F-NEXT: vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5440; AVX512F-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5441; AVX512F-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5442; AVX512F-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5443; AVX512F-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5444; AVX512F-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5445; AVX512F-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5446; AVX512F-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5447; AVX512F-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5448; AVX512F-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5449; AVX512F-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5450; AVX512F-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5451; AVX512F-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload 5452; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload 5453; AVX512F-NEXT: vmovd %ebp, %xmm3 5454; AVX512F-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5455; AVX512F-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5456; AVX512F-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5457; AVX512F-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5458; AVX512F-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5459; AVX512F-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5460; AVX512F-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5461; AVX512F-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5462; AVX512F-NEXT: vpinsrb $9, (%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5463; AVX512F-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5464; AVX512F-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5465; AVX512F-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5466; AVX512F-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5467; AVX512F-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5468; AVX512F-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload 5469; AVX512F-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload 5470; AVX512F-NEXT: vmovd %ebp, %xmm6 5471; AVX512F-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload 5472; AVX512F-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload 5473; AVX512F-NEXT: vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload 5474; AVX512F-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload 5475; AVX512F-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload 5476; AVX512F-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload 5477; AVX512F-NEXT: 
vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload 5478; AVX512F-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload 5479; AVX512F-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload 5480; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5481; AVX512F-NEXT: kmovw %k0, %r13d 5482; AVX512F-NEXT: vpinsrb $10, %r12d, %xmm6, %xmm6 5483; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5484; AVX512F-NEXT: kmovw %k0, %r12d 5485; AVX512F-NEXT: vpinsrb $11, %r15d, %xmm6, %xmm6 5486; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5487; AVX512F-NEXT: kmovw %k0, %r15d 5488; AVX512F-NEXT: vpinsrb $12, %r14d, %xmm6, %xmm6 5489; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5490; AVX512F-NEXT: kmovw %k0, %r14d 5491; AVX512F-NEXT: vpinsrb $13, %ebx, %xmm6, %xmm6 5492; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5493; AVX512F-NEXT: kmovw %k0, %ebx 5494; AVX512F-NEXT: vpinsrb $14, %r11d, %xmm6, %xmm6 5495; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5496; AVX512F-NEXT: kmovw %k0, %r11d 5497; AVX512F-NEXT: vpinsrb $15, %r10d, %xmm6, %xmm6 5498; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5499; AVX512F-NEXT: kmovw %k0, %r10d 5500; AVX512F-NEXT: vmovd %r8d, %xmm7 5501; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload 5502; AVX512F-NEXT: kmovw %k0, %r8d 5503; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 5504; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2 5505; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] 5506; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 5507; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0 5508; AVX512F-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm7, %xmm2 ## 4-byte Folded Reload 5509; AVX512F-NEXT: vpinsrb $2, %r9d, %xmm2, %xmm2 5510; AVX512F-NEXT: vpinsrb $3, %edi, %xmm2, %xmm2 5511; AVX512F-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2 5512; AVX512F-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2 5513; AVX512F-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2 5514; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 5515; AVX512F-NEXT: vpinsrb $8, %r13d, %xmm2, %xmm2 5516; AVX512F-NEXT: vpinsrb $9, %r12d, %xmm2, %xmm2 5517; AVX512F-NEXT: vpinsrb $10, %r15d, %xmm2, %xmm2 5518; AVX512F-NEXT: vpinsrb $11, %r14d, %xmm2, %xmm2 5519; AVX512F-NEXT: vpinsrb $12, %ebx, %xmm2, %xmm2 5520; AVX512F-NEXT: vpinsrb $13, %r11d, %xmm2, %xmm2 5521; AVX512F-NEXT: vpinsrb $14, %r10d, %xmm2, %xmm2 5522; AVX512F-NEXT: vpinsrb $15, %r8d, %xmm2, %xmm2 5523; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm2, %ymm2 5524; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2 5525; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 5526; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1 5527; AVX512F-NEXT: addq $76, %rsp 5528; AVX512F-NEXT: popq %rbx 5529; AVX512F-NEXT: popq %r12 5530; AVX512F-NEXT: popq %r13 5531; AVX512F-NEXT: popq %r14 5532; AVX512F-NEXT: popq %r15 5533; AVX512F-NEXT: popq %rbp 5534; AVX512F-NEXT: retq 5535; 5536; SKX-LABEL: test_mask_load_64xi8: 5537; SKX: ## BB#0: 5538; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 5539; SKX-NEXT: vpmovb2m %zmm0, %k1 5540; SKX-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1} 5541; SKX-NEXT: vmovaps %zmm1, %zmm0 5542; SKX-NEXT: retq 5543 %res = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* %addr, i32 4, <64 x i1>%mask, <64 x i8> %val) 5544 ret <64 x i8> %res 5545} 5546declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x 
i8>*, i32, <64 x i1>, <64 x i8>) 5547 5548define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) { 5549; AVX-LABEL: test_mask_load_8xi16: 5550; AVX: ## BB#0: 5551; AVX-NEXT: vpextrb $0, %xmm0, %eax 5552; AVX-NEXT: ## implicit-def: %XMM1 5553; AVX-NEXT: testb $1, %al 5554; AVX-NEXT: je LBB53_2 5555; AVX-NEXT: ## BB#1: ## %cond.load 5556; AVX-NEXT: movzwl (%rdi), %eax 5557; AVX-NEXT: vmovd %eax, %xmm1 5558; AVX-NEXT: LBB53_2: ## %else 5559; AVX-NEXT: vpextrb $2, %xmm0, %eax 5560; AVX-NEXT: testb $1, %al 5561; AVX-NEXT: je LBB53_4 5562; AVX-NEXT: ## BB#3: ## %cond.load1 5563; AVX-NEXT: vpinsrw $1, 2(%rdi), %xmm1, %xmm1 5564; AVX-NEXT: LBB53_4: ## %else2 5565; AVX-NEXT: vpextrb $4, %xmm0, %eax 5566; AVX-NEXT: testb $1, %al 5567; AVX-NEXT: je LBB53_6 5568; AVX-NEXT: ## BB#5: ## %cond.load4 5569; AVX-NEXT: vpinsrw $2, 4(%rdi), %xmm1, %xmm1 5570; AVX-NEXT: LBB53_6: ## %else5 5571; AVX-NEXT: vpextrb $6, %xmm0, %eax 5572; AVX-NEXT: testb $1, %al 5573; AVX-NEXT: je LBB53_8 5574; AVX-NEXT: ## BB#7: ## %cond.load7 5575; AVX-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1 5576; AVX-NEXT: LBB53_8: ## %else8 5577; AVX-NEXT: vpextrb $8, %xmm0, %eax 5578; AVX-NEXT: testb $1, %al 5579; AVX-NEXT: je LBB53_10 5580; AVX-NEXT: ## BB#9: ## %cond.load10 5581; AVX-NEXT: vpinsrw $4, 8(%rdi), %xmm1, %xmm1 5582; AVX-NEXT: LBB53_10: ## %else11 5583; AVX-NEXT: vpextrb $10, %xmm0, %eax 5584; AVX-NEXT: testb $1, %al 5585; AVX-NEXT: je LBB53_12 5586; AVX-NEXT: ## BB#11: ## %cond.load13 5587; AVX-NEXT: vpinsrw $5, 10(%rdi), %xmm1, %xmm1 5588; AVX-NEXT: LBB53_12: ## %else14 5589; AVX-NEXT: vpextrb $12, %xmm0, %eax 5590; AVX-NEXT: testb $1, %al 5591; AVX-NEXT: je LBB53_14 5592; AVX-NEXT: ## BB#13: ## %cond.load16 5593; AVX-NEXT: vpinsrw $6, 12(%rdi), %xmm1, %xmm1 5594; AVX-NEXT: LBB53_14: ## %else17 5595; AVX-NEXT: vpextrb $14, %xmm0, %eax 5596; AVX-NEXT: testb $1, %al 5597; AVX-NEXT: je LBB53_16 5598; AVX-NEXT: ## BB#15: ## %cond.load19 5599; AVX-NEXT: vpinsrw $7, 14(%rdi), %xmm1, %xmm1 5600; AVX-NEXT: LBB53_16: ## %else20 5601; AVX-NEXT: vpsllw $15, %xmm0, %xmm0 5602; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 5603; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 5604; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2 5605; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 5606; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 5607; AVX-NEXT: retq 5608; 5609; AVX512F-LABEL: test_mask_load_8xi16: 5610; AVX512F: ## BB#0: 5611; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 5612; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 5613; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 5614; AVX512F-NEXT: kshiftlw $15, %k1, %k0 5615; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5616; AVX512F-NEXT: kmovw %k0, %eax 5617; AVX512F-NEXT: ## implicit-def: %XMM0 5618; AVX512F-NEXT: testb %al, %al 5619; AVX512F-NEXT: je LBB53_2 5620; AVX512F-NEXT: ## BB#1: ## %cond.load 5621; AVX512F-NEXT: movzwl (%rdi), %eax 5622; AVX512F-NEXT: vmovd %eax, %xmm0 5623; AVX512F-NEXT: LBB53_2: ## %else 5624; AVX512F-NEXT: kshiftlw $14, %k1, %k0 5625; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5626; AVX512F-NEXT: kmovw %k0, %eax 5627; AVX512F-NEXT: testb %al, %al 5628; AVX512F-NEXT: je LBB53_4 5629; AVX512F-NEXT: ## BB#3: ## %cond.load1 5630; AVX512F-NEXT: vpinsrw $1, 2(%rdi), %xmm0, %xmm0 5631; AVX512F-NEXT: LBB53_4: ## %else2 5632; AVX512F-NEXT: kshiftlw $13, %k1, %k0 5633; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5634; AVX512F-NEXT: kmovw %k0, %eax 5635; AVX512F-NEXT: testb %al, %al 5636; AVX512F-NEXT: je LBB53_6 5637; AVX512F-NEXT: ## BB#5: ## %cond.load4 5638; AVX512F-NEXT: vpinsrw $2, 4(%rdi), %xmm0, %xmm0 5639; AVX512F-NEXT: 
LBB53_6: ## %else5 5640; AVX512F-NEXT: kshiftlw $12, %k1, %k0 5641; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5642; AVX512F-NEXT: kmovw %k0, %eax 5643; AVX512F-NEXT: testb %al, %al 5644; AVX512F-NEXT: je LBB53_8 5645; AVX512F-NEXT: ## BB#7: ## %cond.load7 5646; AVX512F-NEXT: vpinsrw $3, 6(%rdi), %xmm0, %xmm0 5647; AVX512F-NEXT: LBB53_8: ## %else8 5648; AVX512F-NEXT: kshiftlw $11, %k1, %k0 5649; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5650; AVX512F-NEXT: kmovw %k0, %eax 5651; AVX512F-NEXT: testb %al, %al 5652; AVX512F-NEXT: je LBB53_10 5653; AVX512F-NEXT: ## BB#9: ## %cond.load10 5654; AVX512F-NEXT: vpinsrw $4, 8(%rdi), %xmm0, %xmm0 5655; AVX512F-NEXT: LBB53_10: ## %else11 5656; AVX512F-NEXT: kshiftlw $10, %k1, %k0 5657; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5658; AVX512F-NEXT: kmovw %k0, %eax 5659; AVX512F-NEXT: testb %al, %al 5660; AVX512F-NEXT: je LBB53_12 5661; AVX512F-NEXT: ## BB#11: ## %cond.load13 5662; AVX512F-NEXT: vpinsrw $5, 10(%rdi), %xmm0, %xmm0 5663; AVX512F-NEXT: LBB53_12: ## %else14 5664; AVX512F-NEXT: kshiftlw $9, %k1, %k0 5665; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5666; AVX512F-NEXT: kmovw %k0, %eax 5667; AVX512F-NEXT: testb %al, %al 5668; AVX512F-NEXT: je LBB53_14 5669; AVX512F-NEXT: ## BB#13: ## %cond.load16 5670; AVX512F-NEXT: vpinsrw $6, 12(%rdi), %xmm0, %xmm0 5671; AVX512F-NEXT: LBB53_14: ## %else17 5672; AVX512F-NEXT: kshiftlw $8, %k1, %k0 5673; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5674; AVX512F-NEXT: kmovw %k0, %eax 5675; AVX512F-NEXT: testb %al, %al 5676; AVX512F-NEXT: je LBB53_16 5677; AVX512F-NEXT: ## BB#15: ## %cond.load19 5678; AVX512F-NEXT: vpinsrw $7, 14(%rdi), %xmm0, %xmm0 5679; AVX512F-NEXT: LBB53_16: ## %else20 5680; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 5681; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm1 {%k1} {z} 5682; AVX512F-NEXT: vpmovqw %zmm1, %xmm1 5683; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 5684; AVX512F-NEXT: vpxor %xmm2, %xmm1, %xmm2 5685; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 5686; AVX512F-NEXT: vpor %xmm2, %xmm0, %xmm0 5687; AVX512F-NEXT: retq 5688; 5689; SKX-LABEL: test_mask_load_8xi16: 5690; SKX: ## BB#0: 5691; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 5692; SKX-NEXT: vpmovw2m %xmm0, %k1 5693; SKX-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} {z} 5694; SKX-NEXT: retq 5695 %res = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef) 5696 ret <8 x i16> %res 5697} 5698declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) 5699 5700define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) { 5701; AVX1-LABEL: test_mask_load_16xi16: 5702; AVX1: ## BB#0: 5703; AVX1-NEXT: vpextrb $0, %xmm0, %eax 5704; AVX1-NEXT: ## implicit-def: %YMM1 5705; AVX1-NEXT: testb $1, %al 5706; AVX1-NEXT: je LBB54_2 5707; AVX1-NEXT: ## BB#1: ## %cond.load 5708; AVX1-NEXT: movzwl (%rdi), %eax 5709; AVX1-NEXT: vmovd %eax, %xmm1 5710; AVX1-NEXT: LBB54_2: ## %else 5711; AVX1-NEXT: vpextrb $1, %xmm0, %eax 5712; AVX1-NEXT: testb $1, %al 5713; AVX1-NEXT: je LBB54_4 5714; AVX1-NEXT: ## BB#3: ## %cond.load1 5715; AVX1-NEXT: vpinsrw $1, 2(%rdi), %xmm1, %xmm2 5716; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5717; AVX1-NEXT: LBB54_4: ## %else2 5718; AVX1-NEXT: vpextrb $2, %xmm0, %eax 5719; AVX1-NEXT: testb $1, %al 5720; AVX1-NEXT: je LBB54_6 5721; AVX1-NEXT: ## BB#5: ## %cond.load4 5722; AVX1-NEXT: vpinsrw $2, 4(%rdi), %xmm1, %xmm2 5723; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5724; AVX1-NEXT: LBB54_6: ## %else5 5725; AVX1-NEXT: vpextrb 
$3, %xmm0, %eax 5726; AVX1-NEXT: testb $1, %al 5727; AVX1-NEXT: je LBB54_8 5728; AVX1-NEXT: ## BB#7: ## %cond.load7 5729; AVX1-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm2 5730; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5731; AVX1-NEXT: LBB54_8: ## %else8 5732; AVX1-NEXT: vpextrb $4, %xmm0, %eax 5733; AVX1-NEXT: testb $1, %al 5734; AVX1-NEXT: je LBB54_10 5735; AVX1-NEXT: ## BB#9: ## %cond.load10 5736; AVX1-NEXT: vpinsrw $4, 8(%rdi), %xmm1, %xmm2 5737; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5738; AVX1-NEXT: LBB54_10: ## %else11 5739; AVX1-NEXT: vpextrb $5, %xmm0, %eax 5740; AVX1-NEXT: testb $1, %al 5741; AVX1-NEXT: je LBB54_12 5742; AVX1-NEXT: ## BB#11: ## %cond.load13 5743; AVX1-NEXT: vpinsrw $5, 10(%rdi), %xmm1, %xmm2 5744; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5745; AVX1-NEXT: LBB54_12: ## %else14 5746; AVX1-NEXT: vpextrb $6, %xmm0, %eax 5747; AVX1-NEXT: testb $1, %al 5748; AVX1-NEXT: je LBB54_14 5749; AVX1-NEXT: ## BB#13: ## %cond.load16 5750; AVX1-NEXT: vpinsrw $6, 12(%rdi), %xmm1, %xmm2 5751; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5752; AVX1-NEXT: LBB54_14: ## %else17 5753; AVX1-NEXT: vpextrb $7, %xmm0, %eax 5754; AVX1-NEXT: testb $1, %al 5755; AVX1-NEXT: je LBB54_16 5756; AVX1-NEXT: ## BB#15: ## %cond.load19 5757; AVX1-NEXT: vpinsrw $7, 14(%rdi), %xmm1, %xmm2 5758; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5759; AVX1-NEXT: LBB54_16: ## %else20 5760; AVX1-NEXT: vpextrb $8, %xmm0, %eax 5761; AVX1-NEXT: testb $1, %al 5762; AVX1-NEXT: je LBB54_18 5763; AVX1-NEXT: ## BB#17: ## %cond.load22 5764; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5765; AVX1-NEXT: vpinsrw $0, 16(%rdi), %xmm2, %xmm2 5766; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 5767; AVX1-NEXT: LBB54_18: ## %else23 5768; AVX1-NEXT: vpextrb $9, %xmm0, %eax 5769; AVX1-NEXT: testb $1, %al 5770; AVX1-NEXT: je LBB54_20 5771; AVX1-NEXT: ## BB#19: ## %cond.load25 5772; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5773; AVX1-NEXT: vpinsrw $1, 18(%rdi), %xmm2, %xmm2 5774; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 5775; AVX1-NEXT: LBB54_20: ## %else26 5776; AVX1-NEXT: vpextrb $10, %xmm0, %eax 5777; AVX1-NEXT: testb $1, %al 5778; AVX1-NEXT: je LBB54_22 5779; AVX1-NEXT: ## BB#21: ## %cond.load28 5780; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5781; AVX1-NEXT: vpinsrw $2, 20(%rdi), %xmm2, %xmm2 5782; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 5783; AVX1-NEXT: LBB54_22: ## %else29 5784; AVX1-NEXT: vpextrb $11, %xmm0, %eax 5785; AVX1-NEXT: testb $1, %al 5786; AVX1-NEXT: je LBB54_24 5787; AVX1-NEXT: ## BB#23: ## %cond.load31 5788; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5789; AVX1-NEXT: vpinsrw $3, 22(%rdi), %xmm2, %xmm2 5790; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 5791; AVX1-NEXT: LBB54_24: ## %else32 5792; AVX1-NEXT: vpextrb $12, %xmm0, %eax 5793; AVX1-NEXT: testb $1, %al 5794; AVX1-NEXT: je LBB54_26 5795; AVX1-NEXT: ## BB#25: ## %cond.load34 5796; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5797; AVX1-NEXT: vpinsrw $4, 24(%rdi), %xmm2, %xmm2 5798; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 5799; AVX1-NEXT: LBB54_26: ## %else35 5800; AVX1-NEXT: vpextrb $13, %xmm0, %eax 5801; AVX1-NEXT: testb $1, %al 5802; AVX1-NEXT: je LBB54_28 5803; AVX1-NEXT: ## BB#27: ## %cond.load37 5804; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5805; AVX1-NEXT: vpinsrw $5, 26(%rdi), %xmm2, %xmm2 5806; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 5807; AVX1-NEXT: LBB54_28: ## %else38 5808; AVX1-NEXT: vpextrb $14, %xmm0, %eax 5809; AVX1-NEXT: 
testb $1, %al 5810; AVX1-NEXT: je LBB54_30 5811; AVX1-NEXT: ## BB#29: ## %cond.load40 5812; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5813; AVX1-NEXT: vpinsrw $6, 28(%rdi), %xmm2, %xmm2 5814; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 5815; AVX1-NEXT: LBB54_30: ## %else41 5816; AVX1-NEXT: vpextrb $15, %xmm0, %eax 5817; AVX1-NEXT: testb $1, %al 5818; AVX1-NEXT: je LBB54_32 5819; AVX1-NEXT: ## BB#31: ## %cond.load43 5820; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5821; AVX1-NEXT: vpinsrw $7, 30(%rdi), %xmm2, %xmm2 5822; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 5823; AVX1-NEXT: LBB54_32: ## %else44 5824; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5825; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2 5826; AVX1-NEXT: vpsraw $15, %xmm2, %xmm2 5827; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 5828; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0 5829; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 5830; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 5831; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 5832; AVX1-NEXT: retq 5833; 5834; AVX2-LABEL: test_mask_load_16xi16: 5835; AVX2: ## BB#0: 5836; AVX2-NEXT: vpextrb $0, %xmm0, %eax 5837; AVX2-NEXT: ## implicit-def: %YMM1 5838; AVX2-NEXT: testb $1, %al 5839; AVX2-NEXT: je LBB54_2 5840; AVX2-NEXT: ## BB#1: ## %cond.load 5841; AVX2-NEXT: movzwl (%rdi), %eax 5842; AVX2-NEXT: vmovd %eax, %xmm1 5843; AVX2-NEXT: LBB54_2: ## %else 5844; AVX2-NEXT: vpextrb $1, %xmm0, %eax 5845; AVX2-NEXT: testb $1, %al 5846; AVX2-NEXT: je LBB54_4 5847; AVX2-NEXT: ## BB#3: ## %cond.load1 5848; AVX2-NEXT: vpinsrw $1, 2(%rdi), %xmm1, %xmm2 5849; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5850; AVX2-NEXT: LBB54_4: ## %else2 5851; AVX2-NEXT: vpextrb $2, %xmm0, %eax 5852; AVX2-NEXT: testb $1, %al 5853; AVX2-NEXT: je LBB54_6 5854; AVX2-NEXT: ## BB#5: ## %cond.load4 5855; AVX2-NEXT: vpinsrw $2, 4(%rdi), %xmm1, %xmm2 5856; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5857; AVX2-NEXT: LBB54_6: ## %else5 5858; AVX2-NEXT: vpextrb $3, %xmm0, %eax 5859; AVX2-NEXT: testb $1, %al 5860; AVX2-NEXT: je LBB54_8 5861; AVX2-NEXT: ## BB#7: ## %cond.load7 5862; AVX2-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm2 5863; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5864; AVX2-NEXT: LBB54_8: ## %else8 5865; AVX2-NEXT: vpextrb $4, %xmm0, %eax 5866; AVX2-NEXT: testb $1, %al 5867; AVX2-NEXT: je LBB54_10 5868; AVX2-NEXT: ## BB#9: ## %cond.load10 5869; AVX2-NEXT: vpinsrw $4, 8(%rdi), %xmm1, %xmm2 5870; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5871; AVX2-NEXT: LBB54_10: ## %else11 5872; AVX2-NEXT: vpextrb $5, %xmm0, %eax 5873; AVX2-NEXT: testb $1, %al 5874; AVX2-NEXT: je LBB54_12 5875; AVX2-NEXT: ## BB#11: ## %cond.load13 5876; AVX2-NEXT: vpinsrw $5, 10(%rdi), %xmm1, %xmm2 5877; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5878; AVX2-NEXT: LBB54_12: ## %else14 5879; AVX2-NEXT: vpextrb $6, %xmm0, %eax 5880; AVX2-NEXT: testb $1, %al 5881; AVX2-NEXT: je LBB54_14 5882; AVX2-NEXT: ## BB#13: ## %cond.load16 5883; AVX2-NEXT: vpinsrw $6, 12(%rdi), %xmm1, %xmm2 5884; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 5885; AVX2-NEXT: LBB54_14: ## %else17 5886; AVX2-NEXT: vpextrb $7, %xmm0, %eax 5887; AVX2-NEXT: testb $1, %al 5888; AVX2-NEXT: je LBB54_16 5889; AVX2-NEXT: ## BB#15: ## %cond.load19 5890; AVX2-NEXT: vpinsrw $7, 14(%rdi), %xmm1, %xmm2 5891; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = 
ymm2[0,1,2,3],ymm1[4,5,6,7] 5892; AVX2-NEXT: LBB54_16: ## %else20 5893; AVX2-NEXT: vpextrb $8, %xmm0, %eax 5894; AVX2-NEXT: testb $1, %al 5895; AVX2-NEXT: je LBB54_18 5896; AVX2-NEXT: ## BB#17: ## %cond.load22 5897; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 5898; AVX2-NEXT: vpinsrw $0, 16(%rdi), %xmm2, %xmm2 5899; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 5900; AVX2-NEXT: LBB54_18: ## %else23 5901; AVX2-NEXT: vpextrb $9, %xmm0, %eax 5902; AVX2-NEXT: testb $1, %al 5903; AVX2-NEXT: je LBB54_20 5904; AVX2-NEXT: ## BB#19: ## %cond.load25 5905; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 5906; AVX2-NEXT: vpinsrw $1, 18(%rdi), %xmm2, %xmm2 5907; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 5908; AVX2-NEXT: LBB54_20: ## %else26 5909; AVX2-NEXT: vpextrb $10, %xmm0, %eax 5910; AVX2-NEXT: testb $1, %al 5911; AVX2-NEXT: je LBB54_22 5912; AVX2-NEXT: ## BB#21: ## %cond.load28 5913; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 5914; AVX2-NEXT: vpinsrw $2, 20(%rdi), %xmm2, %xmm2 5915; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 5916; AVX2-NEXT: LBB54_22: ## %else29 5917; AVX2-NEXT: vpextrb $11, %xmm0, %eax 5918; AVX2-NEXT: testb $1, %al 5919; AVX2-NEXT: je LBB54_24 5920; AVX2-NEXT: ## BB#23: ## %cond.load31 5921; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 5922; AVX2-NEXT: vpinsrw $3, 22(%rdi), %xmm2, %xmm2 5923; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 5924; AVX2-NEXT: LBB54_24: ## %else32 5925; AVX2-NEXT: vpextrb $12, %xmm0, %eax 5926; AVX2-NEXT: testb $1, %al 5927; AVX2-NEXT: je LBB54_26 5928; AVX2-NEXT: ## BB#25: ## %cond.load34 5929; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 5930; AVX2-NEXT: vpinsrw $4, 24(%rdi), %xmm2, %xmm2 5931; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 5932; AVX2-NEXT: LBB54_26: ## %else35 5933; AVX2-NEXT: vpextrb $13, %xmm0, %eax 5934; AVX2-NEXT: testb $1, %al 5935; AVX2-NEXT: je LBB54_28 5936; AVX2-NEXT: ## BB#27: ## %cond.load37 5937; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 5938; AVX2-NEXT: vpinsrw $5, 26(%rdi), %xmm2, %xmm2 5939; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 5940; AVX2-NEXT: LBB54_28: ## %else38 5941; AVX2-NEXT: vpextrb $14, %xmm0, %eax 5942; AVX2-NEXT: testb $1, %al 5943; AVX2-NEXT: je LBB54_30 5944; AVX2-NEXT: ## BB#29: ## %cond.load40 5945; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 5946; AVX2-NEXT: vpinsrw $6, 28(%rdi), %xmm2, %xmm2 5947; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 5948; AVX2-NEXT: LBB54_30: ## %else41 5949; AVX2-NEXT: vpextrb $15, %xmm0, %eax 5950; AVX2-NEXT: testb $1, %al 5951; AVX2-NEXT: je LBB54_32 5952; AVX2-NEXT: ## BB#31: ## %cond.load43 5953; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 5954; AVX2-NEXT: vpinsrw $7, 30(%rdi), %xmm2, %xmm2 5955; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 5956; AVX2-NEXT: LBB54_32: ## %else44 5957; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 5958; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0 5959; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 5960; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 5961; AVX2-NEXT: retq 5962; 5963; AVX512F-LABEL: test_mask_load_16xi16: 5964; AVX512F: ## BB#0: 5965; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 5966; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 5967; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 5968; AVX512F-NEXT: kshiftlw $15, %k1, %k0 5969; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5970; AVX512F-NEXT: kmovw %k0, %eax 5971; AVX512F-NEXT: ## implicit-def: %YMM0 5972; AVX512F-NEXT: testb %al, 
%al 5973; AVX512F-NEXT: je LBB54_2 5974; AVX512F-NEXT: ## BB#1: ## %cond.load 5975; AVX512F-NEXT: movzwl (%rdi), %eax 5976; AVX512F-NEXT: vmovd %eax, %xmm0 5977; AVX512F-NEXT: LBB54_2: ## %else 5978; AVX512F-NEXT: kshiftlw $14, %k1, %k0 5979; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5980; AVX512F-NEXT: kmovw %k0, %eax 5981; AVX512F-NEXT: testb %al, %al 5982; AVX512F-NEXT: je LBB54_4 5983; AVX512F-NEXT: ## BB#3: ## %cond.load1 5984; AVX512F-NEXT: vpinsrw $1, 2(%rdi), %xmm0, %xmm1 5985; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 5986; AVX512F-NEXT: LBB54_4: ## %else2 5987; AVX512F-NEXT: kshiftlw $13, %k1, %k0 5988; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5989; AVX512F-NEXT: kmovw %k0, %eax 5990; AVX512F-NEXT: testb %al, %al 5991; AVX512F-NEXT: je LBB54_6 5992; AVX512F-NEXT: ## BB#5: ## %cond.load4 5993; AVX512F-NEXT: vpinsrw $2, 4(%rdi), %xmm0, %xmm1 5994; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 5995; AVX512F-NEXT: LBB54_6: ## %else5 5996; AVX512F-NEXT: kshiftlw $12, %k1, %k0 5997; AVX512F-NEXT: kshiftrw $15, %k0, %k0 5998; AVX512F-NEXT: kmovw %k0, %eax 5999; AVX512F-NEXT: testb %al, %al 6000; AVX512F-NEXT: je LBB54_8 6001; AVX512F-NEXT: ## BB#7: ## %cond.load7 6002; AVX512F-NEXT: vpinsrw $3, 6(%rdi), %xmm0, %xmm1 6003; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 6004; AVX512F-NEXT: LBB54_8: ## %else8 6005; AVX512F-NEXT: kshiftlw $11, %k1, %k0 6006; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6007; AVX512F-NEXT: kmovw %k0, %eax 6008; AVX512F-NEXT: testb %al, %al 6009; AVX512F-NEXT: je LBB54_10 6010; AVX512F-NEXT: ## BB#9: ## %cond.load10 6011; AVX512F-NEXT: vpinsrw $4, 8(%rdi), %xmm0, %xmm1 6012; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 6013; AVX512F-NEXT: LBB54_10: ## %else11 6014; AVX512F-NEXT: kshiftlw $10, %k1, %k0 6015; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6016; AVX512F-NEXT: kmovw %k0, %eax 6017; AVX512F-NEXT: testb %al, %al 6018; AVX512F-NEXT: je LBB54_12 6019; AVX512F-NEXT: ## BB#11: ## %cond.load13 6020; AVX512F-NEXT: vpinsrw $5, 10(%rdi), %xmm0, %xmm1 6021; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 6022; AVX512F-NEXT: LBB54_12: ## %else14 6023; AVX512F-NEXT: kshiftlw $9, %k1, %k0 6024; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6025; AVX512F-NEXT: kmovw %k0, %eax 6026; AVX512F-NEXT: testb %al, %al 6027; AVX512F-NEXT: je LBB54_14 6028; AVX512F-NEXT: ## BB#13: ## %cond.load16 6029; AVX512F-NEXT: vpinsrw $6, 12(%rdi), %xmm0, %xmm1 6030; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 6031; AVX512F-NEXT: LBB54_14: ## %else17 6032; AVX512F-NEXT: kshiftlw $8, %k1, %k0 6033; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6034; AVX512F-NEXT: kmovw %k0, %eax 6035; AVX512F-NEXT: testb %al, %al 6036; AVX512F-NEXT: je LBB54_16 6037; AVX512F-NEXT: ## BB#15: ## %cond.load19 6038; AVX512F-NEXT: vpinsrw $7, 14(%rdi), %xmm0, %xmm1 6039; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 6040; AVX512F-NEXT: LBB54_16: ## %else20 6041; AVX512F-NEXT: kshiftlw $7, %k1, %k0 6042; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6043; AVX512F-NEXT: kmovw %k0, %eax 6044; AVX512F-NEXT: testb %al, %al 6045; AVX512F-NEXT: je LBB54_18 6046; AVX512F-NEXT: ## BB#17: ## %cond.load22 6047; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6048; AVX512F-NEXT: vpinsrw $0, 16(%rdi), %xmm1, %xmm1 6049; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 6050; AVX512F-NEXT: LBB54_18: ## %else23 6051; AVX512F-NEXT: kshiftlw $6, %k1, %k0 6052; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6053; AVX512F-NEXT: kmovw %k0, 
%eax 6054; AVX512F-NEXT: testb %al, %al 6055; AVX512F-NEXT: je LBB54_20 6056; AVX512F-NEXT: ## BB#19: ## %cond.load25 6057; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6058; AVX512F-NEXT: vpinsrw $1, 18(%rdi), %xmm1, %xmm1 6059; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 6060; AVX512F-NEXT: LBB54_20: ## %else26 6061; AVX512F-NEXT: kshiftlw $5, %k1, %k0 6062; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6063; AVX512F-NEXT: kmovw %k0, %eax 6064; AVX512F-NEXT: testb %al, %al 6065; AVX512F-NEXT: je LBB54_22 6066; AVX512F-NEXT: ## BB#21: ## %cond.load28 6067; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6068; AVX512F-NEXT: vpinsrw $2, 20(%rdi), %xmm1, %xmm1 6069; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 6070; AVX512F-NEXT: LBB54_22: ## %else29 6071; AVX512F-NEXT: kshiftlw $4, %k1, %k0 6072; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6073; AVX512F-NEXT: kmovw %k0, %eax 6074; AVX512F-NEXT: testb %al, %al 6075; AVX512F-NEXT: je LBB54_24 6076; AVX512F-NEXT: ## BB#23: ## %cond.load31 6077; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6078; AVX512F-NEXT: vpinsrw $3, 22(%rdi), %xmm1, %xmm1 6079; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 6080; AVX512F-NEXT: LBB54_24: ## %else32 6081; AVX512F-NEXT: kshiftlw $3, %k1, %k0 6082; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6083; AVX512F-NEXT: kmovw %k0, %eax 6084; AVX512F-NEXT: testb %al, %al 6085; AVX512F-NEXT: je LBB54_26 6086; AVX512F-NEXT: ## BB#25: ## %cond.load34 6087; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6088; AVX512F-NEXT: vpinsrw $4, 24(%rdi), %xmm1, %xmm1 6089; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 6090; AVX512F-NEXT: LBB54_26: ## %else35 6091; AVX512F-NEXT: kshiftlw $2, %k1, %k0 6092; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6093; AVX512F-NEXT: kmovw %k0, %eax 6094; AVX512F-NEXT: testb %al, %al 6095; AVX512F-NEXT: je LBB54_28 6096; AVX512F-NEXT: ## BB#27: ## %cond.load37 6097; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6098; AVX512F-NEXT: vpinsrw $5, 26(%rdi), %xmm1, %xmm1 6099; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 6100; AVX512F-NEXT: LBB54_28: ## %else38 6101; AVX512F-NEXT: kshiftlw $1, %k1, %k0 6102; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6103; AVX512F-NEXT: kmovw %k0, %eax 6104; AVX512F-NEXT: testb %al, %al 6105; AVX512F-NEXT: je LBB54_30 6106; AVX512F-NEXT: ## BB#29: ## %cond.load40 6107; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6108; AVX512F-NEXT: vpinsrw $6, 28(%rdi), %xmm1, %xmm1 6109; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 6110; AVX512F-NEXT: LBB54_30: ## %else41 6111; AVX512F-NEXT: kshiftlw $0, %k1, %k0 6112; AVX512F-NEXT: kshiftrw $15, %k0, %k0 6113; AVX512F-NEXT: kmovw %k0, %eax 6114; AVX512F-NEXT: testb %al, %al 6115; AVX512F-NEXT: je LBB54_32 6116; AVX512F-NEXT: ## BB#31: ## %cond.load43 6117; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6118; AVX512F-NEXT: vpinsrw $7, 30(%rdi), %xmm1, %xmm1 6119; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 6120; AVX512F-NEXT: LBB54_32: ## %else44 6121; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 6122; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} 6123; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 6124; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0 6125; AVX512F-NEXT: retq 6126; 6127; SKX-LABEL: test_mask_load_16xi16: 6128; SKX: ## BB#0: 6129; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 6130; SKX-NEXT: vpmovb2m %xmm0, %k1 6131; SKX-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z} 6132; SKX-NEXT: retq 6133 %res = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer) 6134 ret <16 x i16> %res 6135} 6136declare <16 x 
i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>) 6137 6138define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) { 6139; AVX1-LABEL: test_mask_load_32xi16: 6140; AVX1: ## BB#0: 6141; AVX1-NEXT: vpextrb $0, %xmm0, %eax 6142; AVX1-NEXT: testb $1, %al 6143; AVX1-NEXT: je LBB55_2 6144; AVX1-NEXT: ## BB#1: ## %cond.load 6145; AVX1-NEXT: movzwl (%rdi), %eax 6146; AVX1-NEXT: vmovd %eax, %xmm3 6147; AVX1-NEXT: LBB55_2: ## %else 6148; AVX1-NEXT: vpextrb $1, %xmm0, %eax 6149; AVX1-NEXT: testb $1, %al 6150; AVX1-NEXT: je LBB55_4 6151; AVX1-NEXT: ## BB#3: ## %cond.load1 6152; AVX1-NEXT: vpinsrw $1, 2(%rdi), %xmm3, %xmm4 6153; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6154; AVX1-NEXT: LBB55_4: ## %else2 6155; AVX1-NEXT: vpextrb $2, %xmm0, %eax 6156; AVX1-NEXT: testb $1, %al 6157; AVX1-NEXT: je LBB55_6 6158; AVX1-NEXT: ## BB#5: ## %cond.load4 6159; AVX1-NEXT: vpinsrw $2, 4(%rdi), %xmm3, %xmm4 6160; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6161; AVX1-NEXT: LBB55_6: ## %else5 6162; AVX1-NEXT: vpextrb $3, %xmm0, %eax 6163; AVX1-NEXT: testb $1, %al 6164; AVX1-NEXT: je LBB55_8 6165; AVX1-NEXT: ## BB#7: ## %cond.load7 6166; AVX1-NEXT: vpinsrw $3, 6(%rdi), %xmm3, %xmm4 6167; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6168; AVX1-NEXT: LBB55_8: ## %else8 6169; AVX1-NEXT: vpextrb $4, %xmm0, %eax 6170; AVX1-NEXT: testb $1, %al 6171; AVX1-NEXT: je LBB55_10 6172; AVX1-NEXT: ## BB#9: ## %cond.load10 6173; AVX1-NEXT: vpinsrw $4, 8(%rdi), %xmm3, %xmm4 6174; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6175; AVX1-NEXT: LBB55_10: ## %else11 6176; AVX1-NEXT: vpextrb $5, %xmm0, %eax 6177; AVX1-NEXT: testb $1, %al 6178; AVX1-NEXT: je LBB55_12 6179; AVX1-NEXT: ## BB#11: ## %cond.load13 6180; AVX1-NEXT: vpinsrw $5, 10(%rdi), %xmm3, %xmm4 6181; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6182; AVX1-NEXT: LBB55_12: ## %else14 6183; AVX1-NEXT: vpextrb $6, %xmm0, %eax 6184; AVX1-NEXT: testb $1, %al 6185; AVX1-NEXT: je LBB55_14 6186; AVX1-NEXT: ## BB#13: ## %cond.load16 6187; AVX1-NEXT: vpinsrw $6, 12(%rdi), %xmm3, %xmm4 6188; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6189; AVX1-NEXT: LBB55_14: ## %else17 6190; AVX1-NEXT: vpextrb $7, %xmm0, %eax 6191; AVX1-NEXT: testb $1, %al 6192; AVX1-NEXT: je LBB55_16 6193; AVX1-NEXT: ## BB#15: ## %cond.load19 6194; AVX1-NEXT: vpinsrw $7, 14(%rdi), %xmm3, %xmm4 6195; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6196; AVX1-NEXT: LBB55_16: ## %else20 6197; AVX1-NEXT: vpextrb $8, %xmm0, %eax 6198; AVX1-NEXT: testb $1, %al 6199; AVX1-NEXT: je LBB55_18 6200; AVX1-NEXT: ## BB#17: ## %cond.load22 6201; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 6202; AVX1-NEXT: vpinsrw $0, 16(%rdi), %xmm4, %xmm4 6203; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 6204; AVX1-NEXT: LBB55_18: ## %else23 6205; AVX1-NEXT: vpextrb $9, %xmm0, %eax 6206; AVX1-NEXT: testb $1, %al 6207; AVX1-NEXT: je LBB55_20 6208; AVX1-NEXT: ## BB#19: ## %cond.load25 6209; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 6210; AVX1-NEXT: vpinsrw $1, 18(%rdi), %xmm4, %xmm4 6211; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 6212; AVX1-NEXT: LBB55_20: ## %else26 6213; AVX1-NEXT: vpextrb $10, %xmm0, %eax 6214; AVX1-NEXT: testb $1, %al 6215; AVX1-NEXT: je LBB55_22 6216; AVX1-NEXT: ## BB#21: ## %cond.load28 6217; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 6218; AVX1-NEXT: vpinsrw $2, 20(%rdi), %xmm4, %xmm4 6219; AVX1-NEXT: vinsertf128 $1, %xmm4, 
%ymm3, %ymm3 6220; AVX1-NEXT: LBB55_22: ## %else29 6221; AVX1-NEXT: vpextrb $11, %xmm0, %eax 6222; AVX1-NEXT: testb $1, %al 6223; AVX1-NEXT: je LBB55_24 6224; AVX1-NEXT: ## BB#23: ## %cond.load31 6225; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 6226; AVX1-NEXT: vpinsrw $3, 22(%rdi), %xmm4, %xmm4 6227; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 6228; AVX1-NEXT: LBB55_24: ## %else32 6229; AVX1-NEXT: vpextrb $12, %xmm0, %eax 6230; AVX1-NEXT: testb $1, %al 6231; AVX1-NEXT: je LBB55_26 6232; AVX1-NEXT: ## BB#25: ## %cond.load34 6233; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 6234; AVX1-NEXT: vpinsrw $4, 24(%rdi), %xmm4, %xmm4 6235; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 6236; AVX1-NEXT: LBB55_26: ## %else35 6237; AVX1-NEXT: vpextrb $13, %xmm0, %eax 6238; AVX1-NEXT: testb $1, %al 6239; AVX1-NEXT: je LBB55_28 6240; AVX1-NEXT: ## BB#27: ## %cond.load37 6241; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 6242; AVX1-NEXT: vpinsrw $5, 26(%rdi), %xmm4, %xmm4 6243; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 6244; AVX1-NEXT: LBB55_28: ## %else38 6245; AVX1-NEXT: vpextrb $14, %xmm0, %eax 6246; AVX1-NEXT: testb $1, %al 6247; AVX1-NEXT: je LBB55_30 6248; AVX1-NEXT: ## BB#29: ## %cond.load40 6249; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 6250; AVX1-NEXT: vpinsrw $6, 28(%rdi), %xmm4, %xmm4 6251; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 6252; AVX1-NEXT: LBB55_30: ## %else41 6253; AVX1-NEXT: vpextrb $15, %xmm0, %eax 6254; AVX1-NEXT: testb $1, %al 6255; AVX1-NEXT: je LBB55_32 6256; AVX1-NEXT: ## BB#31: ## %cond.load43 6257; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 6258; AVX1-NEXT: vpinsrw $7, 30(%rdi), %xmm4, %xmm4 6259; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 6260; AVX1-NEXT: LBB55_32: ## %else44 6261; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 6262; AVX1-NEXT: vpextrb $0, %xmm4, %eax 6263; AVX1-NEXT: testb $1, %al 6264; AVX1-NEXT: je LBB55_34 6265; AVX1-NEXT: ## BB#33: ## %cond.load46 6266; AVX1-NEXT: vpinsrw $0, 32(%rdi), %xmm0, %xmm5 6267; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7] 6268; AVX1-NEXT: LBB55_34: ## %else47 6269; AVX1-NEXT: vpextrb $1, %xmm4, %eax 6270; AVX1-NEXT: testb $1, %al 6271; AVX1-NEXT: je LBB55_36 6272; AVX1-NEXT: ## BB#35: ## %cond.load49 6273; AVX1-NEXT: vpinsrw $1, 34(%rdi), %xmm5, %xmm6 6274; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6275; AVX1-NEXT: LBB55_36: ## %else50 6276; AVX1-NEXT: vpextrb $2, %xmm4, %eax 6277; AVX1-NEXT: testb $1, %al 6278; AVX1-NEXT: je LBB55_38 6279; AVX1-NEXT: ## BB#37: ## %cond.load52 6280; AVX1-NEXT: vpinsrw $2, 36(%rdi), %xmm5, %xmm6 6281; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6282; AVX1-NEXT: LBB55_38: ## %else53 6283; AVX1-NEXT: vpextrb $3, %xmm4, %eax 6284; AVX1-NEXT: testb $1, %al 6285; AVX1-NEXT: je LBB55_40 6286; AVX1-NEXT: ## BB#39: ## %cond.load55 6287; AVX1-NEXT: vpinsrw $3, 38(%rdi), %xmm5, %xmm6 6288; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6289; AVX1-NEXT: LBB55_40: ## %else56 6290; AVX1-NEXT: vpextrb $4, %xmm4, %eax 6291; AVX1-NEXT: testb $1, %al 6292; AVX1-NEXT: je LBB55_42 6293; AVX1-NEXT: ## BB#41: ## %cond.load58 6294; AVX1-NEXT: vpinsrw $4, 40(%rdi), %xmm5, %xmm6 6295; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6296; AVX1-NEXT: LBB55_42: ## %else59 6297; AVX1-NEXT: vpextrb $5, %xmm4, %eax 6298; AVX1-NEXT: testb $1, %al 6299; AVX1-NEXT: je LBB55_44 6300; AVX1-NEXT: ## BB#43: ## %cond.load61 6301; AVX1-NEXT: vpinsrw $5, 42(%rdi), %xmm5, %xmm6 6302; AVX1-NEXT: vblendps {{.*#+}} ymm5 = 
ymm6[0,1,2,3],ymm5[4,5,6,7] 6303; AVX1-NEXT: LBB55_44: ## %else62 6304; AVX1-NEXT: vpextrb $6, %xmm4, %eax 6305; AVX1-NEXT: testb $1, %al 6306; AVX1-NEXT: je LBB55_46 6307; AVX1-NEXT: ## BB#45: ## %cond.load64 6308; AVX1-NEXT: vpinsrw $6, 44(%rdi), %xmm5, %xmm6 6309; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6310; AVX1-NEXT: LBB55_46: ## %else65 6311; AVX1-NEXT: vpextrb $7, %xmm4, %eax 6312; AVX1-NEXT: testb $1, %al 6313; AVX1-NEXT: je LBB55_48 6314; AVX1-NEXT: ## BB#47: ## %cond.load67 6315; AVX1-NEXT: vpinsrw $7, 46(%rdi), %xmm5, %xmm6 6316; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6317; AVX1-NEXT: LBB55_48: ## %else68 6318; AVX1-NEXT: vpextrb $8, %xmm4, %eax 6319; AVX1-NEXT: testb $1, %al 6320; AVX1-NEXT: je LBB55_50 6321; AVX1-NEXT: ## BB#49: ## %cond.load70 6322; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 6323; AVX1-NEXT: vpinsrw $0, 48(%rdi), %xmm6, %xmm6 6324; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 6325; AVX1-NEXT: LBB55_50: ## %else71 6326; AVX1-NEXT: vpextrb $9, %xmm4, %eax 6327; AVX1-NEXT: testb $1, %al 6328; AVX1-NEXT: je LBB55_52 6329; AVX1-NEXT: ## BB#51: ## %cond.load73 6330; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 6331; AVX1-NEXT: vpinsrw $1, 50(%rdi), %xmm6, %xmm6 6332; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 6333; AVX1-NEXT: LBB55_52: ## %else74 6334; AVX1-NEXT: vpextrb $10, %xmm4, %eax 6335; AVX1-NEXT: testb $1, %al 6336; AVX1-NEXT: je LBB55_54 6337; AVX1-NEXT: ## BB#53: ## %cond.load76 6338; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 6339; AVX1-NEXT: vpinsrw $2, 52(%rdi), %xmm6, %xmm6 6340; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 6341; AVX1-NEXT: LBB55_54: ## %else77 6342; AVX1-NEXT: vpextrb $11, %xmm4, %eax 6343; AVX1-NEXT: testb $1, %al 6344; AVX1-NEXT: je LBB55_56 6345; AVX1-NEXT: ## BB#55: ## %cond.load79 6346; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 6347; AVX1-NEXT: vpinsrw $3, 54(%rdi), %xmm6, %xmm6 6348; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 6349; AVX1-NEXT: LBB55_56: ## %else80 6350; AVX1-NEXT: vpextrb $12, %xmm4, %eax 6351; AVX1-NEXT: testb $1, %al 6352; AVX1-NEXT: je LBB55_58 6353; AVX1-NEXT: ## BB#57: ## %cond.load82 6354; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 6355; AVX1-NEXT: vpinsrw $4, 56(%rdi), %xmm6, %xmm6 6356; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 6357; AVX1-NEXT: LBB55_58: ## %else83 6358; AVX1-NEXT: vpextrb $13, %xmm4, %eax 6359; AVX1-NEXT: testb $1, %al 6360; AVX1-NEXT: je LBB55_60 6361; AVX1-NEXT: ## BB#59: ## %cond.load85 6362; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 6363; AVX1-NEXT: vpinsrw $5, 58(%rdi), %xmm6, %xmm6 6364; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 6365; AVX1-NEXT: LBB55_60: ## %else86 6366; AVX1-NEXT: vpextrb $14, %xmm4, %eax 6367; AVX1-NEXT: testb $1, %al 6368; AVX1-NEXT: je LBB55_62 6369; AVX1-NEXT: ## BB#61: ## %cond.load88 6370; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 6371; AVX1-NEXT: vpinsrw $6, 60(%rdi), %xmm6, %xmm6 6372; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 6373; AVX1-NEXT: LBB55_62: ## %else89 6374; AVX1-NEXT: vpextrb $15, %xmm4, %eax 6375; AVX1-NEXT: testb $1, %al 6376; AVX1-NEXT: je LBB55_64 6377; AVX1-NEXT: ## BB#63: ## %cond.load91 6378; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 6379; AVX1-NEXT: vpinsrw $7, 62(%rdi), %xmm6, %xmm6 6380; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 6381; AVX1-NEXT: LBB55_64: ## %else92 6382; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 6383; AVX1-NEXT: vpsllw $15, %xmm6, %xmm6 6384; 
AVX1-NEXT: vpsraw $15, %xmm6, %xmm6 6385; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 6386; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0 6387; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 6388; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm6, %ymm0 6389; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1 6390; AVX1-NEXT: vandps %ymm0, %ymm3, %ymm0 6391; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 6392; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero 6393; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1 6394; AVX1-NEXT: vpsraw $15, %xmm1, %xmm1 6395; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 6396; AVX1-NEXT: vpsllw $15, %xmm3, %xmm3 6397; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3 6398; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 6399; AVX1-NEXT: vandnps %ymm2, %ymm1, %ymm2 6400; AVX1-NEXT: vandps %ymm1, %ymm5, %ymm1 6401; AVX1-NEXT: vorps %ymm2, %ymm1, %ymm1 6402; AVX1-NEXT: retq 6403; 6404; AVX2-LABEL: test_mask_load_32xi16: 6405; AVX2: ## BB#0: 6406; AVX2-NEXT: vpextrb $0, %xmm0, %eax 6407; AVX2-NEXT: testb $1, %al 6408; AVX2-NEXT: je LBB55_2 6409; AVX2-NEXT: ## BB#1: ## %cond.load 6410; AVX2-NEXT: movzwl (%rdi), %eax 6411; AVX2-NEXT: vmovd %eax, %xmm3 6412; AVX2-NEXT: LBB55_2: ## %else 6413; AVX2-NEXT: vpextrb $1, %xmm0, %eax 6414; AVX2-NEXT: testb $1, %al 6415; AVX2-NEXT: je LBB55_4 6416; AVX2-NEXT: ## BB#3: ## %cond.load1 6417; AVX2-NEXT: vpinsrw $1, 2(%rdi), %xmm3, %xmm4 6418; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6419; AVX2-NEXT: LBB55_4: ## %else2 6420; AVX2-NEXT: vpextrb $2, %xmm0, %eax 6421; AVX2-NEXT: testb $1, %al 6422; AVX2-NEXT: je LBB55_6 6423; AVX2-NEXT: ## BB#5: ## %cond.load4 6424; AVX2-NEXT: vpinsrw $2, 4(%rdi), %xmm3, %xmm4 6425; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6426; AVX2-NEXT: LBB55_6: ## %else5 6427; AVX2-NEXT: vpextrb $3, %xmm0, %eax 6428; AVX2-NEXT: testb $1, %al 6429; AVX2-NEXT: je LBB55_8 6430; AVX2-NEXT: ## BB#7: ## %cond.load7 6431; AVX2-NEXT: vpinsrw $3, 6(%rdi), %xmm3, %xmm4 6432; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6433; AVX2-NEXT: LBB55_8: ## %else8 6434; AVX2-NEXT: vpextrb $4, %xmm0, %eax 6435; AVX2-NEXT: testb $1, %al 6436; AVX2-NEXT: je LBB55_10 6437; AVX2-NEXT: ## BB#9: ## %cond.load10 6438; AVX2-NEXT: vpinsrw $4, 8(%rdi), %xmm3, %xmm4 6439; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6440; AVX2-NEXT: LBB55_10: ## %else11 6441; AVX2-NEXT: vpextrb $5, %xmm0, %eax 6442; AVX2-NEXT: testb $1, %al 6443; AVX2-NEXT: je LBB55_12 6444; AVX2-NEXT: ## BB#11: ## %cond.load13 6445; AVX2-NEXT: vpinsrw $5, 10(%rdi), %xmm3, %xmm4 6446; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6447; AVX2-NEXT: LBB55_12: ## %else14 6448; AVX2-NEXT: vpextrb $6, %xmm0, %eax 6449; AVX2-NEXT: testb $1, %al 6450; AVX2-NEXT: je LBB55_14 6451; AVX2-NEXT: ## BB#13: ## %cond.load16 6452; AVX2-NEXT: vpinsrw $6, 12(%rdi), %xmm3, %xmm4 6453; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6454; AVX2-NEXT: LBB55_14: ## %else17 6455; AVX2-NEXT: vpextrb $7, %xmm0, %eax 6456; AVX2-NEXT: testb $1, %al 6457; AVX2-NEXT: je LBB55_16 6458; AVX2-NEXT: ## BB#15: ## %cond.load19 6459; AVX2-NEXT: vpinsrw $7, 14(%rdi), %xmm3, %xmm4 6460; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6461; AVX2-NEXT: LBB55_16: ## %else20 6462; AVX2-NEXT: vpextrb $8, %xmm0, %eax 6463; AVX2-NEXT: testb $1, %al 6464; AVX2-NEXT: je LBB55_18 6465; 
AVX2-NEXT: ## BB#17: ## %cond.load22 6466; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 6467; AVX2-NEXT: vpinsrw $0, 16(%rdi), %xmm4, %xmm4 6468; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 6469; AVX2-NEXT: LBB55_18: ## %else23 6470; AVX2-NEXT: vpextrb $9, %xmm0, %eax 6471; AVX2-NEXT: testb $1, %al 6472; AVX2-NEXT: je LBB55_20 6473; AVX2-NEXT: ## BB#19: ## %cond.load25 6474; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 6475; AVX2-NEXT: vpinsrw $1, 18(%rdi), %xmm4, %xmm4 6476; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 6477; AVX2-NEXT: LBB55_20: ## %else26 6478; AVX2-NEXT: vpextrb $10, %xmm0, %eax 6479; AVX2-NEXT: testb $1, %al 6480; AVX2-NEXT: je LBB55_22 6481; AVX2-NEXT: ## BB#21: ## %cond.load28 6482; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 6483; AVX2-NEXT: vpinsrw $2, 20(%rdi), %xmm4, %xmm4 6484; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 6485; AVX2-NEXT: LBB55_22: ## %else29 6486; AVX2-NEXT: vpextrb $11, %xmm0, %eax 6487; AVX2-NEXT: testb $1, %al 6488; AVX2-NEXT: je LBB55_24 6489; AVX2-NEXT: ## BB#23: ## %cond.load31 6490; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 6491; AVX2-NEXT: vpinsrw $3, 22(%rdi), %xmm4, %xmm4 6492; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 6493; AVX2-NEXT: LBB55_24: ## %else32 6494; AVX2-NEXT: vpextrb $12, %xmm0, %eax 6495; AVX2-NEXT: testb $1, %al 6496; AVX2-NEXT: je LBB55_26 6497; AVX2-NEXT: ## BB#25: ## %cond.load34 6498; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 6499; AVX2-NEXT: vpinsrw $4, 24(%rdi), %xmm4, %xmm4 6500; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 6501; AVX2-NEXT: LBB55_26: ## %else35 6502; AVX2-NEXT: vpextrb $13, %xmm0, %eax 6503; AVX2-NEXT: testb $1, %al 6504; AVX2-NEXT: je LBB55_28 6505; AVX2-NEXT: ## BB#27: ## %cond.load37 6506; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 6507; AVX2-NEXT: vpinsrw $5, 26(%rdi), %xmm4, %xmm4 6508; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 6509; AVX2-NEXT: LBB55_28: ## %else38 6510; AVX2-NEXT: vpextrb $14, %xmm0, %eax 6511; AVX2-NEXT: testb $1, %al 6512; AVX2-NEXT: je LBB55_30 6513; AVX2-NEXT: ## BB#29: ## %cond.load40 6514; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 6515; AVX2-NEXT: vpinsrw $6, 28(%rdi), %xmm4, %xmm4 6516; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 6517; AVX2-NEXT: LBB55_30: ## %else41 6518; AVX2-NEXT: vpextrb $15, %xmm0, %eax 6519; AVX2-NEXT: testb $1, %al 6520; AVX2-NEXT: je LBB55_32 6521; AVX2-NEXT: ## BB#31: ## %cond.load43 6522; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 6523; AVX2-NEXT: vpinsrw $7, 30(%rdi), %xmm4, %xmm4 6524; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 6525; AVX2-NEXT: LBB55_32: ## %else44 6526; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 6527; AVX2-NEXT: vpextrb $0, %xmm4, %eax 6528; AVX2-NEXT: testb $1, %al 6529; AVX2-NEXT: je LBB55_34 6530; AVX2-NEXT: ## BB#33: ## %cond.load46 6531; AVX2-NEXT: vpinsrw $0, 32(%rdi), %xmm0, %xmm5 6532; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7] 6533; AVX2-NEXT: LBB55_34: ## %else47 6534; AVX2-NEXT: vpextrb $1, %xmm4, %eax 6535; AVX2-NEXT: testb $1, %al 6536; AVX2-NEXT: je LBB55_36 6537; AVX2-NEXT: ## BB#35: ## %cond.load49 6538; AVX2-NEXT: vpinsrw $1, 34(%rdi), %xmm5, %xmm6 6539; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6540; AVX2-NEXT: LBB55_36: ## %else50 6541; AVX2-NEXT: vpextrb $2, %xmm4, %eax 6542; AVX2-NEXT: testb $1, %al 6543; AVX2-NEXT: je LBB55_38 6544; AVX2-NEXT: ## BB#37: ## %cond.load52 6545; AVX2-NEXT: vpinsrw $2, 36(%rdi), %xmm5, %xmm6 6546; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6547; AVX2-NEXT: LBB55_38: ## %else53 6548; AVX2-NEXT: vpextrb 
$3, %xmm4, %eax 6549; AVX2-NEXT: testb $1, %al 6550; AVX2-NEXT: je LBB55_40 6551; AVX2-NEXT: ## BB#39: ## %cond.load55 6552; AVX2-NEXT: vpinsrw $3, 38(%rdi), %xmm5, %xmm6 6553; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6554; AVX2-NEXT: LBB55_40: ## %else56 6555; AVX2-NEXT: vpextrb $4, %xmm4, %eax 6556; AVX2-NEXT: testb $1, %al 6557; AVX2-NEXT: je LBB55_42 6558; AVX2-NEXT: ## BB#41: ## %cond.load58 6559; AVX2-NEXT: vpinsrw $4, 40(%rdi), %xmm5, %xmm6 6560; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6561; AVX2-NEXT: LBB55_42: ## %else59 6562; AVX2-NEXT: vpextrb $5, %xmm4, %eax 6563; AVX2-NEXT: testb $1, %al 6564; AVX2-NEXT: je LBB55_44 6565; AVX2-NEXT: ## BB#43: ## %cond.load61 6566; AVX2-NEXT: vpinsrw $5, 42(%rdi), %xmm5, %xmm6 6567; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6568; AVX2-NEXT: LBB55_44: ## %else62 6569; AVX2-NEXT: vpextrb $6, %xmm4, %eax 6570; AVX2-NEXT: testb $1, %al 6571; AVX2-NEXT: je LBB55_46 6572; AVX2-NEXT: ## BB#45: ## %cond.load64 6573; AVX2-NEXT: vpinsrw $6, 44(%rdi), %xmm5, %xmm6 6574; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6575; AVX2-NEXT: LBB55_46: ## %else65 6576; AVX2-NEXT: vpextrb $7, %xmm4, %eax 6577; AVX2-NEXT: testb $1, %al 6578; AVX2-NEXT: je LBB55_48 6579; AVX2-NEXT: ## BB#47: ## %cond.load67 6580; AVX2-NEXT: vpinsrw $7, 46(%rdi), %xmm5, %xmm6 6581; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] 6582; AVX2-NEXT: LBB55_48: ## %else68 6583; AVX2-NEXT: vpextrb $8, %xmm4, %eax 6584; AVX2-NEXT: testb $1, %al 6585; AVX2-NEXT: je LBB55_50 6586; AVX2-NEXT: ## BB#49: ## %cond.load70 6587; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 6588; AVX2-NEXT: vpinsrw $0, 48(%rdi), %xmm6, %xmm6 6589; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 6590; AVX2-NEXT: LBB55_50: ## %else71 6591; AVX2-NEXT: vpextrb $9, %xmm4, %eax 6592; AVX2-NEXT: testb $1, %al 6593; AVX2-NEXT: je LBB55_52 6594; AVX2-NEXT: ## BB#51: ## %cond.load73 6595; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 6596; AVX2-NEXT: vpinsrw $1, 50(%rdi), %xmm6, %xmm6 6597; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 6598; AVX2-NEXT: LBB55_52: ## %else74 6599; AVX2-NEXT: vpextrb $10, %xmm4, %eax 6600; AVX2-NEXT: testb $1, %al 6601; AVX2-NEXT: je LBB55_54 6602; AVX2-NEXT: ## BB#53: ## %cond.load76 6603; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 6604; AVX2-NEXT: vpinsrw $2, 52(%rdi), %xmm6, %xmm6 6605; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 6606; AVX2-NEXT: LBB55_54: ## %else77 6607; AVX2-NEXT: vpextrb $11, %xmm4, %eax 6608; AVX2-NEXT: testb $1, %al 6609; AVX2-NEXT: je LBB55_56 6610; AVX2-NEXT: ## BB#55: ## %cond.load79 6611; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 6612; AVX2-NEXT: vpinsrw $3, 54(%rdi), %xmm6, %xmm6 6613; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 6614; AVX2-NEXT: LBB55_56: ## %else80 6615; AVX2-NEXT: vpextrb $12, %xmm4, %eax 6616; AVX2-NEXT: testb $1, %al 6617; AVX2-NEXT: je LBB55_58 6618; AVX2-NEXT: ## BB#57: ## %cond.load82 6619; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 6620; AVX2-NEXT: vpinsrw $4, 56(%rdi), %xmm6, %xmm6 6621; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 6622; AVX2-NEXT: LBB55_58: ## %else83 6623; AVX2-NEXT: vpextrb $13, %xmm4, %eax 6624; AVX2-NEXT: testb $1, %al 6625; AVX2-NEXT: je LBB55_60 6626; AVX2-NEXT: ## BB#59: ## %cond.load85 6627; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 6628; AVX2-NEXT: vpinsrw $5, 58(%rdi), %xmm6, %xmm6 6629; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 6630; AVX2-NEXT: LBB55_60: ## %else86 6631; AVX2-NEXT: vpextrb $14, %xmm4, %eax 6632; 
AVX2-NEXT: testb $1, %al 6633; AVX2-NEXT: je LBB55_62 6634; AVX2-NEXT: ## BB#61: ## %cond.load88 6635; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 6636; AVX2-NEXT: vpinsrw $6, 60(%rdi), %xmm6, %xmm6 6637; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 6638; AVX2-NEXT: LBB55_62: ## %else89 6639; AVX2-NEXT: vpextrb $15, %xmm4, %eax 6640; AVX2-NEXT: testb $1, %al 6641; AVX2-NEXT: je LBB55_64 6642; AVX2-NEXT: ## BB#63: ## %cond.load91 6643; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 6644; AVX2-NEXT: vpinsrw $7, 62(%rdi), %xmm6, %xmm6 6645; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 6646; AVX2-NEXT: LBB55_64: ## %else92 6647; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 6648; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0 6649; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 6650; AVX2-NEXT: vpblendvb %ymm0, %ymm3, %ymm1, %ymm0 6651; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero 6652; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1 6653; AVX2-NEXT: vpsraw $15, %ymm1, %ymm1 6654; AVX2-NEXT: vpblendvb %ymm1, %ymm5, %ymm2, %ymm1 6655; AVX2-NEXT: retq 6656; 6657; AVX512F-LABEL: test_mask_load_32xi16: 6658; AVX512F: ## BB#0: 6659; AVX512F-NEXT: vpextrb $0, %xmm0, %eax 6660; AVX512F-NEXT: testb $1, %al 6661; AVX512F-NEXT: je LBB55_2 6662; AVX512F-NEXT: ## BB#1: ## %cond.load 6663; AVX512F-NEXT: movzwl (%rdi), %eax 6664; AVX512F-NEXT: vmovd %eax, %xmm3 6665; AVX512F-NEXT: LBB55_2: ## %else 6666; AVX512F-NEXT: vpextrb $1, %xmm0, %eax 6667; AVX512F-NEXT: testb $1, %al 6668; AVX512F-NEXT: je LBB55_4 6669; AVX512F-NEXT: ## BB#3: ## %cond.load1 6670; AVX512F-NEXT: vpinsrw $1, 2(%rdi), %xmm3, %xmm4 6671; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6672; AVX512F-NEXT: LBB55_4: ## %else2 6673; AVX512F-NEXT: vpextrb $2, %xmm0, %eax 6674; AVX512F-NEXT: testb $1, %al 6675; AVX512F-NEXT: je LBB55_6 6676; AVX512F-NEXT: ## BB#5: ## %cond.load4 6677; AVX512F-NEXT: vpinsrw $2, 4(%rdi), %xmm3, %xmm4 6678; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6679; AVX512F-NEXT: LBB55_6: ## %else5 6680; AVX512F-NEXT: vpextrb $3, %xmm0, %eax 6681; AVX512F-NEXT: testb $1, %al 6682; AVX512F-NEXT: je LBB55_8 6683; AVX512F-NEXT: ## BB#7: ## %cond.load7 6684; AVX512F-NEXT: vpinsrw $3, 6(%rdi), %xmm3, %xmm4 6685; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6686; AVX512F-NEXT: LBB55_8: ## %else8 6687; AVX512F-NEXT: vpextrb $4, %xmm0, %eax 6688; AVX512F-NEXT: testb $1, %al 6689; AVX512F-NEXT: je LBB55_10 6690; AVX512F-NEXT: ## BB#9: ## %cond.load10 6691; AVX512F-NEXT: vpinsrw $4, 8(%rdi), %xmm3, %xmm4 6692; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6693; AVX512F-NEXT: LBB55_10: ## %else11 6694; AVX512F-NEXT: vpextrb $5, %xmm0, %eax 6695; AVX512F-NEXT: testb $1, %al 6696; AVX512F-NEXT: je LBB55_12 6697; AVX512F-NEXT: ## BB#11: ## %cond.load13 6698; AVX512F-NEXT: vpinsrw $5, 10(%rdi), %xmm3, %xmm4 6699; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] 6700; AVX512F-NEXT: LBB55_12: ## %else14 6701; AVX512F-NEXT: vpextrb $6, %xmm0, %eax 6702; AVX512F-NEXT: testb $1, %al 6703; AVX512F-NEXT: je LBB55_14 6704; AVX512F-NEXT: ## BB#13: ## %cond.load16 6705; 
; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT: LBB55_14: ## %else17
; AVX512F-NEXT: vpextrb $7, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_16
; AVX512F-NEXT: ## BB#15: ## %cond.load19
; AVX512F-NEXT: vpinsrw $7, 14(%rdi), %xmm3, %xmm4
; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT: LBB55_16: ## %else20
; AVX512F-NEXT: vpextrb $8, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_18
; AVX512F-NEXT: ## BB#17: ## %cond.load22
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT: vpinsrw $0, 16(%rdi), %xmm4, %xmm4
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: LBB55_18: ## %else23
; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_20
; AVX512F-NEXT: ## BB#19: ## %cond.load25
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT: vpinsrw $1, 18(%rdi), %xmm4, %xmm4
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: LBB55_20: ## %else26
; AVX512F-NEXT: vpextrb $10, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_22
; AVX512F-NEXT: ## BB#21: ## %cond.load28
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT: vpinsrw $2, 20(%rdi), %xmm4, %xmm4
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: LBB55_22: ## %else29
; AVX512F-NEXT: vpextrb $11, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_24
; AVX512F-NEXT: ## BB#23: ## %cond.load31
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT: vpinsrw $3, 22(%rdi), %xmm4, %xmm4
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: LBB55_24: ## %else32
; AVX512F-NEXT: vpextrb $12, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_26
; AVX512F-NEXT: ## BB#25: ## %cond.load34
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT: vpinsrw $4, 24(%rdi), %xmm4, %xmm4
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: LBB55_26: ## %else35
; AVX512F-NEXT: vpextrb $13, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_28
; AVX512F-NEXT: ## BB#27: ## %cond.load37
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT: vpinsrw $5, 26(%rdi), %xmm4, %xmm4
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: LBB55_28: ## %else38
; AVX512F-NEXT: vpextrb $14, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_30
; AVX512F-NEXT: ## BB#29: ## %cond.load40
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT: vpinsrw $6, 28(%rdi), %xmm4, %xmm4
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: LBB55_30: ## %else41
; AVX512F-NEXT: vpextrb $15, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_32
; AVX512F-NEXT: ## BB#31: ## %cond.load43
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512F-NEXT: vpinsrw $7, 30(%rdi), %xmm4, %xmm4
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: LBB55_32: ## %else44
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm4
; AVX512F-NEXT: vpextrb $0, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_34
; AVX512F-NEXT: ## BB#33: ## %cond.load46
; AVX512F-NEXT: vpinsrw $0, 32(%rdi), %xmm0, %xmm5
; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: LBB55_34: ## %else47
; AVX512F-NEXT: vpextrb $1, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_36
; AVX512F-NEXT: ## BB#35: ## %cond.load49
; AVX512F-NEXT: vpinsrw $1, 34(%rdi), %xmm5, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT: LBB55_36: ## %else50
; AVX512F-NEXT: vpextrb $2, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_38
; AVX512F-NEXT: ## BB#37: ## %cond.load52
; AVX512F-NEXT: vpinsrw $2, 36(%rdi), %xmm5, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT: LBB55_38: ## %else53
; AVX512F-NEXT: vpextrb $3, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_40
; AVX512F-NEXT: ## BB#39: ## %cond.load55
; AVX512F-NEXT: vpinsrw $3, 38(%rdi), %xmm5, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT: LBB55_40: ## %else56
; AVX512F-NEXT: vpextrb $4, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_42
; AVX512F-NEXT: ## BB#41: ## %cond.load58
; AVX512F-NEXT: vpinsrw $4, 40(%rdi), %xmm5, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT: LBB55_42: ## %else59
; AVX512F-NEXT: vpextrb $5, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_44
; AVX512F-NEXT: ## BB#43: ## %cond.load61
; AVX512F-NEXT: vpinsrw $5, 42(%rdi), %xmm5, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT: LBB55_44: ## %else62
; AVX512F-NEXT: vpextrb $6, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_46
; AVX512F-NEXT: ## BB#45: ## %cond.load64
; AVX512F-NEXT: vpinsrw $6, 44(%rdi), %xmm5, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT: LBB55_46: ## %else65
; AVX512F-NEXT: vpextrb $7, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_48
; AVX512F-NEXT: ## BB#47: ## %cond.load67
; AVX512F-NEXT: vpinsrw $7, 46(%rdi), %xmm5, %xmm6
; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
; AVX512F-NEXT: LBB55_48: ## %else68
; AVX512F-NEXT: vpextrb $8, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_50
; AVX512F-NEXT: ## BB#49: ## %cond.load70
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT: vpinsrw $0, 48(%rdi), %xmm6, %xmm6
; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT: LBB55_50: ## %else71
; AVX512F-NEXT: vpextrb $9, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_52
; AVX512F-NEXT: ## BB#51: ## %cond.load73
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT: vpinsrw $1, 50(%rdi), %xmm6, %xmm6
; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT: LBB55_52: ## %else74
; AVX512F-NEXT: vpextrb $10, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_54
; AVX512F-NEXT: ## BB#53: ## %cond.load76
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT: vpinsrw $2, 52(%rdi), %xmm6, %xmm6
; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT: LBB55_54: ## %else77
; AVX512F-NEXT: vpextrb $11, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_56
; AVX512F-NEXT: ## BB#55: ## %cond.load79
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT: vpinsrw $3, 54(%rdi), %xmm6, %xmm6
; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT: LBB55_56: ## %else80
; AVX512F-NEXT: vpextrb $12, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_58
; AVX512F-NEXT: ## BB#57: ## %cond.load82
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT: vpinsrw $4, 56(%rdi), %xmm6, %xmm6
; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT: LBB55_58: ## %else83
; AVX512F-NEXT: vpextrb $13, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_60
; AVX512F-NEXT: ## BB#59: ## %cond.load85
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT: vpinsrw $5, 58(%rdi), %xmm6, %xmm6
; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT: LBB55_60: ## %else86
; AVX512F-NEXT: vpextrb $14, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_62
; AVX512F-NEXT: ## BB#61: ## %cond.load88
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT: vpinsrw $6, 60(%rdi), %xmm6, %xmm6
; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT: LBB55_62: ## %else89
; AVX512F-NEXT: vpextrb $15, %xmm4, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB55_64
; AVX512F-NEXT: ## BB#63: ## %cond.load91
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX512F-NEXT: vpinsrw $7, 62(%rdi), %xmm6, %xmm6
; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
; AVX512F-NEXT: LBB55_64: ## %else92
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm3, %ymm1, %ymm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; AVX512F-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX512F-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm5, %ymm2, %ymm1
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_mask_load_32xi16:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k1
; SKX-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
 %res = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* %addr, i32 4, <32 x i1>%mask, <32 x i16> %val)
 ret <32 x i16> %res
}
declare <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)
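
; Note: the scalarized i8 masked-store lowerings below mirror the masked-load
; scalarization above: each i1 mask lane is extracted (vpextrb), tested, and
; branched around a single-element store. Only SKX (avx512bw+avx512vl) has a
; legal byte-masked store (vmovdqu8 {%k1}).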
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; AVX-LABEL: test_mask_store_16xi8:
; AVX: ## BB#0:
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_2
; AVX-NEXT: ## BB#1: ## %cond.store
; AVX-NEXT: vpextrb $0, %xmm1, (%rdi)
; AVX-NEXT: LBB56_2: ## %else
; AVX-NEXT: vpextrb $1, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_4
; AVX-NEXT: ## BB#3: ## %cond.store1
; AVX-NEXT: vpextrb $1, %xmm1, 1(%rdi)
; AVX-NEXT: LBB56_4: ## %else2
; AVX-NEXT: vpextrb $2, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_6
; AVX-NEXT: ## BB#5: ## %cond.store3
; AVX-NEXT: vpextrb $2, %xmm1, 2(%rdi)
; AVX-NEXT: LBB56_6: ## %else4
; AVX-NEXT: vpextrb $3, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_8
; AVX-NEXT: ## BB#7: ## %cond.store5
; AVX-NEXT: vpextrb $3, %xmm1, 3(%rdi)
; AVX-NEXT: LBB56_8: ## %else6
; AVX-NEXT: vpextrb $4, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_10
; AVX-NEXT: ## BB#9: ## %cond.store7
; AVX-NEXT: vpextrb $4, %xmm1, 4(%rdi)
; AVX-NEXT: LBB56_10: ## %else8
; AVX-NEXT: vpextrb $5, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_12
; AVX-NEXT: ## BB#11: ## %cond.store9
; AVX-NEXT: vpextrb $5, %xmm1, 5(%rdi)
; AVX-NEXT: LBB56_12: ## %else10
; AVX-NEXT: vpextrb $6, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_14
; AVX-NEXT: ## BB#13: ## %cond.store11
; AVX-NEXT: vpextrb $6, %xmm1, 6(%rdi)
; AVX-NEXT: LBB56_14: ## %else12
; AVX-NEXT: vpextrb $7, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_16
; AVX-NEXT: ## BB#15: ## %cond.store13
; AVX-NEXT: vpextrb $7, %xmm1, 7(%rdi)
; AVX-NEXT: LBB56_16: ## %else14
; AVX-NEXT: vpextrb $8, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_18
; AVX-NEXT: ## BB#17: ## %cond.store15
; AVX-NEXT: vpextrb $8, %xmm1, 8(%rdi)
; AVX-NEXT: LBB56_18: ## %else16
; AVX-NEXT: vpextrb $9, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_20
; AVX-NEXT: ## BB#19: ## %cond.store17
; AVX-NEXT: vpextrb $9, %xmm1, 9(%rdi)
; AVX-NEXT: LBB56_20: ## %else18
; AVX-NEXT: vpextrb $10, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_22
; AVX-NEXT: ## BB#21: ## %cond.store19
; AVX-NEXT: vpextrb $10, %xmm1, 10(%rdi)
; AVX-NEXT: LBB56_22: ## %else20
; AVX-NEXT: vpextrb $11, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_24
; AVX-NEXT: ## BB#23: ## %cond.store21
; AVX-NEXT: vpextrb $11, %xmm1, 11(%rdi)
; AVX-NEXT: LBB56_24: ## %else22
; AVX-NEXT: vpextrb $12, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_26
; AVX-NEXT: ## BB#25: ## %cond.store23
; AVX-NEXT: vpextrb $12, %xmm1, 12(%rdi)
; AVX-NEXT: LBB56_26: ## %else24
; AVX-NEXT: vpextrb $13, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_28
; AVX-NEXT: ## BB#27: ## %cond.store25
; AVX-NEXT: vpextrb $13, %xmm1, 13(%rdi)
; AVX-NEXT: LBB56_28: ## %else26
; AVX-NEXT: vpextrb $14, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_30
; AVX-NEXT: ## BB#29: ## %cond.store27
; AVX-NEXT: vpextrb $14, %xmm1, 14(%rdi)
; AVX-NEXT: LBB56_30: ## %else28
; AVX-NEXT: vpextrb $15, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB56_32
; AVX-NEXT: ## BB#31: ## %cond.store29
; AVX-NEXT: vpextrb $15, %xmm1, 15(%rdi)
; AVX-NEXT: LBB56_32: ## %else30
; AVX-NEXT: retq
;
; AVX512F-LABEL: test_mask_store_16xi8:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_2
; AVX512F-NEXT: ## BB#1: ## %cond.store
; AVX512F-NEXT: vpextrb $0, %xmm1, (%rdi)
; AVX512F-NEXT: LBB56_2: ## %else
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_4
; AVX512F-NEXT: ## BB#3: ## %cond.store1
; AVX512F-NEXT: vpextrb $1, %xmm1, 1(%rdi)
; AVX512F-NEXT: LBB56_4: ## %else2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_6
; AVX512F-NEXT: ## BB#5: ## %cond.store3
; AVX512F-NEXT: vpextrb $2, %xmm1, 2(%rdi)
; AVX512F-NEXT: LBB56_6: ## %else4
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_8
; AVX512F-NEXT: ## BB#7: ## %cond.store5
; AVX512F-NEXT: vpextrb $3, %xmm1, 3(%rdi)
; AVX512F-NEXT: LBB56_8: ## %else6
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_10
; AVX512F-NEXT: ## BB#9: ## %cond.store7
; AVX512F-NEXT: vpextrb $4, %xmm1, 4(%rdi)
; AVX512F-NEXT: LBB56_10: ## %else8
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_12
; AVX512F-NEXT: ## BB#11: ## %cond.store9
; AVX512F-NEXT: vpextrb $5, %xmm1, 5(%rdi)
; AVX512F-NEXT: LBB56_12: ## %else10
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_14
; AVX512F-NEXT: ## BB#13: ## %cond.store11
; AVX512F-NEXT: vpextrb $6, %xmm1, 6(%rdi)
; AVX512F-NEXT: LBB56_14: ## %else12
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_16
; AVX512F-NEXT: ## BB#15: ## %cond.store13
; AVX512F-NEXT: vpextrb $7, %xmm1, 7(%rdi)
; AVX512F-NEXT: LBB56_16: ## %else14
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_18
; AVX512F-NEXT: ## BB#17: ## %cond.store15
; AVX512F-NEXT: vpextrb $8, %xmm1, 8(%rdi)
; AVX512F-NEXT: LBB56_18: ## %else16
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_20
; AVX512F-NEXT: ## BB#19: ## %cond.store17
; AVX512F-NEXT: vpextrb $9, %xmm1, 9(%rdi)
; AVX512F-NEXT: LBB56_20: ## %else18
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_22
; AVX512F-NEXT: ## BB#21: ## %cond.store19
; AVX512F-NEXT: vpextrb $10, %xmm1, 10(%rdi)
; AVX512F-NEXT: LBB56_22: ## %else20
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_24
; AVX512F-NEXT: ## BB#23: ## %cond.store21
; AVX512F-NEXT: vpextrb $11, %xmm1, 11(%rdi)
; AVX512F-NEXT: LBB56_24: ## %else22
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_26
; AVX512F-NEXT: ## BB#25: ## %cond.store23
; AVX512F-NEXT: vpextrb $12, %xmm1, 12(%rdi)
; AVX512F-NEXT: LBB56_26: ## %else24
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_28
; AVX512F-NEXT: ## BB#27: ## %cond.store25
; AVX512F-NEXT: vpextrb $13, %xmm1, 13(%rdi)
; AVX512F-NEXT: LBB56_28: ## %else26
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_30
; AVX512F-NEXT: ## BB#29: ## %cond.store27
; AVX512F-NEXT: vpextrb $14, %xmm1, 14(%rdi)
; AVX512F-NEXT: LBB56_30: ## %else28
; AVX512F-NEXT: kshiftlw $0, %k0, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB56_32
; AVX512F-NEXT: ## BB#31: ## %cond.store29
; AVX512F-NEXT: vpextrb $15, %xmm1, 15(%rdi)
; AVX512F-NEXT: LBB56_32: ## %else30
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_mask_store_16xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vmovdqu8 %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
 call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
 ret void
}
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
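
; For the <32 x i8> store, the 256-bit value additionally needs a
; vextractf128/vextracti128 of the high half before each of the upper 16 lane
; stores. AVX512F still scalarizes because vmovdqu8 requires avx512bw; SKX
; stores the whole ymm with a single masked vmovdqu8.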
define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; AVX1-LABEL: test_mask_store_32xi8:
; AVX1: ## BB#0:
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_2
; AVX1-NEXT: ## BB#1: ## %cond.store
; AVX1-NEXT: vpextrb $0, %xmm1, (%rdi)
; AVX1-NEXT: LBB57_2: ## %else
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_4
; AVX1-NEXT: ## BB#3: ## %cond.store1
; AVX1-NEXT: vpextrb $1, %xmm1, 1(%rdi)
; AVX1-NEXT: LBB57_4: ## %else2
; AVX1-NEXT: vpextrb $2, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_6
; AVX1-NEXT: ## BB#5: ## %cond.store3
; AVX1-NEXT: vpextrb $2, %xmm1, 2(%rdi)
; AVX1-NEXT: LBB57_6: ## %else4
; AVX1-NEXT: vpextrb $3, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_8
; AVX1-NEXT: ## BB#7: ## %cond.store5
; AVX1-NEXT: vpextrb $3, %xmm1, 3(%rdi)
; AVX1-NEXT: LBB57_8: ## %else6
; AVX1-NEXT: vpextrb $4, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_10
; AVX1-NEXT: ## BB#9: ## %cond.store7
; AVX1-NEXT: vpextrb $4, %xmm1, 4(%rdi)
; AVX1-NEXT: LBB57_10: ## %else8
; AVX1-NEXT: vpextrb $5, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_12
; AVX1-NEXT: ## BB#11: ## %cond.store9
; AVX1-NEXT: vpextrb $5, %xmm1, 5(%rdi)
; AVX1-NEXT: LBB57_12: ## %else10
; AVX1-NEXT: vpextrb $6, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_14
; AVX1-NEXT: ## BB#13: ## %cond.store11
; AVX1-NEXT: vpextrb $6, %xmm1, 6(%rdi)
; AVX1-NEXT: LBB57_14: ## %else12
; AVX1-NEXT: vpextrb $7, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_16
; AVX1-NEXT: ## BB#15: ## %cond.store13
; AVX1-NEXT: vpextrb $7, %xmm1, 7(%rdi)
; AVX1-NEXT: LBB57_16: ## %else14
; AVX1-NEXT: vpextrb $8, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_18
; AVX1-NEXT: ## BB#17: ## %cond.store15
; AVX1-NEXT: vpextrb $8, %xmm1, 8(%rdi)
; AVX1-NEXT: LBB57_18: ## %else16
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_20
; AVX1-NEXT: ## BB#19: ## %cond.store17
; AVX1-NEXT: vpextrb $9, %xmm1, 9(%rdi)
; AVX1-NEXT: LBB57_20: ## %else18
; AVX1-NEXT: vpextrb $10, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_22
; AVX1-NEXT: ## BB#21: ## %cond.store19
; AVX1-NEXT: vpextrb $10, %xmm1, 10(%rdi)
; AVX1-NEXT: LBB57_22: ## %else20
; AVX1-NEXT: vpextrb $11, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_24
; AVX1-NEXT: ## BB#23: ## %cond.store21
; AVX1-NEXT: vpextrb $11, %xmm1, 11(%rdi)
; AVX1-NEXT: LBB57_24: ## %else22
; AVX1-NEXT: vpextrb $12, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_26
; AVX1-NEXT: ## BB#25: ## %cond.store23
; AVX1-NEXT: vpextrb $12, %xmm1, 12(%rdi)
; AVX1-NEXT: LBB57_26: ## %else24
; AVX1-NEXT: vpextrb $13, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_28
; AVX1-NEXT: ## BB#27: ## %cond.store25
; AVX1-NEXT: vpextrb $13, %xmm1, 13(%rdi)
; AVX1-NEXT: LBB57_28: ## %else26
; AVX1-NEXT: vpextrb $14, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_30
; AVX1-NEXT: ## BB#29: ## %cond.store27
; AVX1-NEXT: vpextrb $14, %xmm1, 14(%rdi)
; AVX1-NEXT: LBB57_30: ## %else28
; AVX1-NEXT: vpextrb $15, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_32
; AVX1-NEXT: ## BB#31: ## %cond.store29
; AVX1-NEXT: vpextrb $15, %xmm1, 15(%rdi)
; AVX1-NEXT: LBB57_32: ## %else30
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_34
; AVX1-NEXT: ## BB#33: ## %cond.store31
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $0, %xmm2, 16(%rdi)
; AVX1-NEXT: LBB57_34: ## %else32
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_36
; AVX1-NEXT: ## BB#35: ## %cond.store33
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $1, %xmm2, 17(%rdi)
; AVX1-NEXT: LBB57_36: ## %else34
; AVX1-NEXT: vpextrb $2, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_38
; AVX1-NEXT: ## BB#37: ## %cond.store35
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $2, %xmm2, 18(%rdi)
; AVX1-NEXT: LBB57_38: ## %else36
; AVX1-NEXT: vpextrb $3, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_40
; AVX1-NEXT: ## BB#39: ## %cond.store37
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $3, %xmm2, 19(%rdi)
; AVX1-NEXT: LBB57_40: ## %else38
; AVX1-NEXT: vpextrb $4, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_42
; AVX1-NEXT: ## BB#41: ## %cond.store39
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $4, %xmm2, 20(%rdi)
; AVX1-NEXT: LBB57_42: ## %else40
; AVX1-NEXT: vpextrb $5, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_44
; AVX1-NEXT: ## BB#43: ## %cond.store41
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $5, %xmm2, 21(%rdi)
; AVX1-NEXT: LBB57_44: ## %else42
; AVX1-NEXT: vpextrb $6, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_46
; AVX1-NEXT: ## BB#45: ## %cond.store43
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $6, %xmm2, 22(%rdi)
; AVX1-NEXT: LBB57_46: ## %else44
; AVX1-NEXT: vpextrb $7, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_48
; AVX1-NEXT: ## BB#47: ## %cond.store45
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $7, %xmm2, 23(%rdi)
; AVX1-NEXT: LBB57_48: ## %else46
; AVX1-NEXT: vpextrb $8, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_50
; AVX1-NEXT: ## BB#49: ## %cond.store47
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $8, %xmm2, 24(%rdi)
; AVX1-NEXT: LBB57_50: ## %else48
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_52
; AVX1-NEXT: ## BB#51: ## %cond.store49
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $9, %xmm2, 25(%rdi)
; AVX1-NEXT: LBB57_52: ## %else50
; AVX1-NEXT: vpextrb $10, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_54
; AVX1-NEXT: ## BB#53: ## %cond.store51
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $10, %xmm2, 26(%rdi)
; AVX1-NEXT: LBB57_54: ## %else52
; AVX1-NEXT: vpextrb $11, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_56
; AVX1-NEXT: ## BB#55: ## %cond.store53
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $11, %xmm2, 27(%rdi)
; AVX1-NEXT: LBB57_56: ## %else54
; AVX1-NEXT: vpextrb $12, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_58
; AVX1-NEXT: ## BB#57: ## %cond.store55
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $12, %xmm2, 28(%rdi)
; AVX1-NEXT: LBB57_58: ## %else56
; AVX1-NEXT: vpextrb $13, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_60
; AVX1-NEXT: ## BB#59: ## %cond.store57
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $13, %xmm2, 29(%rdi)
; AVX1-NEXT: LBB57_60: ## %else58
; AVX1-NEXT: vpextrb $14, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_62
; AVX1-NEXT: ## BB#61: ## %cond.store59
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $14, %xmm2, 30(%rdi)
; AVX1-NEXT: LBB57_62: ## %else60
; AVX1-NEXT: vpextrb $15, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB57_64
; AVX1-NEXT: ## BB#63: ## %cond.store61
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $15, %xmm0, 31(%rdi)
; AVX1-NEXT: LBB57_64: ## %else62
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_mask_store_32xi8:
; AVX2: ## BB#0:
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_2
; AVX2-NEXT: ## BB#1: ## %cond.store
; AVX2-NEXT: vpextrb $0, %xmm1, (%rdi)
; AVX2-NEXT: LBB57_2: ## %else
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_4
; AVX2-NEXT: ## BB#3: ## %cond.store1
; AVX2-NEXT: vpextrb $1, %xmm1, 1(%rdi)
; AVX2-NEXT: LBB57_4: ## %else2
; AVX2-NEXT: vpextrb $2, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_6
; AVX2-NEXT: ## BB#5: ## %cond.store3
; AVX2-NEXT: vpextrb $2, %xmm1, 2(%rdi)
; AVX2-NEXT: LBB57_6: ## %else4
; AVX2-NEXT: vpextrb $3, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_8
; AVX2-NEXT: ## BB#7: ## %cond.store5
; AVX2-NEXT: vpextrb $3, %xmm1, 3(%rdi)
; AVX2-NEXT: LBB57_8: ## %else6
; AVX2-NEXT: vpextrb $4, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_10
; AVX2-NEXT: ## BB#9: ## %cond.store7
; AVX2-NEXT: vpextrb $4, %xmm1, 4(%rdi)
; AVX2-NEXT: LBB57_10: ## %else8
; AVX2-NEXT: vpextrb $5, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_12
; AVX2-NEXT: ## BB#11: ## %cond.store9
; AVX2-NEXT: vpextrb $5, %xmm1, 5(%rdi)
; AVX2-NEXT: LBB57_12: ## %else10
; AVX2-NEXT: vpextrb $6, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_14
; AVX2-NEXT: ## BB#13: ## %cond.store11
; AVX2-NEXT: vpextrb $6, %xmm1, 6(%rdi)
; AVX2-NEXT: LBB57_14: ## %else12
; AVX2-NEXT: vpextrb $7, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_16
; AVX2-NEXT: ## BB#15: ## %cond.store13
; AVX2-NEXT: vpextrb $7, %xmm1, 7(%rdi)
; AVX2-NEXT: LBB57_16: ## %else14
; AVX2-NEXT: vpextrb $8, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_18
; AVX2-NEXT: ## BB#17: ## %cond.store15
; AVX2-NEXT: vpextrb $8, %xmm1, 8(%rdi)
; AVX2-NEXT: LBB57_18: ## %else16
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_20
; AVX2-NEXT: ## BB#19: ## %cond.store17
; AVX2-NEXT: vpextrb $9, %xmm1, 9(%rdi)
; AVX2-NEXT: LBB57_20: ## %else18
; AVX2-NEXT: vpextrb $10, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_22
; AVX2-NEXT: ## BB#21: ## %cond.store19
; AVX2-NEXT: vpextrb $10, %xmm1, 10(%rdi)
; AVX2-NEXT: LBB57_22: ## %else20
; AVX2-NEXT: vpextrb $11, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_24
; AVX2-NEXT: ## BB#23: ## %cond.store21
; AVX2-NEXT: vpextrb $11, %xmm1, 11(%rdi)
; AVX2-NEXT: LBB57_24: ## %else22
; AVX2-NEXT: vpextrb $12, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_26
; AVX2-NEXT: ## BB#25: ## %cond.store23
; AVX2-NEXT: vpextrb $12, %xmm1, 12(%rdi)
; AVX2-NEXT: LBB57_26: ## %else24
; AVX2-NEXT: vpextrb $13, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_28
; AVX2-NEXT: ## BB#27: ## %cond.store25
; AVX2-NEXT: vpextrb $13, %xmm1, 13(%rdi)
; AVX2-NEXT: LBB57_28: ## %else26
; AVX2-NEXT: vpextrb $14, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_30
; AVX2-NEXT: ## BB#29: ## %cond.store27
; AVX2-NEXT: vpextrb $14, %xmm1, 14(%rdi)
; AVX2-NEXT: LBB57_30: ## %else28
; AVX2-NEXT: vpextrb $15, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_32
; AVX2-NEXT: ## BB#31: ## %cond.store29
; AVX2-NEXT: vpextrb $15, %xmm1, 15(%rdi)
; AVX2-NEXT: LBB57_32: ## %else30
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_34
; AVX2-NEXT: ## BB#33: ## %cond.store31
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $0, %xmm2, 16(%rdi)
; AVX2-NEXT: LBB57_34: ## %else32
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_36
; AVX2-NEXT: ## BB#35: ## %cond.store33
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $1, %xmm2, 17(%rdi)
; AVX2-NEXT: LBB57_36: ## %else34
; AVX2-NEXT: vpextrb $2, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_38
; AVX2-NEXT: ## BB#37: ## %cond.store35
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $2, %xmm2, 18(%rdi)
; AVX2-NEXT: LBB57_38: ## %else36
; AVX2-NEXT: vpextrb $3, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_40
; AVX2-NEXT: ## BB#39: ## %cond.store37
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $3, %xmm2, 19(%rdi)
; AVX2-NEXT: LBB57_40: ## %else38
; AVX2-NEXT: vpextrb $4, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_42
; AVX2-NEXT: ## BB#41: ## %cond.store39
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $4, %xmm2, 20(%rdi)
; AVX2-NEXT: LBB57_42: ## %else40
; AVX2-NEXT: vpextrb $5, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_44
; AVX2-NEXT: ## BB#43: ## %cond.store41
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $5, %xmm2, 21(%rdi)
; AVX2-NEXT: LBB57_44: ## %else42
; AVX2-NEXT: vpextrb $6, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_46
; AVX2-NEXT: ## BB#45: ## %cond.store43
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $6, %xmm2, 22(%rdi)
; AVX2-NEXT: LBB57_46: ## %else44
; AVX2-NEXT: vpextrb $7, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_48
; AVX2-NEXT: ## BB#47: ## %cond.store45
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $7, %xmm2, 23(%rdi)
; AVX2-NEXT: LBB57_48: ## %else46
; AVX2-NEXT: vpextrb $8, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_50
; AVX2-NEXT: ## BB#49: ## %cond.store47
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $8, %xmm2, 24(%rdi)
; AVX2-NEXT: LBB57_50: ## %else48
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_52
; AVX2-NEXT: ## BB#51: ## %cond.store49
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $9, %xmm2, 25(%rdi)
; AVX2-NEXT: LBB57_52: ## %else50
; AVX2-NEXT: vpextrb $10, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_54
; AVX2-NEXT: ## BB#53: ## %cond.store51
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $10, %xmm2, 26(%rdi)
; AVX2-NEXT: LBB57_54: ## %else52
; AVX2-NEXT: vpextrb $11, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_56
; AVX2-NEXT: ## BB#55: ## %cond.store53
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $11, %xmm2, 27(%rdi)
; AVX2-NEXT: LBB57_56: ## %else54
; AVX2-NEXT: vpextrb $12, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_58
; AVX2-NEXT: ## BB#57: ## %cond.store55
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $12, %xmm2, 28(%rdi)
; AVX2-NEXT: LBB57_58: ## %else56
; AVX2-NEXT: vpextrb $13, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_60
; AVX2-NEXT: ## BB#59: ## %cond.store57
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $13, %xmm2, 29(%rdi)
; AVX2-NEXT: LBB57_60: ## %else58
; AVX2-NEXT: vpextrb $14, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_62
; AVX2-NEXT: ## BB#61: ## %cond.store59
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $14, %xmm2, 30(%rdi)
; AVX2-NEXT: LBB57_62: ## %else60
; AVX2-NEXT: vpextrb $15, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB57_64
; AVX2-NEXT: ## BB#63: ## %cond.store61
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT: vpextrb $15, %xmm0, 31(%rdi)
; AVX2-NEXT: LBB57_64: ## %else62
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_mask_store_32xi8:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpextrb $0, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_2
; AVX512F-NEXT: ## BB#1: ## %cond.store
; AVX512F-NEXT: vpextrb $0, %xmm1, (%rdi)
; AVX512F-NEXT: LBB57_2: ## %else
; AVX512F-NEXT: vpextrb $1, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_4
; AVX512F-NEXT: ## BB#3: ## %cond.store1
; AVX512F-NEXT: vpextrb $1, %xmm1, 1(%rdi)
; AVX512F-NEXT: LBB57_4: ## %else2
; AVX512F-NEXT: vpextrb $2, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_6
; AVX512F-NEXT: ## BB#5: ## %cond.store3
; AVX512F-NEXT: vpextrb $2, %xmm1, 2(%rdi)
; AVX512F-NEXT: LBB57_6: ## %else4
; AVX512F-NEXT: vpextrb $3, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_8
; AVX512F-NEXT: ## BB#7: ## %cond.store5
; AVX512F-NEXT: vpextrb $3, %xmm1, 3(%rdi)
; AVX512F-NEXT: LBB57_8: ## %else6
; AVX512F-NEXT: vpextrb $4, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_10
; AVX512F-NEXT: ## BB#9: ## %cond.store7
; AVX512F-NEXT: vpextrb $4, %xmm1, 4(%rdi)
; AVX512F-NEXT: LBB57_10: ## %else8
; AVX512F-NEXT: vpextrb $5, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_12
; AVX512F-NEXT: ## BB#11: ## %cond.store9
; AVX512F-NEXT: vpextrb $5, %xmm1, 5(%rdi)
; AVX512F-NEXT: LBB57_12: ## %else10
; AVX512F-NEXT: vpextrb $6, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_14
; AVX512F-NEXT: ## BB#13: ## %cond.store11
; AVX512F-NEXT: vpextrb $6, %xmm1, 6(%rdi)
; AVX512F-NEXT: LBB57_14: ## %else12
; AVX512F-NEXT: vpextrb $7, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_16
; AVX512F-NEXT: ## BB#15: ## %cond.store13
; AVX512F-NEXT: vpextrb $7, %xmm1, 7(%rdi)
; AVX512F-NEXT: LBB57_16: ## %else14
; AVX512F-NEXT: vpextrb $8, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_18
; AVX512F-NEXT: ## BB#17: ## %cond.store15
; AVX512F-NEXT: vpextrb $8, %xmm1, 8(%rdi)
; AVX512F-NEXT: LBB57_18: ## %else16
; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_20
; AVX512F-NEXT: ## BB#19: ## %cond.store17
; AVX512F-NEXT: vpextrb $9, %xmm1, 9(%rdi)
; AVX512F-NEXT: LBB57_20: ## %else18
; AVX512F-NEXT: vpextrb $10, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_22
; AVX512F-NEXT: ## BB#21: ## %cond.store19
; AVX512F-NEXT: vpextrb $10, %xmm1, 10(%rdi)
; AVX512F-NEXT: LBB57_22: ## %else20
; AVX512F-NEXT: vpextrb $11, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_24
; AVX512F-NEXT: ## BB#23: ## %cond.store21
; AVX512F-NEXT: vpextrb $11, %xmm1, 11(%rdi)
; AVX512F-NEXT: LBB57_24: ## %else22
; AVX512F-NEXT: vpextrb $12, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_26
; AVX512F-NEXT: ## BB#25: ## %cond.store23
; AVX512F-NEXT: vpextrb $12, %xmm1, 12(%rdi)
; AVX512F-NEXT: LBB57_26: ## %else24
; AVX512F-NEXT: vpextrb $13, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_28
; AVX512F-NEXT: ## BB#27: ## %cond.store25
; AVX512F-NEXT: vpextrb $13, %xmm1, 13(%rdi)
; AVX512F-NEXT: LBB57_28: ## %else26
; AVX512F-NEXT: vpextrb $14, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_30
; AVX512F-NEXT: ## BB#29: ## %cond.store27
; AVX512F-NEXT: vpextrb $14, %xmm1, 14(%rdi)
; AVX512F-NEXT: LBB57_30: ## %else28
; AVX512F-NEXT: vpextrb $15, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_32
; AVX512F-NEXT: ## BB#31: ## %cond.store29
; AVX512F-NEXT: vpextrb $15, %xmm1, 15(%rdi)
; AVX512F-NEXT: LBB57_32: ## %else30
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpextrb $0, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_34
; AVX512F-NEXT: ## BB#33: ## %cond.store31
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $0, %xmm2, 16(%rdi)
; AVX512F-NEXT: LBB57_34: ## %else32
; AVX512F-NEXT: vpextrb $1, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_36
; AVX512F-NEXT: ## BB#35: ## %cond.store33
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $1, %xmm2, 17(%rdi)
; AVX512F-NEXT: LBB57_36: ## %else34
; AVX512F-NEXT: vpextrb $2, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_38
; AVX512F-NEXT: ## BB#37: ## %cond.store35
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $2, %xmm2, 18(%rdi)
; AVX512F-NEXT: LBB57_38: ## %else36
; AVX512F-NEXT: vpextrb $3, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_40
; AVX512F-NEXT: ## BB#39: ## %cond.store37
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $3, %xmm2, 19(%rdi)
; AVX512F-NEXT: LBB57_40: ## %else38
; AVX512F-NEXT: vpextrb $4, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_42
; AVX512F-NEXT: ## BB#41: ## %cond.store39
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $4, %xmm2, 20(%rdi)
; AVX512F-NEXT: LBB57_42: ## %else40
; AVX512F-NEXT: vpextrb $5, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_44
; AVX512F-NEXT: ## BB#43: ## %cond.store41
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $5, %xmm2, 21(%rdi)
; AVX512F-NEXT: LBB57_44: ## %else42
; AVX512F-NEXT: vpextrb $6, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_46
; AVX512F-NEXT: ## BB#45: ## %cond.store43
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $6, %xmm2, 22(%rdi)
; AVX512F-NEXT: LBB57_46: ## %else44
; AVX512F-NEXT: vpextrb $7, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_48
; AVX512F-NEXT: ## BB#47: ## %cond.store45
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $7, %xmm2, 23(%rdi)
; AVX512F-NEXT: LBB57_48: ## %else46
; AVX512F-NEXT: vpextrb $8, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_50
; AVX512F-NEXT: ## BB#49: ## %cond.store47
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $8, %xmm2, 24(%rdi)
; AVX512F-NEXT: LBB57_50: ## %else48
; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_52
; AVX512F-NEXT: ## BB#51: ## %cond.store49
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $9, %xmm2, 25(%rdi)
; AVX512F-NEXT: LBB57_52: ## %else50
; AVX512F-NEXT: vpextrb $10, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_54
; AVX512F-NEXT: ## BB#53: ## %cond.store51
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $10, %xmm2, 26(%rdi)
; AVX512F-NEXT: LBB57_54: ## %else52
; AVX512F-NEXT: vpextrb $11, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_56
; AVX512F-NEXT: ## BB#55: ## %cond.store53
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $11, %xmm2, 27(%rdi)
; AVX512F-NEXT: LBB57_56: ## %else54
; AVX512F-NEXT: vpextrb $12, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_58
; AVX512F-NEXT: ## BB#57: ## %cond.store55
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $12, %xmm2, 28(%rdi)
; AVX512F-NEXT: LBB57_58: ## %else56
; AVX512F-NEXT: vpextrb $13, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_60
; AVX512F-NEXT: ## BB#59: ## %cond.store57
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $13, %xmm2, 29(%rdi)
; AVX512F-NEXT: LBB57_60: ## %else58
; AVX512F-NEXT: vpextrb $14, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_62
; AVX512F-NEXT: ## BB#61: ## %cond.store59
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpextrb $14, %xmm2, 30(%rdi)
; AVX512F-NEXT: LBB57_62: ## %else60
; AVX512F-NEXT: vpextrb $15, %xmm0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB57_64
; AVX512F-NEXT: ## BB#63: ## %cond.store61
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vpextrb $15, %xmm0, 31(%rdi)
; AVX512F-NEXT: LBB57_64: ## %else62
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_mask_store_32xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k1
; SKX-NEXT: vmovdqu8 %ymm1, (%rdi) {%k1}
; SKX-NEXT: retq
 call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
 ret void
}
declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
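
; In the <64 x i1> case the mask is passed per the calling convention: the
; first lanes arrive in %dil, %sil, %dl, %cl, %r8b, and %r9b, and the rest on
; the stack, so the scalarized lowerings reload a mask byte
; (movb {{[0-9]+}}(%rsp)) before almost every lane test. The destination
; pointer is also taken from the stack (movq {{[0-9]+}}(%rsp), %rax).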
define void @test_mask_store_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
; AVX1-LABEL: test_mask_store_64xi8:
; AVX1: ## BB#0:
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT: testb $1, %dil
; AVX1-NEXT: je LBB58_2
; AVX1-NEXT: ## BB#1: ## %cond.store
; AVX1-NEXT: vpextrb $0, %xmm0, (%rax)
; AVX1-NEXT: LBB58_2: ## %else
; AVX1-NEXT: testb $1, %sil
; AVX1-NEXT: je LBB58_4
; AVX1-NEXT: ## BB#3: ## %cond.store1
; AVX1-NEXT: vpextrb $1, %xmm0, 1(%rax)
; AVX1-NEXT: LBB58_4: ## %else2
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_6
; AVX1-NEXT: ## BB#5: ## %cond.store3
; AVX1-NEXT: vpextrb $2, %xmm0, 2(%rax)
; AVX1-NEXT: LBB58_6: ## %else4
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_8
; AVX1-NEXT: ## BB#7: ## %cond.store5
; AVX1-NEXT: vpextrb $3, %xmm0, 3(%rax)
; AVX1-NEXT: LBB58_8: ## %else6
; AVX1-NEXT: testb $1, %r8b
; AVX1-NEXT: je LBB58_10
; AVX1-NEXT: ## BB#9: ## %cond.store7
; AVX1-NEXT: vpextrb $4, %xmm0, 4(%rax)
; AVX1-NEXT: LBB58_10: ## %else8
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %r9b
; AVX1-NEXT: je LBB58_12
; AVX1-NEXT: ## BB#11: ## %cond.store9
; AVX1-NEXT: vpextrb $5, %xmm0, 5(%rax)
; AVX1-NEXT: LBB58_12: ## %else10
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_14
; AVX1-NEXT: ## BB#13: ## %cond.store11
; AVX1-NEXT: vpextrb $6, %xmm0, 6(%rax)
; AVX1-NEXT: LBB58_14: ## %else12
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_16
; AVX1-NEXT: ## BB#15: ## %cond.store13
; AVX1-NEXT: vpextrb $7, %xmm0, 7(%rax)
; AVX1-NEXT: LBB58_16: ## %else14
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_18
; AVX1-NEXT: ## BB#17: ## %cond.store15
; AVX1-NEXT: vpextrb $8, %xmm0, 8(%rax)
; AVX1-NEXT: LBB58_18: ## %else16
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_20
; AVX1-NEXT: ## BB#19: ## %cond.store17
; AVX1-NEXT: vpextrb $9, %xmm0, 9(%rax)
; AVX1-NEXT: LBB58_20: ## %else18
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_22
; AVX1-NEXT: ## BB#21: ## %cond.store19
; AVX1-NEXT: vpextrb $10, %xmm0, 10(%rax)
; AVX1-NEXT: LBB58_22: ## %else20
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_24
; AVX1-NEXT: ## BB#23: ## %cond.store21
; AVX1-NEXT: vpextrb $11, %xmm0, 11(%rax)
; AVX1-NEXT: LBB58_24: ## %else22
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_26
; AVX1-NEXT: ## BB#25: ## %cond.store23
; AVX1-NEXT: vpextrb $12, %xmm0, 12(%rax)
; AVX1-NEXT: LBB58_26: ## %else24
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_28
; AVX1-NEXT: ## BB#27: ## %cond.store25
; AVX1-NEXT: vpextrb $13, %xmm0, 13(%rax)
; AVX1-NEXT: LBB58_28: ## %else26
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_30
; AVX1-NEXT: ## BB#29: ## %cond.store27
; AVX1-NEXT: vpextrb $14, %xmm0, 14(%rax)
; AVX1-NEXT: LBB58_30: ## %else28
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_32
; AVX1-NEXT: ## BB#31: ## %cond.store29
; AVX1-NEXT: vpextrb $15, %xmm0, 15(%rax)
; AVX1-NEXT: LBB58_32: ## %else30
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_34
; AVX1-NEXT: ## BB#33: ## %cond.store31
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $0, %xmm2, 16(%rax)
; AVX1-NEXT: LBB58_34: ## %else32
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_36
; AVX1-NEXT: ## BB#35: ## %cond.store33
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $1, %xmm2, 17(%rax)
; AVX1-NEXT: LBB58_36: ## %else34
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_38
; AVX1-NEXT: ## BB#37: ## %cond.store35
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $2, %xmm2, 18(%rax)
; AVX1-NEXT: LBB58_38: ## %else36
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_40
; AVX1-NEXT: ## BB#39: ## %cond.store37
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $3, %xmm2, 19(%rax)
; AVX1-NEXT: LBB58_40: ## %else38
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_42
; AVX1-NEXT: ## BB#41: ## %cond.store39
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $4, %xmm2, 20(%rax)
; AVX1-NEXT: LBB58_42: ## %else40
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_44
; AVX1-NEXT: ## BB#43: ## %cond.store41
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $5, %xmm2, 21(%rax)
; AVX1-NEXT: LBB58_44: ## %else42
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_46
; AVX1-NEXT: ## BB#45: ## %cond.store43
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $6, %xmm2, 22(%rax)
; AVX1-NEXT: LBB58_46: ## %else44
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_48
; AVX1-NEXT: ## BB#47: ## %cond.store45
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $7, %xmm2, 23(%rax)
; AVX1-NEXT: LBB58_48: ## %else46
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_50
; AVX1-NEXT: ## BB#49: ## %cond.store47
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $8, %xmm2, 24(%rax)
; AVX1-NEXT: LBB58_50: ## %else48
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_52
; AVX1-NEXT: ## BB#51: ## %cond.store49
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $9, %xmm2, 25(%rax)
; AVX1-NEXT: LBB58_52: ## %else50
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_54
; AVX1-NEXT: ## BB#53: ## %cond.store51
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $10, %xmm2, 26(%rax)
; AVX1-NEXT: LBB58_54: ## %else52
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_56
; AVX1-NEXT: ## BB#55: ## %cond.store53
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $11, %xmm2, 27(%rax)
; AVX1-NEXT: LBB58_56: ## %else54
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_58
; AVX1-NEXT: ## BB#57: ## %cond.store55
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $12, %xmm2, 28(%rax)
; AVX1-NEXT: LBB58_58: ## %else56
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_60
; AVX1-NEXT: ## BB#59: ## %cond.store57
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $13, %xmm2, 29(%rax)
; AVX1-NEXT: LBB58_60: ## %else58
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_62
; AVX1-NEXT: ## BB#61: ## %cond.store59
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpextrb $14, %xmm2, 30(%rax)
; AVX1-NEXT: LBB58_62: ## %else60
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_64
; AVX1-NEXT: ## BB#63: ## %cond.store61
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpextrb $15, %xmm0, 31(%rax)
; AVX1-NEXT: LBB58_64: ## %else62
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_66
; AVX1-NEXT: ## BB#65: ## %cond.store63
; AVX1-NEXT: vpextrb $0, %xmm1, 32(%rax)
; AVX1-NEXT: LBB58_66: ## %else64
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_68
; AVX1-NEXT: ## BB#67: ## %cond.store65
; AVX1-NEXT: vpextrb $1, %xmm1, 33(%rax)
; AVX1-NEXT: LBB58_68: ## %else66
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_70
; AVX1-NEXT: ## BB#69: ## %cond.store67
; AVX1-NEXT: vpextrb $2, %xmm1, 34(%rax)
; AVX1-NEXT: LBB58_70: ## %else68
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_72
; AVX1-NEXT: ## BB#71: ## %cond.store69
; AVX1-NEXT: vpextrb $3, %xmm1, 35(%rax)
; AVX1-NEXT: LBB58_72: ## %else70
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_74
; AVX1-NEXT: ## BB#73: ## %cond.store71
; AVX1-NEXT: vpextrb $4, %xmm1, 36(%rax)
; AVX1-NEXT: LBB58_74: ## %else72
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_76
; AVX1-NEXT: ## BB#75: ## %cond.store73
; AVX1-NEXT: vpextrb $5, %xmm1, 37(%rax)
; AVX1-NEXT: LBB58_76: ## %else74
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_78
; AVX1-NEXT: ## BB#77: ## %cond.store75
; AVX1-NEXT: vpextrb $6, %xmm1, 38(%rax)
; AVX1-NEXT: LBB58_78: ## %else76
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_80
; AVX1-NEXT: ## BB#79: ## %cond.store77
; AVX1-NEXT: vpextrb $7, %xmm1, 39(%rax)
; AVX1-NEXT: LBB58_80: ## %else78
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_82
; AVX1-NEXT: ## BB#81: ## %cond.store79
; AVX1-NEXT: vpextrb $8, %xmm1, 40(%rax)
; AVX1-NEXT: LBB58_82: ## %else80
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_84
; AVX1-NEXT: ## BB#83: ## %cond.store81
; AVX1-NEXT: vpextrb $9, %xmm1, 41(%rax)
; AVX1-NEXT: LBB58_84: ## %else82
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_86
; AVX1-NEXT: ## BB#85: ## %cond.store83
; AVX1-NEXT: vpextrb $10, %xmm1, 42(%rax)
; AVX1-NEXT: LBB58_86: ## %else84
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_88
; AVX1-NEXT: ## BB#87: ## %cond.store85
; AVX1-NEXT: vpextrb $11, %xmm1, 43(%rax)
; AVX1-NEXT: LBB58_88: ## %else86
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_90
; AVX1-NEXT: ## BB#89: ## %cond.store87
; AVX1-NEXT: vpextrb $12, %xmm1, 44(%rax)
; AVX1-NEXT: LBB58_90: ## %else88
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_92
; AVX1-NEXT: ## BB#91: ## %cond.store89
; AVX1-NEXT: vpextrb $13, %xmm1, 45(%rax)
; AVX1-NEXT: LBB58_92: ## %else90
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_94
; AVX1-NEXT: ## BB#93: ## %cond.store91
; AVX1-NEXT: vpextrb $14, %xmm1, 46(%rax)
; AVX1-NEXT: LBB58_94: ## %else92
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_96
; AVX1-NEXT: ## BB#95: ## %cond.store93
; AVX1-NEXT: vpextrb $15, %xmm1, 47(%rax)
; AVX1-NEXT: LBB58_96: ## %else94
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_98
; AVX1-NEXT: ## BB#97: ## %cond.store95
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, 48(%rax)
; AVX1-NEXT: LBB58_98: ## %else96
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_100
; AVX1-NEXT: ## BB#99: ## %cond.store97
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $1, %xmm0, 49(%rax)
; AVX1-NEXT: LBB58_100: ## %else98
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_102
; AVX1-NEXT: ## BB#101: ## %cond.store99
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $2, %xmm0, 50(%rax)
; AVX1-NEXT: LBB58_102: ## %else100
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_104
; AVX1-NEXT: ## BB#103: ## %cond.store101
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $3, %xmm0, 51(%rax)
; AVX1-NEXT: LBB58_104: ## %else102
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_106
; AVX1-NEXT: ## BB#105: ## %cond.store103
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $4, %xmm0, 52(%rax)
; AVX1-NEXT: LBB58_106: ## %else104
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_108
; AVX1-NEXT: ## BB#107: ## %cond.store105
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $5, %xmm0, 53(%rax)
; AVX1-NEXT: LBB58_108: ## %else106
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_110
; AVX1-NEXT: ## BB#109: ## %cond.store107
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $6, %xmm0, 54(%rax)
; AVX1-NEXT: LBB58_110: ## %else108
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_112
; AVX1-NEXT: ## BB#111: ## %cond.store109
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $7, %xmm0, 55(%rax)
; AVX1-NEXT: LBB58_112: ## %else110
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_114
; AVX1-NEXT: ## BB#113: ## %cond.store111
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $8, %xmm0, 56(%rax)
; AVX1-NEXT: LBB58_114: ## %else112
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT: testb $1, %dl
; AVX1-NEXT: je LBB58_116
; AVX1-NEXT: ## BB#115: ## %cond.store113
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $9, %xmm0, 57(%rax)
; AVX1-NEXT: LBB58_116: ## %else114
; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT: testb $1, %cl
; AVX1-NEXT: je LBB58_118
; AVX1-NEXT: ## BB#117: ## %cond.store115
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $10, %xmm0, 58(%rax)
; AVX1-NEXT: LBB58_118: ## %else116
8202; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl 8203; AVX1-NEXT: testb $1, %dl 8204; AVX1-NEXT: je LBB58_120 8205; AVX1-NEXT: ## BB#119: ## %cond.store117 8206; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 8207; AVX1-NEXT: vpextrb $11, %xmm0, 59(%rax) 8208; AVX1-NEXT: LBB58_120: ## %else118 8209; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl 8210; AVX1-NEXT: testb $1, %cl 8211; AVX1-NEXT: je LBB58_122 8212; AVX1-NEXT: ## BB#121: ## %cond.store119 8213; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 8214; AVX1-NEXT: vpextrb $12, %xmm0, 60(%rax) 8215; AVX1-NEXT: LBB58_122: ## %else120 8216; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl 8217; AVX1-NEXT: testb $1, %dl 8218; AVX1-NEXT: je LBB58_124 8219; AVX1-NEXT: ## BB#123: ## %cond.store121 8220; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 8221; AVX1-NEXT: vpextrb $13, %xmm0, 61(%rax) 8222; AVX1-NEXT: LBB58_124: ## %else122 8223; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl 8224; AVX1-NEXT: testb $1, %cl 8225; AVX1-NEXT: je LBB58_126 8226; AVX1-NEXT: ## BB#125: ## %cond.store123 8227; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 8228; AVX1-NEXT: vpextrb $14, %xmm0, 62(%rax) 8229; AVX1-NEXT: LBB58_126: ## %else124 8230; AVX1-NEXT: testb $1, %dl 8231; AVX1-NEXT: je LBB58_128 8232; AVX1-NEXT: ## BB#127: ## %cond.store125 8233; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 8234; AVX1-NEXT: vpextrb $15, %xmm0, 63(%rax) 8235; AVX1-NEXT: LBB58_128: ## %else126 8236; AVX1-NEXT: vzeroupper 8237; AVX1-NEXT: retq 8238; 8239; AVX2-LABEL: test_mask_store_64xi8: 8240; AVX2: ## BB#0: 8241; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 8242; AVX2-NEXT: testb $1, %dil 8243; AVX2-NEXT: je LBB58_2 8244; AVX2-NEXT: ## BB#1: ## %cond.store 8245; AVX2-NEXT: vpextrb $0, %xmm0, (%rax) 8246; AVX2-NEXT: LBB58_2: ## %else 8247; AVX2-NEXT: testb $1, %sil 8248; AVX2-NEXT: je LBB58_4 8249; AVX2-NEXT: ## BB#3: ## %cond.store1 8250; AVX2-NEXT: vpextrb $1, %xmm0, 1(%rax) 8251; AVX2-NEXT: LBB58_4: ## %else2 8252; AVX2-NEXT: testb $1, %dl 8253; AVX2-NEXT: je LBB58_6 8254; AVX2-NEXT: ## BB#5: ## %cond.store3 8255; AVX2-NEXT: vpextrb $2, %xmm0, 2(%rax) 8256; AVX2-NEXT: LBB58_6: ## %else4 8257; AVX2-NEXT: testb $1, %cl 8258; AVX2-NEXT: je LBB58_8 8259; AVX2-NEXT: ## BB#7: ## %cond.store5 8260; AVX2-NEXT: vpextrb $3, %xmm0, 3(%rax) 8261; AVX2-NEXT: LBB58_8: ## %else6 8262; AVX2-NEXT: testb $1, %r8b 8263; AVX2-NEXT: je LBB58_10 8264; AVX2-NEXT: ## BB#9: ## %cond.store7 8265; AVX2-NEXT: vpextrb $4, %xmm0, 4(%rax) 8266; AVX2-NEXT: LBB58_10: ## %else8 8267; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8268; AVX2-NEXT: testb $1, %r9b 8269; AVX2-NEXT: je LBB58_12 8270; AVX2-NEXT: ## BB#11: ## %cond.store9 8271; AVX2-NEXT: vpextrb $5, %xmm0, 5(%rax) 8272; AVX2-NEXT: LBB58_12: ## %else10 8273; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8274; AVX2-NEXT: testb $1, %cl 8275; AVX2-NEXT: je LBB58_14 8276; AVX2-NEXT: ## BB#13: ## %cond.store11 8277; AVX2-NEXT: vpextrb $6, %xmm0, 6(%rax) 8278; AVX2-NEXT: LBB58_14: ## %else12 8279; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8280; AVX2-NEXT: testb $1, %dl 8281; AVX2-NEXT: je LBB58_16 8282; AVX2-NEXT: ## BB#15: ## %cond.store13 8283; AVX2-NEXT: vpextrb $7, %xmm0, 7(%rax) 8284; AVX2-NEXT: LBB58_16: ## %else14 8285; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8286; AVX2-NEXT: testb $1, %cl 8287; AVX2-NEXT: je LBB58_18 8288; AVX2-NEXT: ## BB#17: ## %cond.store15 8289; AVX2-NEXT: vpextrb $8, %xmm0, 8(%rax) 8290; AVX2-NEXT: LBB58_18: ## %else16 8291; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8292; AVX2-NEXT: testb $1, %dl 8293; AVX2-NEXT: je LBB58_20 8294; AVX2-NEXT: ## BB#19: ## %cond.store17 8295; AVX2-NEXT: vpextrb $9, %xmm0, 
9(%rax) 8296; AVX2-NEXT: LBB58_20: ## %else18 8297; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8298; AVX2-NEXT: testb $1, %cl 8299; AVX2-NEXT: je LBB58_22 8300; AVX2-NEXT: ## BB#21: ## %cond.store19 8301; AVX2-NEXT: vpextrb $10, %xmm0, 10(%rax) 8302; AVX2-NEXT: LBB58_22: ## %else20 8303; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8304; AVX2-NEXT: testb $1, %dl 8305; AVX2-NEXT: je LBB58_24 8306; AVX2-NEXT: ## BB#23: ## %cond.store21 8307; AVX2-NEXT: vpextrb $11, %xmm0, 11(%rax) 8308; AVX2-NEXT: LBB58_24: ## %else22 8309; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8310; AVX2-NEXT: testb $1, %cl 8311; AVX2-NEXT: je LBB58_26 8312; AVX2-NEXT: ## BB#25: ## %cond.store23 8313; AVX2-NEXT: vpextrb $12, %xmm0, 12(%rax) 8314; AVX2-NEXT: LBB58_26: ## %else24 8315; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8316; AVX2-NEXT: testb $1, %dl 8317; AVX2-NEXT: je LBB58_28 8318; AVX2-NEXT: ## BB#27: ## %cond.store25 8319; AVX2-NEXT: vpextrb $13, %xmm0, 13(%rax) 8320; AVX2-NEXT: LBB58_28: ## %else26 8321; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8322; AVX2-NEXT: testb $1, %cl 8323; AVX2-NEXT: je LBB58_30 8324; AVX2-NEXT: ## BB#29: ## %cond.store27 8325; AVX2-NEXT: vpextrb $14, %xmm0, 14(%rax) 8326; AVX2-NEXT: LBB58_30: ## %else28 8327; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8328; AVX2-NEXT: testb $1, %dl 8329; AVX2-NEXT: je LBB58_32 8330; AVX2-NEXT: ## BB#31: ## %cond.store29 8331; AVX2-NEXT: vpextrb $15, %xmm0, 15(%rax) 8332; AVX2-NEXT: LBB58_32: ## %else30 8333; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8334; AVX2-NEXT: testb $1, %cl 8335; AVX2-NEXT: je LBB58_34 8336; AVX2-NEXT: ## BB#33: ## %cond.store31 8337; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8338; AVX2-NEXT: vpextrb $0, %xmm2, 16(%rax) 8339; AVX2-NEXT: LBB58_34: ## %else32 8340; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8341; AVX2-NEXT: testb $1, %dl 8342; AVX2-NEXT: je LBB58_36 8343; AVX2-NEXT: ## BB#35: ## %cond.store33 8344; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8345; AVX2-NEXT: vpextrb $1, %xmm2, 17(%rax) 8346; AVX2-NEXT: LBB58_36: ## %else34 8347; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8348; AVX2-NEXT: testb $1, %cl 8349; AVX2-NEXT: je LBB58_38 8350; AVX2-NEXT: ## BB#37: ## %cond.store35 8351; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8352; AVX2-NEXT: vpextrb $2, %xmm2, 18(%rax) 8353; AVX2-NEXT: LBB58_38: ## %else36 8354; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8355; AVX2-NEXT: testb $1, %dl 8356; AVX2-NEXT: je LBB58_40 8357; AVX2-NEXT: ## BB#39: ## %cond.store37 8358; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8359; AVX2-NEXT: vpextrb $3, %xmm2, 19(%rax) 8360; AVX2-NEXT: LBB58_40: ## %else38 8361; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8362; AVX2-NEXT: testb $1, %cl 8363; AVX2-NEXT: je LBB58_42 8364; AVX2-NEXT: ## BB#41: ## %cond.store39 8365; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8366; AVX2-NEXT: vpextrb $4, %xmm2, 20(%rax) 8367; AVX2-NEXT: LBB58_42: ## %else40 8368; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8369; AVX2-NEXT: testb $1, %dl 8370; AVX2-NEXT: je LBB58_44 8371; AVX2-NEXT: ## BB#43: ## %cond.store41 8372; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8373; AVX2-NEXT: vpextrb $5, %xmm2, 21(%rax) 8374; AVX2-NEXT: LBB58_44: ## %else42 8375; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8376; AVX2-NEXT: testb $1, %cl 8377; AVX2-NEXT: je LBB58_46 8378; AVX2-NEXT: ## BB#45: ## %cond.store43 8379; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8380; AVX2-NEXT: vpextrb $6, %xmm2, 22(%rax) 8381; AVX2-NEXT: LBB58_46: ## %else44 8382; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8383; AVX2-NEXT: testb $1, %dl 8384; AVX2-NEXT: je LBB58_48 8385; AVX2-NEXT: ## BB#47: ## %cond.store45 8386; AVX2-NEXT: 
vextracti128 $1, %ymm0, %xmm2 8387; AVX2-NEXT: vpextrb $7, %xmm2, 23(%rax) 8388; AVX2-NEXT: LBB58_48: ## %else46 8389; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8390; AVX2-NEXT: testb $1, %cl 8391; AVX2-NEXT: je LBB58_50 8392; AVX2-NEXT: ## BB#49: ## %cond.store47 8393; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8394; AVX2-NEXT: vpextrb $8, %xmm2, 24(%rax) 8395; AVX2-NEXT: LBB58_50: ## %else48 8396; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8397; AVX2-NEXT: testb $1, %dl 8398; AVX2-NEXT: je LBB58_52 8399; AVX2-NEXT: ## BB#51: ## %cond.store49 8400; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8401; AVX2-NEXT: vpextrb $9, %xmm2, 25(%rax) 8402; AVX2-NEXT: LBB58_52: ## %else50 8403; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8404; AVX2-NEXT: testb $1, %cl 8405; AVX2-NEXT: je LBB58_54 8406; AVX2-NEXT: ## BB#53: ## %cond.store51 8407; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8408; AVX2-NEXT: vpextrb $10, %xmm2, 26(%rax) 8409; AVX2-NEXT: LBB58_54: ## %else52 8410; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8411; AVX2-NEXT: testb $1, %dl 8412; AVX2-NEXT: je LBB58_56 8413; AVX2-NEXT: ## BB#55: ## %cond.store53 8414; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8415; AVX2-NEXT: vpextrb $11, %xmm2, 27(%rax) 8416; AVX2-NEXT: LBB58_56: ## %else54 8417; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8418; AVX2-NEXT: testb $1, %cl 8419; AVX2-NEXT: je LBB58_58 8420; AVX2-NEXT: ## BB#57: ## %cond.store55 8421; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8422; AVX2-NEXT: vpextrb $12, %xmm2, 28(%rax) 8423; AVX2-NEXT: LBB58_58: ## %else56 8424; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8425; AVX2-NEXT: testb $1, %dl 8426; AVX2-NEXT: je LBB58_60 8427; AVX2-NEXT: ## BB#59: ## %cond.store57 8428; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8429; AVX2-NEXT: vpextrb $13, %xmm2, 29(%rax) 8430; AVX2-NEXT: LBB58_60: ## %else58 8431; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8432; AVX2-NEXT: testb $1, %cl 8433; AVX2-NEXT: je LBB58_62 8434; AVX2-NEXT: ## BB#61: ## %cond.store59 8435; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 8436; AVX2-NEXT: vpextrb $14, %xmm2, 30(%rax) 8437; AVX2-NEXT: LBB58_62: ## %else60 8438; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8439; AVX2-NEXT: testb $1, %dl 8440; AVX2-NEXT: je LBB58_64 8441; AVX2-NEXT: ## BB#63: ## %cond.store61 8442; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 8443; AVX2-NEXT: vpextrb $15, %xmm0, 31(%rax) 8444; AVX2-NEXT: LBB58_64: ## %else62 8445; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8446; AVX2-NEXT: testb $1, %cl 8447; AVX2-NEXT: je LBB58_66 8448; AVX2-NEXT: ## BB#65: ## %cond.store63 8449; AVX2-NEXT: vpextrb $0, %xmm1, 32(%rax) 8450; AVX2-NEXT: LBB58_66: ## %else64 8451; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8452; AVX2-NEXT: testb $1, %dl 8453; AVX2-NEXT: je LBB58_68 8454; AVX2-NEXT: ## BB#67: ## %cond.store65 8455; AVX2-NEXT: vpextrb $1, %xmm1, 33(%rax) 8456; AVX2-NEXT: LBB58_68: ## %else66 8457; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8458; AVX2-NEXT: testb $1, %cl 8459; AVX2-NEXT: je LBB58_70 8460; AVX2-NEXT: ## BB#69: ## %cond.store67 8461; AVX2-NEXT: vpextrb $2, %xmm1, 34(%rax) 8462; AVX2-NEXT: LBB58_70: ## %else68 8463; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8464; AVX2-NEXT: testb $1, %dl 8465; AVX2-NEXT: je LBB58_72 8466; AVX2-NEXT: ## BB#71: ## %cond.store69 8467; AVX2-NEXT: vpextrb $3, %xmm1, 35(%rax) 8468; AVX2-NEXT: LBB58_72: ## %else70 8469; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8470; AVX2-NEXT: testb $1, %cl 8471; AVX2-NEXT: je LBB58_74 8472; AVX2-NEXT: ## BB#73: ## %cond.store71 8473; AVX2-NEXT: vpextrb $4, %xmm1, 36(%rax) 8474; AVX2-NEXT: LBB58_74: ## %else72 8475; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8476; 
AVX2-NEXT: testb $1, %dl 8477; AVX2-NEXT: je LBB58_76 8478; AVX2-NEXT: ## BB#75: ## %cond.store73 8479; AVX2-NEXT: vpextrb $5, %xmm1, 37(%rax) 8480; AVX2-NEXT: LBB58_76: ## %else74 8481; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8482; AVX2-NEXT: testb $1, %cl 8483; AVX2-NEXT: je LBB58_78 8484; AVX2-NEXT: ## BB#77: ## %cond.store75 8485; AVX2-NEXT: vpextrb $6, %xmm1, 38(%rax) 8486; AVX2-NEXT: LBB58_78: ## %else76 8487; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8488; AVX2-NEXT: testb $1, %dl 8489; AVX2-NEXT: je LBB58_80 8490; AVX2-NEXT: ## BB#79: ## %cond.store77 8491; AVX2-NEXT: vpextrb $7, %xmm1, 39(%rax) 8492; AVX2-NEXT: LBB58_80: ## %else78 8493; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8494; AVX2-NEXT: testb $1, %cl 8495; AVX2-NEXT: je LBB58_82 8496; AVX2-NEXT: ## BB#81: ## %cond.store79 8497; AVX2-NEXT: vpextrb $8, %xmm1, 40(%rax) 8498; AVX2-NEXT: LBB58_82: ## %else80 8499; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8500; AVX2-NEXT: testb $1, %dl 8501; AVX2-NEXT: je LBB58_84 8502; AVX2-NEXT: ## BB#83: ## %cond.store81 8503; AVX2-NEXT: vpextrb $9, %xmm1, 41(%rax) 8504; AVX2-NEXT: LBB58_84: ## %else82 8505; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8506; AVX2-NEXT: testb $1, %cl 8507; AVX2-NEXT: je LBB58_86 8508; AVX2-NEXT: ## BB#85: ## %cond.store83 8509; AVX2-NEXT: vpextrb $10, %xmm1, 42(%rax) 8510; AVX2-NEXT: LBB58_86: ## %else84 8511; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8512; AVX2-NEXT: testb $1, %dl 8513; AVX2-NEXT: je LBB58_88 8514; AVX2-NEXT: ## BB#87: ## %cond.store85 8515; AVX2-NEXT: vpextrb $11, %xmm1, 43(%rax) 8516; AVX2-NEXT: LBB58_88: ## %else86 8517; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8518; AVX2-NEXT: testb $1, %cl 8519; AVX2-NEXT: je LBB58_90 8520; AVX2-NEXT: ## BB#89: ## %cond.store87 8521; AVX2-NEXT: vpextrb $12, %xmm1, 44(%rax) 8522; AVX2-NEXT: LBB58_90: ## %else88 8523; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8524; AVX2-NEXT: testb $1, %dl 8525; AVX2-NEXT: je LBB58_92 8526; AVX2-NEXT: ## BB#91: ## %cond.store89 8527; AVX2-NEXT: vpextrb $13, %xmm1, 45(%rax) 8528; AVX2-NEXT: LBB58_92: ## %else90 8529; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8530; AVX2-NEXT: testb $1, %cl 8531; AVX2-NEXT: je LBB58_94 8532; AVX2-NEXT: ## BB#93: ## %cond.store91 8533; AVX2-NEXT: vpextrb $14, %xmm1, 46(%rax) 8534; AVX2-NEXT: LBB58_94: ## %else92 8535; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8536; AVX2-NEXT: testb $1, %dl 8537; AVX2-NEXT: je LBB58_96 8538; AVX2-NEXT: ## BB#95: ## %cond.store93 8539; AVX2-NEXT: vpextrb $15, %xmm1, 47(%rax) 8540; AVX2-NEXT: LBB58_96: ## %else94 8541; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8542; AVX2-NEXT: testb $1, %cl 8543; AVX2-NEXT: je LBB58_98 8544; AVX2-NEXT: ## BB#97: ## %cond.store95 8545; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8546; AVX2-NEXT: vpextrb $0, %xmm0, 48(%rax) 8547; AVX2-NEXT: LBB58_98: ## %else96 8548; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8549; AVX2-NEXT: testb $1, %dl 8550; AVX2-NEXT: je LBB58_100 8551; AVX2-NEXT: ## BB#99: ## %cond.store97 8552; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8553; AVX2-NEXT: vpextrb $1, %xmm0, 49(%rax) 8554; AVX2-NEXT: LBB58_100: ## %else98 8555; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8556; AVX2-NEXT: testb $1, %cl 8557; AVX2-NEXT: je LBB58_102 8558; AVX2-NEXT: ## BB#101: ## %cond.store99 8559; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8560; AVX2-NEXT: vpextrb $2, %xmm0, 50(%rax) 8561; AVX2-NEXT: LBB58_102: ## %else100 8562; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8563; AVX2-NEXT: testb $1, %dl 8564; AVX2-NEXT: je LBB58_104 8565; AVX2-NEXT: ## BB#103: ## %cond.store101 8566; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8567; AVX2-NEXT: 
vpextrb $3, %xmm0, 51(%rax) 8568; AVX2-NEXT: LBB58_104: ## %else102 8569; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8570; AVX2-NEXT: testb $1, %cl 8571; AVX2-NEXT: je LBB58_106 8572; AVX2-NEXT: ## BB#105: ## %cond.store103 8573; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8574; AVX2-NEXT: vpextrb $4, %xmm0, 52(%rax) 8575; AVX2-NEXT: LBB58_106: ## %else104 8576; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8577; AVX2-NEXT: testb $1, %dl 8578; AVX2-NEXT: je LBB58_108 8579; AVX2-NEXT: ## BB#107: ## %cond.store105 8580; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8581; AVX2-NEXT: vpextrb $5, %xmm0, 53(%rax) 8582; AVX2-NEXT: LBB58_108: ## %else106 8583; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8584; AVX2-NEXT: testb $1, %cl 8585; AVX2-NEXT: je LBB58_110 8586; AVX2-NEXT: ## BB#109: ## %cond.store107 8587; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8588; AVX2-NEXT: vpextrb $6, %xmm0, 54(%rax) 8589; AVX2-NEXT: LBB58_110: ## %else108 8590; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8591; AVX2-NEXT: testb $1, %dl 8592; AVX2-NEXT: je LBB58_112 8593; AVX2-NEXT: ## BB#111: ## %cond.store109 8594; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8595; AVX2-NEXT: vpextrb $7, %xmm0, 55(%rax) 8596; AVX2-NEXT: LBB58_112: ## %else110 8597; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8598; AVX2-NEXT: testb $1, %cl 8599; AVX2-NEXT: je LBB58_114 8600; AVX2-NEXT: ## BB#113: ## %cond.store111 8601; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8602; AVX2-NEXT: vpextrb $8, %xmm0, 56(%rax) 8603; AVX2-NEXT: LBB58_114: ## %else112 8604; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8605; AVX2-NEXT: testb $1, %dl 8606; AVX2-NEXT: je LBB58_116 8607; AVX2-NEXT: ## BB#115: ## %cond.store113 8608; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8609; AVX2-NEXT: vpextrb $9, %xmm0, 57(%rax) 8610; AVX2-NEXT: LBB58_116: ## %else114 8611; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8612; AVX2-NEXT: testb $1, %cl 8613; AVX2-NEXT: je LBB58_118 8614; AVX2-NEXT: ## BB#117: ## %cond.store115 8615; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8616; AVX2-NEXT: vpextrb $10, %xmm0, 58(%rax) 8617; AVX2-NEXT: LBB58_118: ## %else116 8618; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8619; AVX2-NEXT: testb $1, %dl 8620; AVX2-NEXT: je LBB58_120 8621; AVX2-NEXT: ## BB#119: ## %cond.store117 8622; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8623; AVX2-NEXT: vpextrb $11, %xmm0, 59(%rax) 8624; AVX2-NEXT: LBB58_120: ## %else118 8625; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8626; AVX2-NEXT: testb $1, %cl 8627; AVX2-NEXT: je LBB58_122 8628; AVX2-NEXT: ## BB#121: ## %cond.store119 8629; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8630; AVX2-NEXT: vpextrb $12, %xmm0, 60(%rax) 8631; AVX2-NEXT: LBB58_122: ## %else120 8632; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl 8633; AVX2-NEXT: testb $1, %dl 8634; AVX2-NEXT: je LBB58_124 8635; AVX2-NEXT: ## BB#123: ## %cond.store121 8636; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8637; AVX2-NEXT: vpextrb $13, %xmm0, 61(%rax) 8638; AVX2-NEXT: LBB58_124: ## %else122 8639; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl 8640; AVX2-NEXT: testb $1, %cl 8641; AVX2-NEXT: je LBB58_126 8642; AVX2-NEXT: ## BB#125: ## %cond.store123 8643; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8644; AVX2-NEXT: vpextrb $14, %xmm0, 62(%rax) 8645; AVX2-NEXT: LBB58_126: ## %else124 8646; AVX2-NEXT: testb $1, %dl 8647; AVX2-NEXT: je LBB58_128 8648; AVX2-NEXT: ## BB#127: ## %cond.store125 8649; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 8650; AVX2-NEXT: vpextrb $15, %xmm0, 63(%rax) 8651; AVX2-NEXT: LBB58_128: ## %else126 8652; AVX2-NEXT: vzeroupper 8653; AVX2-NEXT: retq 8654; 8655; AVX512F-LABEL: test_mask_store_64xi8: 8656; AVX512F: ## BB#0: 
8657; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 8658; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 8659; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 8660; AVX512F-NEXT: kshiftlw $15, %k0, %k1 8661; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8662; AVX512F-NEXT: kmovw %k1, %eax 8663; AVX512F-NEXT: testb %al, %al 8664; AVX512F-NEXT: je LBB58_2 8665; AVX512F-NEXT: ## BB#1: ## %cond.store 8666; AVX512F-NEXT: vpextrb $0, %xmm4, (%rdi) 8667; AVX512F-NEXT: LBB58_2: ## %else 8668; AVX512F-NEXT: kshiftlw $14, %k0, %k1 8669; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8670; AVX512F-NEXT: kmovw %k1, %eax 8671; AVX512F-NEXT: testb %al, %al 8672; AVX512F-NEXT: je LBB58_4 8673; AVX512F-NEXT: ## BB#3: ## %cond.store1 8674; AVX512F-NEXT: vpextrb $1, %xmm4, 1(%rdi) 8675; AVX512F-NEXT: LBB58_4: ## %else2 8676; AVX512F-NEXT: kshiftlw $13, %k0, %k1 8677; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8678; AVX512F-NEXT: kmovw %k1, %eax 8679; AVX512F-NEXT: testb %al, %al 8680; AVX512F-NEXT: je LBB58_6 8681; AVX512F-NEXT: ## BB#5: ## %cond.store3 8682; AVX512F-NEXT: vpextrb $2, %xmm4, 2(%rdi) 8683; AVX512F-NEXT: LBB58_6: ## %else4 8684; AVX512F-NEXT: kshiftlw $12, %k0, %k1 8685; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8686; AVX512F-NEXT: kmovw %k1, %eax 8687; AVX512F-NEXT: testb %al, %al 8688; AVX512F-NEXT: je LBB58_8 8689; AVX512F-NEXT: ## BB#7: ## %cond.store5 8690; AVX512F-NEXT: vpextrb $3, %xmm4, 3(%rdi) 8691; AVX512F-NEXT: LBB58_8: ## %else6 8692; AVX512F-NEXT: kshiftlw $11, %k0, %k1 8693; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8694; AVX512F-NEXT: kmovw %k1, %eax 8695; AVX512F-NEXT: testb %al, %al 8696; AVX512F-NEXT: je LBB58_10 8697; AVX512F-NEXT: ## BB#9: ## %cond.store7 8698; AVX512F-NEXT: vpextrb $4, %xmm4, 4(%rdi) 8699; AVX512F-NEXT: LBB58_10: ## %else8 8700; AVX512F-NEXT: kshiftlw $10, %k0, %k1 8701; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8702; AVX512F-NEXT: kmovw %k1, %eax 8703; AVX512F-NEXT: testb %al, %al 8704; AVX512F-NEXT: je LBB58_12 8705; AVX512F-NEXT: ## BB#11: ## %cond.store9 8706; AVX512F-NEXT: vpextrb $5, %xmm4, 5(%rdi) 8707; AVX512F-NEXT: LBB58_12: ## %else10 8708; AVX512F-NEXT: kshiftlw $9, %k0, %k1 8709; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8710; AVX512F-NEXT: kmovw %k1, %eax 8711; AVX512F-NEXT: testb %al, %al 8712; AVX512F-NEXT: je LBB58_14 8713; AVX512F-NEXT: ## BB#13: ## %cond.store11 8714; AVX512F-NEXT: vpextrb $6, %xmm4, 6(%rdi) 8715; AVX512F-NEXT: LBB58_14: ## %else12 8716; AVX512F-NEXT: kshiftlw $8, %k0, %k1 8717; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8718; AVX512F-NEXT: kmovw %k1, %eax 8719; AVX512F-NEXT: testb %al, %al 8720; AVX512F-NEXT: je LBB58_16 8721; AVX512F-NEXT: ## BB#15: ## %cond.store13 8722; AVX512F-NEXT: vpextrb $7, %xmm4, 7(%rdi) 8723; AVX512F-NEXT: LBB58_16: ## %else14 8724; AVX512F-NEXT: kshiftlw $7, %k0, %k1 8725; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8726; AVX512F-NEXT: kmovw %k1, %eax 8727; AVX512F-NEXT: testb %al, %al 8728; AVX512F-NEXT: je LBB58_18 8729; AVX512F-NEXT: ## BB#17: ## %cond.store15 8730; AVX512F-NEXT: vpextrb $8, %xmm4, 8(%rdi) 8731; AVX512F-NEXT: LBB58_18: ## %else16 8732; AVX512F-NEXT: kshiftlw $6, %k0, %k1 8733; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8734; AVX512F-NEXT: kmovw %k1, %eax 8735; AVX512F-NEXT: testb %al, %al 8736; AVX512F-NEXT: je LBB58_20 8737; AVX512F-NEXT: ## BB#19: ## %cond.store17 8738; AVX512F-NEXT: vpextrb $9, %xmm4, 9(%rdi) 8739; AVX512F-NEXT: LBB58_20: ## %else18 8740; AVX512F-NEXT: kshiftlw $5, %k0, %k1 8741; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8742; AVX512F-NEXT: kmovw %k1, %eax 8743; AVX512F-NEXT: testb %al, %al 8744; AVX512F-NEXT: je LBB58_22 8745; AVX512F-NEXT: ## 
BB#21: ## %cond.store19 8746; AVX512F-NEXT: vpextrb $10, %xmm4, 10(%rdi) 8747; AVX512F-NEXT: LBB58_22: ## %else20 8748; AVX512F-NEXT: kshiftlw $4, %k0, %k1 8749; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8750; AVX512F-NEXT: kmovw %k1, %eax 8751; AVX512F-NEXT: testb %al, %al 8752; AVX512F-NEXT: je LBB58_24 8753; AVX512F-NEXT: ## BB#23: ## %cond.store21 8754; AVX512F-NEXT: vpextrb $11, %xmm4, 11(%rdi) 8755; AVX512F-NEXT: LBB58_24: ## %else22 8756; AVX512F-NEXT: kshiftlw $3, %k0, %k1 8757; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8758; AVX512F-NEXT: kmovw %k1, %eax 8759; AVX512F-NEXT: testb %al, %al 8760; AVX512F-NEXT: je LBB58_26 8761; AVX512F-NEXT: ## BB#25: ## %cond.store23 8762; AVX512F-NEXT: vpextrb $12, %xmm4, 12(%rdi) 8763; AVX512F-NEXT: LBB58_26: ## %else24 8764; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0 8765; AVX512F-NEXT: kshiftlw $2, %k0, %k1 8766; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8767; AVX512F-NEXT: kmovw %k1, %eax 8768; AVX512F-NEXT: testb %al, %al 8769; AVX512F-NEXT: je LBB58_28 8770; AVX512F-NEXT: ## BB#27: ## %cond.store25 8771; AVX512F-NEXT: vpextrb $13, %xmm4, 13(%rdi) 8772; AVX512F-NEXT: LBB58_28: ## %else26 8773; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 8774; AVX512F-NEXT: kshiftlw $1, %k0, %k1 8775; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8776; AVX512F-NEXT: kmovw %k1, %eax 8777; AVX512F-NEXT: testb %al, %al 8778; AVX512F-NEXT: je LBB58_30 8779; AVX512F-NEXT: ## BB#29: ## %cond.store27 8780; AVX512F-NEXT: vpextrb $14, %xmm4, 14(%rdi) 8781; AVX512F-NEXT: LBB58_30: ## %else28 8782; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 8783; AVX512F-NEXT: kshiftlw $0, %k0, %k0 8784; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8785; AVX512F-NEXT: kmovw %k0, %eax 8786; AVX512F-NEXT: testb %al, %al 8787; AVX512F-NEXT: je LBB58_32 8788; AVX512F-NEXT: ## BB#31: ## %cond.store29 8789; AVX512F-NEXT: vpextrb $15, %xmm4, 15(%rdi) 8790; AVX512F-NEXT: LBB58_32: ## %else30 8791; AVX512F-NEXT: kshiftlw $15, %k1, %k0 8792; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8793; AVX512F-NEXT: kmovw %k0, %eax 8794; AVX512F-NEXT: testb %al, %al 8795; AVX512F-NEXT: je LBB58_34 8796; AVX512F-NEXT: ## BB#33: ## %cond.store31 8797; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8798; AVX512F-NEXT: vpextrb $0, %xmm0, 16(%rdi) 8799; AVX512F-NEXT: LBB58_34: ## %else32 8800; AVX512F-NEXT: kshiftlw $14, %k1, %k0 8801; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8802; AVX512F-NEXT: kmovw %k0, %eax 8803; AVX512F-NEXT: testb %al, %al 8804; AVX512F-NEXT: je LBB58_36 8805; AVX512F-NEXT: ## BB#35: ## %cond.store33 8806; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8807; AVX512F-NEXT: vpextrb $1, %xmm0, 17(%rdi) 8808; AVX512F-NEXT: LBB58_36: ## %else34 8809; AVX512F-NEXT: kshiftlw $13, %k1, %k0 8810; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8811; AVX512F-NEXT: kmovw %k0, %eax 8812; AVX512F-NEXT: testb %al, %al 8813; AVX512F-NEXT: je LBB58_38 8814; AVX512F-NEXT: ## BB#37: ## %cond.store35 8815; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8816; AVX512F-NEXT: vpextrb $2, %xmm0, 18(%rdi) 8817; AVX512F-NEXT: LBB58_38: ## %else36 8818; AVX512F-NEXT: kshiftlw $12, %k1, %k0 8819; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8820; AVX512F-NEXT: kmovw %k0, %eax 8821; AVX512F-NEXT: testb %al, %al 8822; AVX512F-NEXT: je LBB58_40 8823; AVX512F-NEXT: ## BB#39: ## %cond.store37 8824; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8825; AVX512F-NEXT: vpextrb $3, %xmm0, 19(%rdi) 8826; AVX512F-NEXT: LBB58_40: ## %else38 8827; AVX512F-NEXT: kshiftlw $11, %k1, %k0 8828; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8829; AVX512F-NEXT: kmovw %k0, %eax 8830; AVX512F-NEXT: testb %al, %al 8831; AVX512F-NEXT: je 
LBB58_42 8832; AVX512F-NEXT: ## BB#41: ## %cond.store39 8833; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8834; AVX512F-NEXT: vpextrb $4, %xmm0, 20(%rdi) 8835; AVX512F-NEXT: LBB58_42: ## %else40 8836; AVX512F-NEXT: kshiftlw $10, %k1, %k0 8837; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8838; AVX512F-NEXT: kmovw %k0, %eax 8839; AVX512F-NEXT: testb %al, %al 8840; AVX512F-NEXT: je LBB58_44 8841; AVX512F-NEXT: ## BB#43: ## %cond.store41 8842; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8843; AVX512F-NEXT: vpextrb $5, %xmm0, 21(%rdi) 8844; AVX512F-NEXT: LBB58_44: ## %else42 8845; AVX512F-NEXT: kshiftlw $9, %k1, %k0 8846; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8847; AVX512F-NEXT: kmovw %k0, %eax 8848; AVX512F-NEXT: testb %al, %al 8849; AVX512F-NEXT: je LBB58_46 8850; AVX512F-NEXT: ## BB#45: ## %cond.store43 8851; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8852; AVX512F-NEXT: vpextrb $6, %xmm0, 22(%rdi) 8853; AVX512F-NEXT: LBB58_46: ## %else44 8854; AVX512F-NEXT: kshiftlw $8, %k1, %k0 8855; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8856; AVX512F-NEXT: kmovw %k0, %eax 8857; AVX512F-NEXT: testb %al, %al 8858; AVX512F-NEXT: je LBB58_48 8859; AVX512F-NEXT: ## BB#47: ## %cond.store45 8860; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8861; AVX512F-NEXT: vpextrb $7, %xmm0, 23(%rdi) 8862; AVX512F-NEXT: LBB58_48: ## %else46 8863; AVX512F-NEXT: kshiftlw $7, %k1, %k0 8864; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8865; AVX512F-NEXT: kmovw %k0, %eax 8866; AVX512F-NEXT: testb %al, %al 8867; AVX512F-NEXT: je LBB58_50 8868; AVX512F-NEXT: ## BB#49: ## %cond.store47 8869; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8870; AVX512F-NEXT: vpextrb $8, %xmm0, 24(%rdi) 8871; AVX512F-NEXT: LBB58_50: ## %else48 8872; AVX512F-NEXT: kshiftlw $6, %k1, %k0 8873; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8874; AVX512F-NEXT: kmovw %k0, %eax 8875; AVX512F-NEXT: testb %al, %al 8876; AVX512F-NEXT: je LBB58_52 8877; AVX512F-NEXT: ## BB#51: ## %cond.store49 8878; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8879; AVX512F-NEXT: vpextrb $9, %xmm0, 25(%rdi) 8880; AVX512F-NEXT: LBB58_52: ## %else50 8881; AVX512F-NEXT: kshiftlw $5, %k1, %k0 8882; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8883; AVX512F-NEXT: kmovw %k0, %eax 8884; AVX512F-NEXT: testb %al, %al 8885; AVX512F-NEXT: je LBB58_54 8886; AVX512F-NEXT: ## BB#53: ## %cond.store51 8887; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8888; AVX512F-NEXT: vpextrb $10, %xmm0, 26(%rdi) 8889; AVX512F-NEXT: LBB58_54: ## %else52 8890; AVX512F-NEXT: kshiftlw $4, %k1, %k0 8891; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8892; AVX512F-NEXT: kmovw %k0, %eax 8893; AVX512F-NEXT: testb %al, %al 8894; AVX512F-NEXT: je LBB58_56 8895; AVX512F-NEXT: ## BB#55: ## %cond.store53 8896; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8897; AVX512F-NEXT: vpextrb $11, %xmm0, 27(%rdi) 8898; AVX512F-NEXT: LBB58_56: ## %else54 8899; AVX512F-NEXT: kshiftlw $3, %k1, %k0 8900; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8901; AVX512F-NEXT: kmovw %k0, %eax 8902; AVX512F-NEXT: testb %al, %al 8903; AVX512F-NEXT: je LBB58_58 8904; AVX512F-NEXT: ## BB#57: ## %cond.store55 8905; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8906; AVX512F-NEXT: vpextrb $12, %xmm0, 28(%rdi) 8907; AVX512F-NEXT: LBB58_58: ## %else56 8908; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm0 8909; AVX512F-NEXT: kshiftlw $2, %k1, %k0 8910; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8911; AVX512F-NEXT: kmovw %k0, %eax 8912; AVX512F-NEXT: testb %al, %al 8913; AVX512F-NEXT: je LBB58_60 8914; AVX512F-NEXT: ## BB#59: ## %cond.store57 8915; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm1 8916; AVX512F-NEXT: vpextrb $13, 
%xmm1, 29(%rdi) 8917; AVX512F-NEXT: LBB58_60: ## %else58 8918; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 8919; AVX512F-NEXT: kshiftlw $1, %k1, %k0 8920; AVX512F-NEXT: kshiftrw $15, %k0, %k0 8921; AVX512F-NEXT: kmovw %k0, %eax 8922; AVX512F-NEXT: testb %al, %al 8923; AVX512F-NEXT: je LBB58_62 8924; AVX512F-NEXT: ## BB#61: ## %cond.store59 8925; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm1 8926; AVX512F-NEXT: vpextrb $14, %xmm1, 30(%rdi) 8927; AVX512F-NEXT: LBB58_62: ## %else60 8928; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 8929; AVX512F-NEXT: kshiftlw $0, %k1, %k1 8930; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8931; AVX512F-NEXT: kmovw %k1, %eax 8932; AVX512F-NEXT: testb %al, %al 8933; AVX512F-NEXT: je LBB58_64 8934; AVX512F-NEXT: ## BB#63: ## %cond.store61 8935; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0 8936; AVX512F-NEXT: vpextrb $15, %xmm0, 31(%rdi) 8937; AVX512F-NEXT: LBB58_64: ## %else62 8938; AVX512F-NEXT: kshiftlw $15, %k0, %k1 8939; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8940; AVX512F-NEXT: kmovw %k1, %eax 8941; AVX512F-NEXT: testb %al, %al 8942; AVX512F-NEXT: je LBB58_66 8943; AVX512F-NEXT: ## BB#65: ## %cond.store63 8944; AVX512F-NEXT: vpextrb $0, %xmm5, 32(%rdi) 8945; AVX512F-NEXT: LBB58_66: ## %else64 8946; AVX512F-NEXT: kshiftlw $14, %k0, %k1 8947; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8948; AVX512F-NEXT: kmovw %k1, %eax 8949; AVX512F-NEXT: testb %al, %al 8950; AVX512F-NEXT: je LBB58_68 8951; AVX512F-NEXT: ## BB#67: ## %cond.store65 8952; AVX512F-NEXT: vpextrb $1, %xmm5, 33(%rdi) 8953; AVX512F-NEXT: LBB58_68: ## %else66 8954; AVX512F-NEXT: kshiftlw $13, %k0, %k1 8955; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8956; AVX512F-NEXT: kmovw %k1, %eax 8957; AVX512F-NEXT: testb %al, %al 8958; AVX512F-NEXT: je LBB58_70 8959; AVX512F-NEXT: ## BB#69: ## %cond.store67 8960; AVX512F-NEXT: vpextrb $2, %xmm5, 34(%rdi) 8961; AVX512F-NEXT: LBB58_70: ## %else68 8962; AVX512F-NEXT: kshiftlw $12, %k0, %k1 8963; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8964; AVX512F-NEXT: kmovw %k1, %eax 8965; AVX512F-NEXT: testb %al, %al 8966; AVX512F-NEXT: je LBB58_72 8967; AVX512F-NEXT: ## BB#71: ## %cond.store69 8968; AVX512F-NEXT: vpextrb $3, %xmm5, 35(%rdi) 8969; AVX512F-NEXT: LBB58_72: ## %else70 8970; AVX512F-NEXT: kshiftlw $11, %k0, %k1 8971; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8972; AVX512F-NEXT: kmovw %k1, %eax 8973; AVX512F-NEXT: testb %al, %al 8974; AVX512F-NEXT: je LBB58_74 8975; AVX512F-NEXT: ## BB#73: ## %cond.store71 8976; AVX512F-NEXT: vpextrb $4, %xmm5, 36(%rdi) 8977; AVX512F-NEXT: LBB58_74: ## %else72 8978; AVX512F-NEXT: kshiftlw $10, %k0, %k1 8979; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8980; AVX512F-NEXT: kmovw %k1, %eax 8981; AVX512F-NEXT: testb %al, %al 8982; AVX512F-NEXT: je LBB58_76 8983; AVX512F-NEXT: ## BB#75: ## %cond.store73 8984; AVX512F-NEXT: vpextrb $5, %xmm5, 37(%rdi) 8985; AVX512F-NEXT: LBB58_76: ## %else74 8986; AVX512F-NEXT: kshiftlw $9, %k0, %k1 8987; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8988; AVX512F-NEXT: kmovw %k1, %eax 8989; AVX512F-NEXT: testb %al, %al 8990; AVX512F-NEXT: je LBB58_78 8991; AVX512F-NEXT: ## BB#77: ## %cond.store75 8992; AVX512F-NEXT: vpextrb $6, %xmm5, 38(%rdi) 8993; AVX512F-NEXT: LBB58_78: ## %else76 8994; AVX512F-NEXT: kshiftlw $8, %k0, %k1 8995; AVX512F-NEXT: kshiftrw $15, %k1, %k1 8996; AVX512F-NEXT: kmovw %k1, %eax 8997; AVX512F-NEXT: testb %al, %al 8998; AVX512F-NEXT: je LBB58_80 8999; AVX512F-NEXT: ## BB#79: ## %cond.store77 9000; AVX512F-NEXT: vpextrb $7, %xmm5, 39(%rdi) 9001; AVX512F-NEXT: LBB58_80: ## %else78 9002; AVX512F-NEXT: kshiftlw $7, %k0, %k1 9003; 
AVX512F-NEXT: kshiftrw $15, %k1, %k1 9004; AVX512F-NEXT: kmovw %k1, %eax 9005; AVX512F-NEXT: testb %al, %al 9006; AVX512F-NEXT: je LBB58_82 9007; AVX512F-NEXT: ## BB#81: ## %cond.store79 9008; AVX512F-NEXT: vpextrb $8, %xmm5, 40(%rdi) 9009; AVX512F-NEXT: LBB58_82: ## %else80 9010; AVX512F-NEXT: kshiftlw $6, %k0, %k1 9011; AVX512F-NEXT: kshiftrw $15, %k1, %k1 9012; AVX512F-NEXT: kmovw %k1, %eax 9013; AVX512F-NEXT: testb %al, %al 9014; AVX512F-NEXT: je LBB58_84 9015; AVX512F-NEXT: ## BB#83: ## %cond.store81 9016; AVX512F-NEXT: vpextrb $9, %xmm5, 41(%rdi) 9017; AVX512F-NEXT: LBB58_84: ## %else82 9018; AVX512F-NEXT: kshiftlw $5, %k0, %k1 9019; AVX512F-NEXT: kshiftrw $15, %k1, %k1 9020; AVX512F-NEXT: kmovw %k1, %eax 9021; AVX512F-NEXT: testb %al, %al 9022; AVX512F-NEXT: je LBB58_86 9023; AVX512F-NEXT: ## BB#85: ## %cond.store83 9024; AVX512F-NEXT: vpextrb $10, %xmm5, 42(%rdi) 9025; AVX512F-NEXT: LBB58_86: ## %else84 9026; AVX512F-NEXT: kshiftlw $4, %k0, %k1 9027; AVX512F-NEXT: kshiftrw $15, %k1, %k1 9028; AVX512F-NEXT: kmovw %k1, %eax 9029; AVX512F-NEXT: testb %al, %al 9030; AVX512F-NEXT: je LBB58_88 9031; AVX512F-NEXT: ## BB#87: ## %cond.store85 9032; AVX512F-NEXT: vpextrb $11, %xmm5, 43(%rdi) 9033; AVX512F-NEXT: LBB58_88: ## %else86 9034; AVX512F-NEXT: kshiftlw $3, %k0, %k1 9035; AVX512F-NEXT: kshiftrw $15, %k1, %k1 9036; AVX512F-NEXT: kmovw %k1, %eax 9037; AVX512F-NEXT: testb %al, %al 9038; AVX512F-NEXT: je LBB58_90 9039; AVX512F-NEXT: ## BB#89: ## %cond.store87 9040; AVX512F-NEXT: vpextrb $12, %xmm5, 44(%rdi) 9041; AVX512F-NEXT: LBB58_90: ## %else88 9042; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm0 9043; AVX512F-NEXT: kshiftlw $2, %k0, %k1 9044; AVX512F-NEXT: kshiftrw $15, %k1, %k1 9045; AVX512F-NEXT: kmovw %k1, %eax 9046; AVX512F-NEXT: testb %al, %al 9047; AVX512F-NEXT: je LBB58_92 9048; AVX512F-NEXT: ## BB#91: ## %cond.store89 9049; AVX512F-NEXT: vpextrb $13, %xmm5, 45(%rdi) 9050; AVX512F-NEXT: LBB58_92: ## %else90 9051; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 9052; AVX512F-NEXT: kshiftlw $1, %k0, %k1 9053; AVX512F-NEXT: kshiftrw $15, %k1, %k1 9054; AVX512F-NEXT: kmovw %k1, %eax 9055; AVX512F-NEXT: testb %al, %al 9056; AVX512F-NEXT: je LBB58_94 9057; AVX512F-NEXT: ## BB#93: ## %cond.store91 9058; AVX512F-NEXT: vpextrb $14, %xmm5, 46(%rdi) 9059; AVX512F-NEXT: LBB58_94: ## %else92 9060; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 9061; AVX512F-NEXT: kshiftlw $0, %k0, %k0 9062; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9063; AVX512F-NEXT: kmovw %k0, %eax 9064; AVX512F-NEXT: testb %al, %al 9065; AVX512F-NEXT: je LBB58_96 9066; AVX512F-NEXT: ## BB#95: ## %cond.store93 9067; AVX512F-NEXT: vpextrb $15, %xmm5, 47(%rdi) 9068; AVX512F-NEXT: LBB58_96: ## %else94 9069; AVX512F-NEXT: kshiftlw $15, %k1, %k0 9070; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9071; AVX512F-NEXT: kmovw %k0, %eax 9072; AVX512F-NEXT: testb %al, %al 9073; AVX512F-NEXT: je LBB58_98 9074; AVX512F-NEXT: ## BB#97: ## %cond.store95 9075; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9076; AVX512F-NEXT: vpextrb $0, %xmm0, 48(%rdi) 9077; AVX512F-NEXT: LBB58_98: ## %else96 9078; AVX512F-NEXT: kshiftlw $14, %k1, %k0 9079; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9080; AVX512F-NEXT: kmovw %k0, %eax 9081; AVX512F-NEXT: testb %al, %al 9082; AVX512F-NEXT: je LBB58_100 9083; AVX512F-NEXT: ## BB#99: ## %cond.store97 9084; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9085; AVX512F-NEXT: vpextrb $1, %xmm0, 49(%rdi) 9086; AVX512F-NEXT: LBB58_100: ## %else98 9087; AVX512F-NEXT: kshiftlw $13, %k1, %k0 9088; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9089; AVX512F-NEXT: kmovw %k0, 
%eax 9090; AVX512F-NEXT: testb %al, %al 9091; AVX512F-NEXT: je LBB58_102 9092; AVX512F-NEXT: ## BB#101: ## %cond.store99 9093; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9094; AVX512F-NEXT: vpextrb $2, %xmm0, 50(%rdi) 9095; AVX512F-NEXT: LBB58_102: ## %else100 9096; AVX512F-NEXT: kshiftlw $12, %k1, %k0 9097; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9098; AVX512F-NEXT: kmovw %k0, %eax 9099; AVX512F-NEXT: testb %al, %al 9100; AVX512F-NEXT: je LBB58_104 9101; AVX512F-NEXT: ## BB#103: ## %cond.store101 9102; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9103; AVX512F-NEXT: vpextrb $3, %xmm0, 51(%rdi) 9104; AVX512F-NEXT: LBB58_104: ## %else102 9105; AVX512F-NEXT: kshiftlw $11, %k1, %k0 9106; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9107; AVX512F-NEXT: kmovw %k0, %eax 9108; AVX512F-NEXT: testb %al, %al 9109; AVX512F-NEXT: je LBB58_106 9110; AVX512F-NEXT: ## BB#105: ## %cond.store103 9111; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9112; AVX512F-NEXT: vpextrb $4, %xmm0, 52(%rdi) 9113; AVX512F-NEXT: LBB58_106: ## %else104 9114; AVX512F-NEXT: kshiftlw $10, %k1, %k0 9115; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9116; AVX512F-NEXT: kmovw %k0, %eax 9117; AVX512F-NEXT: testb %al, %al 9118; AVX512F-NEXT: je LBB58_108 9119; AVX512F-NEXT: ## BB#107: ## %cond.store105 9120; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9121; AVX512F-NEXT: vpextrb $5, %xmm0, 53(%rdi) 9122; AVX512F-NEXT: LBB58_108: ## %else106 9123; AVX512F-NEXT: kshiftlw $9, %k1, %k0 9124; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9125; AVX512F-NEXT: kmovw %k0, %eax 9126; AVX512F-NEXT: testb %al, %al 9127; AVX512F-NEXT: je LBB58_110 9128; AVX512F-NEXT: ## BB#109: ## %cond.store107 9129; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9130; AVX512F-NEXT: vpextrb $6, %xmm0, 54(%rdi) 9131; AVX512F-NEXT: LBB58_110: ## %else108 9132; AVX512F-NEXT: kshiftlw $8, %k1, %k0 9133; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9134; AVX512F-NEXT: kmovw %k0, %eax 9135; AVX512F-NEXT: testb %al, %al 9136; AVX512F-NEXT: je LBB58_112 9137; AVX512F-NEXT: ## BB#111: ## %cond.store109 9138; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9139; AVX512F-NEXT: vpextrb $7, %xmm0, 55(%rdi) 9140; AVX512F-NEXT: LBB58_112: ## %else110 9141; AVX512F-NEXT: kshiftlw $7, %k1, %k0 9142; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9143; AVX512F-NEXT: kmovw %k0, %eax 9144; AVX512F-NEXT: testb %al, %al 9145; AVX512F-NEXT: je LBB58_114 9146; AVX512F-NEXT: ## BB#113: ## %cond.store111 9147; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9148; AVX512F-NEXT: vpextrb $8, %xmm0, 56(%rdi) 9149; AVX512F-NEXT: LBB58_114: ## %else112 9150; AVX512F-NEXT: kshiftlw $6, %k1, %k0 9151; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9152; AVX512F-NEXT: kmovw %k0, %eax 9153; AVX512F-NEXT: testb %al, %al 9154; AVX512F-NEXT: je LBB58_116 9155; AVX512F-NEXT: ## BB#115: ## %cond.store113 9156; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9157; AVX512F-NEXT: vpextrb $9, %xmm0, 57(%rdi) 9158; AVX512F-NEXT: LBB58_116: ## %else114 9159; AVX512F-NEXT: kshiftlw $5, %k1, %k0 9160; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9161; AVX512F-NEXT: kmovw %k0, %eax 9162; AVX512F-NEXT: testb %al, %al 9163; AVX512F-NEXT: je LBB58_118 9164; AVX512F-NEXT: ## BB#117: ## %cond.store115 9165; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9166; AVX512F-NEXT: vpextrb $10, %xmm0, 58(%rdi) 9167; AVX512F-NEXT: LBB58_118: ## %else116 9168; AVX512F-NEXT: kshiftlw $4, %k1, %k0 9169; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9170; AVX512F-NEXT: kmovw %k0, %eax 9171; AVX512F-NEXT: testb %al, %al 9172; AVX512F-NEXT: je LBB58_120 9173; AVX512F-NEXT: ## BB#119: ## %cond.store117 9174; 
AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9175; AVX512F-NEXT: vpextrb $11, %xmm0, 59(%rdi) 9176; AVX512F-NEXT: LBB58_120: ## %else118 9177; AVX512F-NEXT: kshiftlw $3, %k1, %k0 9178; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9179; AVX512F-NEXT: kmovw %k0, %eax 9180; AVX512F-NEXT: testb %al, %al 9181; AVX512F-NEXT: je LBB58_122 9182; AVX512F-NEXT: ## BB#121: ## %cond.store119 9183; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9184; AVX512F-NEXT: vpextrb $12, %xmm0, 60(%rdi) 9185; AVX512F-NEXT: LBB58_122: ## %else120 9186; AVX512F-NEXT: kshiftlw $2, %k1, %k0 9187; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9188; AVX512F-NEXT: kmovw %k0, %eax 9189; AVX512F-NEXT: testb %al, %al 9190; AVX512F-NEXT: je LBB58_124 9191; AVX512F-NEXT: ## BB#123: ## %cond.store121 9192; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9193; AVX512F-NEXT: vpextrb $13, %xmm0, 61(%rdi) 9194; AVX512F-NEXT: LBB58_124: ## %else122 9195; AVX512F-NEXT: kshiftlw $1, %k1, %k0 9196; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9197; AVX512F-NEXT: kmovw %k0, %eax 9198; AVX512F-NEXT: testb %al, %al 9199; AVX512F-NEXT: je LBB58_126 9200; AVX512F-NEXT: ## BB#125: ## %cond.store123 9201; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9202; AVX512F-NEXT: vpextrb $14, %xmm0, 62(%rdi) 9203; AVX512F-NEXT: LBB58_126: ## %else124 9204; AVX512F-NEXT: kshiftlw $0, %k1, %k0 9205; AVX512F-NEXT: kshiftrw $15, %k0, %k0 9206; AVX512F-NEXT: kmovw %k0, %eax 9207; AVX512F-NEXT: testb %al, %al 9208; AVX512F-NEXT: je LBB58_128 9209; AVX512F-NEXT: ## BB#127: ## %cond.store125 9210; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 9211; AVX512F-NEXT: vpextrb $15, %xmm0, 63(%rdi) 9212; AVX512F-NEXT: LBB58_128: ## %else126 9213; AVX512F-NEXT: retq 9214; 9215; SKX-LABEL: test_mask_store_64xi8: 9216; SKX: ## BB#0: 9217; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 9218; SKX-NEXT: vpmovb2m %zmm0, %k1 9219; SKX-NEXT: vmovdqu8 %zmm1, (%rdi) {%k1} 9220; SKX-NEXT: retq 9221 call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %val, <64 x i8>* %addr, i32 4, <64 x i1>%mask) 9222 ret void 9223} 9224declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>) 9225 9226define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) { 9227; AVX-LABEL: test_mask_store_8xi16: 9228; AVX: ## BB#0: 9229; AVX-NEXT: vpextrb $0, %xmm0, %eax 9230; AVX-NEXT: testb $1, %al 9231; AVX-NEXT: je LBB59_2 9232; AVX-NEXT: ## BB#1: ## %cond.store 9233; AVX-NEXT: vmovd %xmm1, %eax 9234; AVX-NEXT: movw %ax, (%rdi) 9235; AVX-NEXT: LBB59_2: ## %else 9236; AVX-NEXT: vpextrb $2, %xmm0, %eax 9237; AVX-NEXT: testb $1, %al 9238; AVX-NEXT: je LBB59_4 9239; AVX-NEXT: ## BB#3: ## %cond.store1 9240; AVX-NEXT: vpextrw $1, %xmm1, 2(%rdi) 9241; AVX-NEXT: LBB59_4: ## %else2 9242; AVX-NEXT: vpextrb $4, %xmm0, %eax 9243; AVX-NEXT: testb $1, %al 9244; AVX-NEXT: je LBB59_6 9245; AVX-NEXT: ## BB#5: ## %cond.store3 9246; AVX-NEXT: vpextrw $2, %xmm1, 4(%rdi) 9247; AVX-NEXT: LBB59_6: ## %else4 9248; AVX-NEXT: vpextrb $6, %xmm0, %eax 9249; AVX-NEXT: testb $1, %al 9250; AVX-NEXT: je LBB59_8 9251; AVX-NEXT: ## BB#7: ## %cond.store5 9252; AVX-NEXT: vpextrw $3, %xmm1, 6(%rdi) 9253; AVX-NEXT: LBB59_8: ## %else6 9254; AVX-NEXT: vpextrb $8, %xmm0, %eax 9255; AVX-NEXT: testb $1, %al 9256; AVX-NEXT: je LBB59_10 9257; AVX-NEXT: ## BB#9: ## %cond.store7 9258; AVX-NEXT: vpextrw $4, %xmm1, 8(%rdi) 9259; AVX-NEXT: LBB59_10: ## %else8 9260; AVX-NEXT: vpextrb $10, %xmm0, %eax 9261; AVX-NEXT: testb $1, %al 9262; AVX-NEXT: je LBB59_12 9263; AVX-NEXT: ## BB#11: ## %cond.store9 9264; AVX-NEXT: vpextrw $5, %xmm1, 10(%rdi) 9265; 
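
; Illustrative sketch (a hypothetical function added for clarity, not part of
; the autogenerated assertions and not exercised by any CHECK lines): the huge
; branch-per-lane expansions above only arise because the <64 x i1> mask is a
; runtime value on targets without AVX512BW's vmovdqu8. With a compile-time
; constant mask the intrinsic should fold; an all-zeroes mask disables every
; lane, so a function like this one should compile down to a plain return.
define void @store_64xi8_zero_mask_sketch(<64 x i8> %val, <64 x i8>* %addr) {
  ; All lanes disabled: nothing is written to %addr.
  call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %val, <64 x i8>* %addr, i32 4, <64 x i1> zeroinitializer)
  ret void
}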
define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; AVX-LABEL: test_mask_store_8xi16:
; AVX: ## BB#0:
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB59_2
; AVX-NEXT: ## BB#1: ## %cond.store
; AVX-NEXT: vmovd %xmm1, %eax
; AVX-NEXT: movw %ax, (%rdi)
; AVX-NEXT: LBB59_2: ## %else
; AVX-NEXT: vpextrb $2, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB59_4
; AVX-NEXT: ## BB#3: ## %cond.store1
; AVX-NEXT: vpextrw $1, %xmm1, 2(%rdi)
; AVX-NEXT: LBB59_4: ## %else2
; AVX-NEXT: vpextrb $4, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB59_6
; AVX-NEXT: ## BB#5: ## %cond.store3
; AVX-NEXT: vpextrw $2, %xmm1, 4(%rdi)
; AVX-NEXT: LBB59_6: ## %else4
; AVX-NEXT: vpextrb $6, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB59_8
; AVX-NEXT: ## BB#7: ## %cond.store5
; AVX-NEXT: vpextrw $3, %xmm1, 6(%rdi)
; AVX-NEXT: LBB59_8: ## %else6
; AVX-NEXT: vpextrb $8, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB59_10
; AVX-NEXT: ## BB#9: ## %cond.store7
; AVX-NEXT: vpextrw $4, %xmm1, 8(%rdi)
; AVX-NEXT: LBB59_10: ## %else8
; AVX-NEXT: vpextrb $10, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB59_12
; AVX-NEXT: ## BB#11: ## %cond.store9
; AVX-NEXT: vpextrw $5, %xmm1, 10(%rdi)
; AVX-NEXT: LBB59_12: ## %else10
; AVX-NEXT: vpextrb $12, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB59_14
; AVX-NEXT: ## BB#13: ## %cond.store11
; AVX-NEXT: vpextrw $6, %xmm1, 12(%rdi)
; AVX-NEXT: LBB59_14: ## %else12
; AVX-NEXT: vpextrb $14, %xmm0, %eax
; AVX-NEXT: testb $1, %al
; AVX-NEXT: je LBB59_16
; AVX-NEXT: ## BB#15: ## %cond.store13
; AVX-NEXT: vpextrw $7, %xmm1, 14(%rdi)
; AVX-NEXT: LBB59_16: ## %else14
; AVX-NEXT: retq
;
; AVX512F-LABEL: test_mask_store_8xi16:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_2
; AVX512F-NEXT: ## BB#1: ## %cond.store
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: movw %ax, (%rdi)
; AVX512F-NEXT: LBB59_2: ## %else
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_4
; AVX512F-NEXT: ## BB#3: ## %cond.store1
; AVX512F-NEXT: vpextrw $1, %xmm1, 2(%rdi)
; AVX512F-NEXT: LBB59_4: ## %else2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_6
; AVX512F-NEXT: ## BB#5: ## %cond.store3
; AVX512F-NEXT: vpextrw $2, %xmm1, 4(%rdi)
; AVX512F-NEXT: LBB59_6: ## %else4
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_8
; AVX512F-NEXT: ## BB#7: ## %cond.store5
; AVX512F-NEXT: vpextrw $3, %xmm1, 6(%rdi)
; AVX512F-NEXT: LBB59_8: ## %else6
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_10
; AVX512F-NEXT: ## BB#9: ## %cond.store7
; AVX512F-NEXT: vpextrw $4, %xmm1, 8(%rdi)
; AVX512F-NEXT: LBB59_10: ## %else8
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_12
; AVX512F-NEXT: ## BB#11: ## %cond.store9
; AVX512F-NEXT: vpextrw $5, %xmm1, 10(%rdi)
; AVX512F-NEXT: LBB59_12: ## %else10
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_14
; AVX512F-NEXT: ## BB#13: ## %cond.store11
; AVX512F-NEXT: vpextrw $6, %xmm1, 12(%rdi)
; AVX512F-NEXT: LBB59_14: ## %else12
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB59_16
; AVX512F-NEXT: ## BB#15: ## %cond.store13
; AVX512F-NEXT: vpextrw $7, %xmm1, 14(%rdi)
; AVX512F-NEXT: LBB59_16: ## %else14
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_mask_store_8xi16:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vmovdqu16 %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
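
; Illustrative sketch (hypothetical function, no CHECK lines attached): the
; other constant-mask extreme. With an all-ones <8 x i1> mask every lane is
; enabled, so the backend should be able to lower this to a single ordinary
; 16-byte store instead of the eight vpextrw branches tested above.
define void @store_8xi16_allones_mask_sketch(<8 x i16> %val, <8 x i16>* %addr) {
  ; All lanes enabled: equivalent to an unconditional store of %val.
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}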
define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; AVX1-LABEL: test_mask_store_16xi16:
; AVX1: ## BB#0:
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_2
; AVX1-NEXT: ## BB#1: ## %cond.store
; AVX1-NEXT: vmovd %xmm1, %eax
; AVX1-NEXT: movw %ax, (%rdi)
; AVX1-NEXT: LBB60_2: ## %else
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_4
; AVX1-NEXT: ## BB#3: ## %cond.store1
; AVX1-NEXT: vpextrw $1, %xmm1, 2(%rdi)
; AVX1-NEXT: LBB60_4: ## %else2
; AVX1-NEXT: vpextrb $2, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_6
; AVX1-NEXT: ## BB#5: ## %cond.store3
; AVX1-NEXT: vpextrw $2, %xmm1, 4(%rdi)
; AVX1-NEXT: LBB60_6: ## %else4
; AVX1-NEXT: vpextrb $3, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_8
; AVX1-NEXT: ## BB#7: ## %cond.store5
; AVX1-NEXT: vpextrw $3, %xmm1, 6(%rdi)
; AVX1-NEXT: LBB60_8: ## %else6
; AVX1-NEXT: vpextrb $4, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_10
; AVX1-NEXT: ## BB#9: ## %cond.store7
; AVX1-NEXT: vpextrw $4, %xmm1, 8(%rdi)
; AVX1-NEXT: LBB60_10: ## %else8
; AVX1-NEXT: vpextrb $5, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_12
; AVX1-NEXT: ## BB#11: ## %cond.store9
; AVX1-NEXT: vpextrw $5, %xmm1, 10(%rdi)
; AVX1-NEXT: LBB60_12: ## %else10
; AVX1-NEXT: vpextrb $6, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_14
; AVX1-NEXT: ## BB#13: ## %cond.store11
; AVX1-NEXT: vpextrw $6, %xmm1, 12(%rdi)
; AVX1-NEXT: LBB60_14: ## %else12
; AVX1-NEXT: vpextrb $7, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_16
; AVX1-NEXT: ## BB#15: ## %cond.store13
; AVX1-NEXT: vpextrw $7, %xmm1, 14(%rdi)
; AVX1-NEXT: LBB60_16: ## %else14
; AVX1-NEXT: vpextrb $8, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_18
; AVX1-NEXT: ## BB#17: ## %cond.store15
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovd %xmm2, %eax
; AVX1-NEXT: movw %ax, 16(%rdi)
; AVX1-NEXT: LBB60_18: ## %else16
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_20
; AVX1-NEXT: ## BB#19: ## %cond.store17
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrw $1, %xmm2, 18(%rdi)
; AVX1-NEXT: LBB60_20: ## %else18
; AVX1-NEXT: vpextrb $10, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_22
; AVX1-NEXT: ## BB#21: ## %cond.store19
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrw $2, %xmm2, 20(%rdi)
; AVX1-NEXT: LBB60_22: ## %else20
; AVX1-NEXT: vpextrb $11, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_24
; AVX1-NEXT: ## BB#23: ## %cond.store21
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrw $3, %xmm2, 22(%rdi)
; AVX1-NEXT: LBB60_24: ## %else22
; AVX1-NEXT: vpextrb $12, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_26
; AVX1-NEXT: ## BB#25: ## %cond.store23
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrw $4, %xmm2, 24(%rdi)
; AVX1-NEXT: LBB60_26: ## %else24
; AVX1-NEXT: vpextrb $13, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_28
; AVX1-NEXT: ## BB#27: ## %cond.store25
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrw $5, %xmm2, 26(%rdi)
; AVX1-NEXT: LBB60_28: ## %else26
; AVX1-NEXT: vpextrb $14, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_30
; AVX1-NEXT: ## BB#29: ## %cond.store27
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrw $6, %xmm2, 28(%rdi)
; AVX1-NEXT: LBB60_30: ## %else28
; AVX1-NEXT: vpextrb $15, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB60_32
; AVX1-NEXT: ## BB#31: ## %cond.store29
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrw $7, %xmm0, 30(%rdi)
; AVX1-NEXT: LBB60_32: ## %else30
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_mask_store_16xi16:
; AVX2: ## BB#0:
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_2
; AVX2-NEXT: ## BB#1: ## %cond.store
; AVX2-NEXT: vmovd %xmm1, %eax
; AVX2-NEXT: movw %ax, (%rdi)
; AVX2-NEXT: LBB60_2: ## %else
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_4
; AVX2-NEXT: ## BB#3: ## %cond.store1
; AVX2-NEXT: vpextrw $1, %xmm1, 2(%rdi)
; AVX2-NEXT: LBB60_4: ## %else2
; AVX2-NEXT: vpextrb $2, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_6
; AVX2-NEXT: ## BB#5: ## %cond.store3
; AVX2-NEXT: vpextrw $2, %xmm1, 4(%rdi)
; AVX2-NEXT: LBB60_6: ## %else4
; AVX2-NEXT: vpextrb $3, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_8
; AVX2-NEXT: ## BB#7: ## %cond.store5
; AVX2-NEXT: vpextrw $3, %xmm1, 6(%rdi)
; AVX2-NEXT: LBB60_8: ## %else6
; AVX2-NEXT: vpextrb $4, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_10
; AVX2-NEXT: ## BB#9: ## %cond.store7
; AVX2-NEXT: vpextrw $4, %xmm1, 8(%rdi)
; AVX2-NEXT: LBB60_10: ## %else8
; AVX2-NEXT: vpextrb $5, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_12
; AVX2-NEXT: ## BB#11: ## %cond.store9
; AVX2-NEXT: vpextrw $5, %xmm1, 10(%rdi)
; AVX2-NEXT: LBB60_12: ## %else10
; AVX2-NEXT: vpextrb $6, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_14
; AVX2-NEXT: ## BB#13: ## %cond.store11
; AVX2-NEXT: vpextrw $6, %xmm1, 12(%rdi)
; AVX2-NEXT: LBB60_14: ## %else12
; AVX2-NEXT: vpextrb $7, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_16
; AVX2-NEXT: ## BB#15: ## %cond.store13
; AVX2-NEXT: vpextrw $7, %xmm1, 14(%rdi)
; AVX2-NEXT: LBB60_16: ## %else14
; AVX2-NEXT: vpextrb $8, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_18
; AVX2-NEXT: ## BB#17: ## %cond.store15
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vmovd %xmm2, %eax
; AVX2-NEXT: movw %ax, 16(%rdi)
; AVX2-NEXT: LBB60_18: ## %else16
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_20
; AVX2-NEXT: ## BB#19: ## %cond.store17
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrw $1, %xmm2, 18(%rdi)
; AVX2-NEXT: LBB60_20: ## %else18
; AVX2-NEXT: vpextrb $10, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_22
; AVX2-NEXT: ## BB#21: ## %cond.store19
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrw $2, %xmm2, 20(%rdi)
; AVX2-NEXT: LBB60_22: ## %else20
; AVX2-NEXT: vpextrb $11, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_24
; AVX2-NEXT: ## BB#23: ## %cond.store21
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrw $3, %xmm2, 22(%rdi)
; AVX2-NEXT: LBB60_24: ## %else22
; AVX2-NEXT: vpextrb $12, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_26
; AVX2-NEXT: ## BB#25: ## %cond.store23
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrw $4, %xmm2, 24(%rdi)
; AVX2-NEXT: LBB60_26: ## %else24
; AVX2-NEXT: vpextrb $13, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_28
; AVX2-NEXT: ## BB#27: ## %cond.store25
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrw $5, %xmm2, 26(%rdi)
; AVX2-NEXT: LBB60_28: ## %else26
; AVX2-NEXT: vpextrb $14, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_30
; AVX2-NEXT: ## BB#29: ## %cond.store27
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrw $6, %xmm2, 28(%rdi)
; AVX2-NEXT: LBB60_30: ## %else28
; AVX2-NEXT: vpextrb $15, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB60_32
; AVX2-NEXT: ## BB#31: ## %cond.store29
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT: vpextrw $7, %xmm0, 30(%rdi)
; AVX2-NEXT: LBB60_32: ## %else30
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_mask_store_16xi16:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_2
; AVX512F-NEXT: ## BB#1: ## %cond.store
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: movw %ax, (%rdi)
; AVX512F-NEXT: LBB60_2: ## %else
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_4
; AVX512F-NEXT: ## BB#3: ## %cond.store1
; AVX512F-NEXT: vpextrw $1, %xmm1, 2(%rdi)
; AVX512F-NEXT: LBB60_4: ## %else2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_6
; AVX512F-NEXT: ## BB#5: ## %cond.store3
; AVX512F-NEXT: vpextrw $2, %xmm1, 4(%rdi)
; AVX512F-NEXT: LBB60_6: ## %else4
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_8
; AVX512F-NEXT: ## BB#7: ## %cond.store5
; AVX512F-NEXT: vpextrw $3, %xmm1, 6(%rdi)
; AVX512F-NEXT: LBB60_8: ## %else6
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_10
; AVX512F-NEXT: ## BB#9: ## %cond.store7
; AVX512F-NEXT: vpextrw $4, %xmm1, 8(%rdi)
; AVX512F-NEXT: LBB60_10: ## %else8
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_12
; AVX512F-NEXT: ## BB#11: ## %cond.store9
; AVX512F-NEXT: vpextrw $5, %xmm1, 10(%rdi)
; AVX512F-NEXT: LBB60_12: ## %else10
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_14
; AVX512F-NEXT: ## BB#13: ## %cond.store11
; AVX512F-NEXT: vpextrw $6, %xmm1, 12(%rdi)
; AVX512F-NEXT: LBB60_14: ## %else12
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_16
; AVX512F-NEXT: ## BB#15: ## %cond.store13
; AVX512F-NEXT: vpextrw $7, %xmm1, 14(%rdi)
; AVX512F-NEXT: LBB60_16: ## %else14
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_18
; AVX512F-NEXT: ## BB#17: ## %cond.store15
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: movw %ax, 16(%rdi)
; AVX512F-NEXT: LBB60_18: ## %else16
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_20
; AVX512F-NEXT: ## BB#19: ## %cond.store17
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vpextrw $1, %xmm0, 18(%rdi)
; AVX512F-NEXT: LBB60_20: ## %else18
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_22
; AVX512F-NEXT: ## BB#21: ## %cond.store19
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vpextrw $2, %xmm0, 20(%rdi)
; AVX512F-NEXT: LBB60_22: ## %else20
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_24
; AVX512F-NEXT: ## BB#23: ## %cond.store21
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vpextrw $3, %xmm0, 22(%rdi)
; AVX512F-NEXT: LBB60_24: ## %else22
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_26
; AVX512F-NEXT: ## BB#25: ## %cond.store23
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vpextrw $4, %xmm0, 24(%rdi)
; AVX512F-NEXT: LBB60_26: ## %else24
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_28
; AVX512F-NEXT: ## BB#27: ## %cond.store25
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vpextrw $5, %xmm0, 26(%rdi)
; AVX512F-NEXT: LBB60_28: ## %else26
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_30
; AVX512F-NEXT: ## BB#29: ## %cond.store27
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vpextrw $6, %xmm0, 28(%rdi)
; AVX512F-NEXT: LBB60_30: ## %else28
; AVX512F-NEXT: kshiftlw $0, %k0, %k0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: je LBB60_32
; AVX512F-NEXT: ## BB#31: ## %cond.store29
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vpextrw $7, %xmm0, 30(%rdi)
; AVX512F-NEXT: LBB60_32: ## %else30
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_mask_store_16xi16:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vmovdqu16 %ymm1, (%rdi) {%k1}
; SKX-NEXT: retq
  call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)

define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
; AVX1-LABEL: test_mask_store_32xi16:
; AVX1: ## BB#0:
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_2
; AVX1-NEXT: ## BB#1: ## %cond.store
; AVX1-NEXT: vmovd %xmm1, %eax
; AVX1-NEXT: movw %ax, (%rdi)
; AVX1-NEXT: LBB61_2: ## %else
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_4
; AVX1-NEXT: ## BB#3: ## %cond.store1
; AVX1-NEXT: vpextrw $1, %xmm1, 2(%rdi)
; AVX1-NEXT: LBB61_4: ## %else2
; AVX1-NEXT: vpextrb $2, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_6
; AVX1-NEXT: ## BB#5: ## %cond.store3
; AVX1-NEXT: vpextrw $2, %xmm1, 4(%rdi)
; AVX1-NEXT: LBB61_6: ## %else4
; AVX1-NEXT: vpextrb $3, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_8
; AVX1-NEXT: ## BB#7: ## %cond.store5
; AVX1-NEXT: vpextrw $3, %xmm1, 6(%rdi)
; AVX1-NEXT: LBB61_8: ## %else6
; AVX1-NEXT: vpextrb $4, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_10
; AVX1-NEXT: ## BB#9: ## %cond.store7
; AVX1-NEXT: vpextrw $4, %xmm1, 8(%rdi)
; AVX1-NEXT: LBB61_10: ## %else8
; AVX1-NEXT: vpextrb $5, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_12
; AVX1-NEXT: ## BB#11: ## %cond.store9
; AVX1-NEXT: vpextrw $5, %xmm1, 10(%rdi)
; AVX1-NEXT: LBB61_12: ## %else10
; AVX1-NEXT: vpextrb $6, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_14
; AVX1-NEXT: ## BB#13: ## %cond.store11
; AVX1-NEXT: vpextrw $6, %xmm1, 12(%rdi)
; AVX1-NEXT: LBB61_14: ## %else12
; AVX1-NEXT: vpextrb $7, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_16
; AVX1-NEXT: ## BB#15: ## %cond.store13
; AVX1-NEXT: vpextrw $7, %xmm1, 14(%rdi)
; AVX1-NEXT: LBB61_16: ## %else14
; AVX1-NEXT: vpextrb $8, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_18
; AVX1-NEXT: ## BB#17: ## %cond.store15
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vmovd %xmm3, %eax
; AVX1-NEXT: movw %ax, 16(%rdi)
; AVX1-NEXT: LBB61_18: ## %else16
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_20
; AVX1-NEXT: ## BB#19: ## %cond.store17
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpextrw $1, %xmm3, 18(%rdi)
; AVX1-NEXT: LBB61_20: ## %else18
; AVX1-NEXT: vpextrb $10, %xmm0, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB61_22
; AVX1-NEXT: ## BB#21: ## %cond.store19
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpextrw $2, %xmm3, 20(%rdi)
; AVX1-NEXT: LBB61_22: ## %else20
; AVX1-NEXT:
vpextrb $11, %xmm0, %eax 9817; AVX1-NEXT: testb $1, %al 9818; AVX1-NEXT: je LBB61_24 9819; AVX1-NEXT: ## BB#23: ## %cond.store21 9820; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 9821; AVX1-NEXT: vpextrw $3, %xmm3, 22(%rdi) 9822; AVX1-NEXT: LBB61_24: ## %else22 9823; AVX1-NEXT: vpextrb $12, %xmm0, %eax 9824; AVX1-NEXT: testb $1, %al 9825; AVX1-NEXT: je LBB61_26 9826; AVX1-NEXT: ## BB#25: ## %cond.store23 9827; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 9828; AVX1-NEXT: vpextrw $4, %xmm3, 24(%rdi) 9829; AVX1-NEXT: LBB61_26: ## %else24 9830; AVX1-NEXT: vpextrb $13, %xmm0, %eax 9831; AVX1-NEXT: testb $1, %al 9832; AVX1-NEXT: je LBB61_28 9833; AVX1-NEXT: ## BB#27: ## %cond.store25 9834; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 9835; AVX1-NEXT: vpextrw $5, %xmm3, 26(%rdi) 9836; AVX1-NEXT: LBB61_28: ## %else26 9837; AVX1-NEXT: vpextrb $14, %xmm0, %eax 9838; AVX1-NEXT: testb $1, %al 9839; AVX1-NEXT: je LBB61_30 9840; AVX1-NEXT: ## BB#29: ## %cond.store27 9841; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 9842; AVX1-NEXT: vpextrw $6, %xmm3, 28(%rdi) 9843; AVX1-NEXT: LBB61_30: ## %else28 9844; AVX1-NEXT: vpextrb $15, %xmm0, %eax 9845; AVX1-NEXT: testb $1, %al 9846; AVX1-NEXT: je LBB61_32 9847; AVX1-NEXT: ## BB#31: ## %cond.store29 9848; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 9849; AVX1-NEXT: vpextrw $7, %xmm1, 30(%rdi) 9850; AVX1-NEXT: LBB61_32: ## %else30 9851; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 9852; AVX1-NEXT: vpextrb $0, %xmm0, %eax 9853; AVX1-NEXT: testb $1, %al 9854; AVX1-NEXT: je LBB61_34 9855; AVX1-NEXT: ## BB#33: ## %cond.store31 9856; AVX1-NEXT: vmovd %xmm2, %eax 9857; AVX1-NEXT: movw %ax, 32(%rdi) 9858; AVX1-NEXT: LBB61_34: ## %else32 9859; AVX1-NEXT: vpextrb $1, %xmm0, %eax 9860; AVX1-NEXT: testb $1, %al 9861; AVX1-NEXT: je LBB61_36 9862; AVX1-NEXT: ## BB#35: ## %cond.store33 9863; AVX1-NEXT: vpextrw $1, %xmm2, 34(%rdi) 9864; AVX1-NEXT: LBB61_36: ## %else34 9865; AVX1-NEXT: vpextrb $2, %xmm0, %eax 9866; AVX1-NEXT: testb $1, %al 9867; AVX1-NEXT: je LBB61_38 9868; AVX1-NEXT: ## BB#37: ## %cond.store35 9869; AVX1-NEXT: vpextrw $2, %xmm2, 36(%rdi) 9870; AVX1-NEXT: LBB61_38: ## %else36 9871; AVX1-NEXT: vpextrb $3, %xmm0, %eax 9872; AVX1-NEXT: testb $1, %al 9873; AVX1-NEXT: je LBB61_40 9874; AVX1-NEXT: ## BB#39: ## %cond.store37 9875; AVX1-NEXT: vpextrw $3, %xmm2, 38(%rdi) 9876; AVX1-NEXT: LBB61_40: ## %else38 9877; AVX1-NEXT: vpextrb $4, %xmm0, %eax 9878; AVX1-NEXT: testb $1, %al 9879; AVX1-NEXT: je LBB61_42 9880; AVX1-NEXT: ## BB#41: ## %cond.store39 9881; AVX1-NEXT: vpextrw $4, %xmm2, 40(%rdi) 9882; AVX1-NEXT: LBB61_42: ## %else40 9883; AVX1-NEXT: vpextrb $5, %xmm0, %eax 9884; AVX1-NEXT: testb $1, %al 9885; AVX1-NEXT: je LBB61_44 9886; AVX1-NEXT: ## BB#43: ## %cond.store41 9887; AVX1-NEXT: vpextrw $5, %xmm2, 42(%rdi) 9888; AVX1-NEXT: LBB61_44: ## %else42 9889; AVX1-NEXT: vpextrb $6, %xmm0, %eax 9890; AVX1-NEXT: testb $1, %al 9891; AVX1-NEXT: je LBB61_46 9892; AVX1-NEXT: ## BB#45: ## %cond.store43 9893; AVX1-NEXT: vpextrw $6, %xmm2, 44(%rdi) 9894; AVX1-NEXT: LBB61_46: ## %else44 9895; AVX1-NEXT: vpextrb $7, %xmm0, %eax 9896; AVX1-NEXT: testb $1, %al 9897; AVX1-NEXT: je LBB61_48 9898; AVX1-NEXT: ## BB#47: ## %cond.store45 9899; AVX1-NEXT: vpextrw $7, %xmm2, 46(%rdi) 9900; AVX1-NEXT: LBB61_48: ## %else46 9901; AVX1-NEXT: vpextrb $8, %xmm0, %eax 9902; AVX1-NEXT: testb $1, %al 9903; AVX1-NEXT: je LBB61_50 9904; AVX1-NEXT: ## BB#49: ## %cond.store47 9905; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 9906; AVX1-NEXT: vmovd %xmm1, %eax 9907; AVX1-NEXT: movw %ax, 48(%rdi) 9908; AVX1-NEXT: LBB61_50: ## 
%else48 9909; AVX1-NEXT: vpextrb $9, %xmm0, %eax 9910; AVX1-NEXT: testb $1, %al 9911; AVX1-NEXT: je LBB61_52 9912; AVX1-NEXT: ## BB#51: ## %cond.store49 9913; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 9914; AVX1-NEXT: vpextrw $1, %xmm1, 50(%rdi) 9915; AVX1-NEXT: LBB61_52: ## %else50 9916; AVX1-NEXT: vpextrb $10, %xmm0, %eax 9917; AVX1-NEXT: testb $1, %al 9918; AVX1-NEXT: je LBB61_54 9919; AVX1-NEXT: ## BB#53: ## %cond.store51 9920; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 9921; AVX1-NEXT: vpextrw $2, %xmm1, 52(%rdi) 9922; AVX1-NEXT: LBB61_54: ## %else52 9923; AVX1-NEXT: vpextrb $11, %xmm0, %eax 9924; AVX1-NEXT: testb $1, %al 9925; AVX1-NEXT: je LBB61_56 9926; AVX1-NEXT: ## BB#55: ## %cond.store53 9927; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 9928; AVX1-NEXT: vpextrw $3, %xmm1, 54(%rdi) 9929; AVX1-NEXT: LBB61_56: ## %else54 9930; AVX1-NEXT: vpextrb $12, %xmm0, %eax 9931; AVX1-NEXT: testb $1, %al 9932; AVX1-NEXT: je LBB61_58 9933; AVX1-NEXT: ## BB#57: ## %cond.store55 9934; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 9935; AVX1-NEXT: vpextrw $4, %xmm1, 56(%rdi) 9936; AVX1-NEXT: LBB61_58: ## %else56 9937; AVX1-NEXT: vpextrb $13, %xmm0, %eax 9938; AVX1-NEXT: testb $1, %al 9939; AVX1-NEXT: je LBB61_60 9940; AVX1-NEXT: ## BB#59: ## %cond.store57 9941; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 9942; AVX1-NEXT: vpextrw $5, %xmm1, 58(%rdi) 9943; AVX1-NEXT: LBB61_60: ## %else58 9944; AVX1-NEXT: vpextrb $14, %xmm0, %eax 9945; AVX1-NEXT: testb $1, %al 9946; AVX1-NEXT: je LBB61_62 9947; AVX1-NEXT: ## BB#61: ## %cond.store59 9948; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 9949; AVX1-NEXT: vpextrw $6, %xmm1, 60(%rdi) 9950; AVX1-NEXT: LBB61_62: ## %else60 9951; AVX1-NEXT: vpextrb $15, %xmm0, %eax 9952; AVX1-NEXT: testb $1, %al 9953; AVX1-NEXT: je LBB61_64 9954; AVX1-NEXT: ## BB#63: ## %cond.store61 9955; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm0 9956; AVX1-NEXT: vpextrw $7, %xmm0, 62(%rdi) 9957; AVX1-NEXT: LBB61_64: ## %else62 9958; AVX1-NEXT: vzeroupper 9959; AVX1-NEXT: retq 9960; 9961; AVX2-LABEL: test_mask_store_32xi16: 9962; AVX2: ## BB#0: 9963; AVX2-NEXT: vpextrb $0, %xmm0, %eax 9964; AVX2-NEXT: testb $1, %al 9965; AVX2-NEXT: je LBB61_2 9966; AVX2-NEXT: ## BB#1: ## %cond.store 9967; AVX2-NEXT: vmovd %xmm1, %eax 9968; AVX2-NEXT: movw %ax, (%rdi) 9969; AVX2-NEXT: LBB61_2: ## %else 9970; AVX2-NEXT: vpextrb $1, %xmm0, %eax 9971; AVX2-NEXT: testb $1, %al 9972; AVX2-NEXT: je LBB61_4 9973; AVX2-NEXT: ## BB#3: ## %cond.store1 9974; AVX2-NEXT: vpextrw $1, %xmm1, 2(%rdi) 9975; AVX2-NEXT: LBB61_4: ## %else2 9976; AVX2-NEXT: vpextrb $2, %xmm0, %eax 9977; AVX2-NEXT: testb $1, %al 9978; AVX2-NEXT: je LBB61_6 9979; AVX2-NEXT: ## BB#5: ## %cond.store3 9980; AVX2-NEXT: vpextrw $2, %xmm1, 4(%rdi) 9981; AVX2-NEXT: LBB61_6: ## %else4 9982; AVX2-NEXT: vpextrb $3, %xmm0, %eax 9983; AVX2-NEXT: testb $1, %al 9984; AVX2-NEXT: je LBB61_8 9985; AVX2-NEXT: ## BB#7: ## %cond.store5 9986; AVX2-NEXT: vpextrw $3, %xmm1, 6(%rdi) 9987; AVX2-NEXT: LBB61_8: ## %else6 9988; AVX2-NEXT: vpextrb $4, %xmm0, %eax 9989; AVX2-NEXT: testb $1, %al 9990; AVX2-NEXT: je LBB61_10 9991; AVX2-NEXT: ## BB#9: ## %cond.store7 9992; AVX2-NEXT: vpextrw $4, %xmm1, 8(%rdi) 9993; AVX2-NEXT: LBB61_10: ## %else8 9994; AVX2-NEXT: vpextrb $5, %xmm0, %eax 9995; AVX2-NEXT: testb $1, %al 9996; AVX2-NEXT: je LBB61_12 9997; AVX2-NEXT: ## BB#11: ## %cond.store9 9998; AVX2-NEXT: vpextrw $5, %xmm1, 10(%rdi) 9999; AVX2-NEXT: LBB61_12: ## %else10 10000; AVX2-NEXT: vpextrb $6, %xmm0, %eax 10001; AVX2-NEXT: testb $1, %al 10002; AVX2-NEXT: je LBB61_14 10003; AVX2-NEXT: ## BB#13: ## 
%cond.store11 10004; AVX2-NEXT: vpextrw $6, %xmm1, 12(%rdi) 10005; AVX2-NEXT: LBB61_14: ## %else12 10006; AVX2-NEXT: vpextrb $7, %xmm0, %eax 10007; AVX2-NEXT: testb $1, %al 10008; AVX2-NEXT: je LBB61_16 10009; AVX2-NEXT: ## BB#15: ## %cond.store13 10010; AVX2-NEXT: vpextrw $7, %xmm1, 14(%rdi) 10011; AVX2-NEXT: LBB61_16: ## %else14 10012; AVX2-NEXT: vpextrb $8, %xmm0, %eax 10013; AVX2-NEXT: testb $1, %al 10014; AVX2-NEXT: je LBB61_18 10015; AVX2-NEXT: ## BB#17: ## %cond.store15 10016; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 10017; AVX2-NEXT: vmovd %xmm3, %eax 10018; AVX2-NEXT: movw %ax, 16(%rdi) 10019; AVX2-NEXT: LBB61_18: ## %else16 10020; AVX2-NEXT: vpextrb $9, %xmm0, %eax 10021; AVX2-NEXT: testb $1, %al 10022; AVX2-NEXT: je LBB61_20 10023; AVX2-NEXT: ## BB#19: ## %cond.store17 10024; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 10025; AVX2-NEXT: vpextrw $1, %xmm3, 18(%rdi) 10026; AVX2-NEXT: LBB61_20: ## %else18 10027; AVX2-NEXT: vpextrb $10, %xmm0, %eax 10028; AVX2-NEXT: testb $1, %al 10029; AVX2-NEXT: je LBB61_22 10030; AVX2-NEXT: ## BB#21: ## %cond.store19 10031; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 10032; AVX2-NEXT: vpextrw $2, %xmm3, 20(%rdi) 10033; AVX2-NEXT: LBB61_22: ## %else20 10034; AVX2-NEXT: vpextrb $11, %xmm0, %eax 10035; AVX2-NEXT: testb $1, %al 10036; AVX2-NEXT: je LBB61_24 10037; AVX2-NEXT: ## BB#23: ## %cond.store21 10038; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 10039; AVX2-NEXT: vpextrw $3, %xmm3, 22(%rdi) 10040; AVX2-NEXT: LBB61_24: ## %else22 10041; AVX2-NEXT: vpextrb $12, %xmm0, %eax 10042; AVX2-NEXT: testb $1, %al 10043; AVX2-NEXT: je LBB61_26 10044; AVX2-NEXT: ## BB#25: ## %cond.store23 10045; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 10046; AVX2-NEXT: vpextrw $4, %xmm3, 24(%rdi) 10047; AVX2-NEXT: LBB61_26: ## %else24 10048; AVX2-NEXT: vpextrb $13, %xmm0, %eax 10049; AVX2-NEXT: testb $1, %al 10050; AVX2-NEXT: je LBB61_28 10051; AVX2-NEXT: ## BB#27: ## %cond.store25 10052; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 10053; AVX2-NEXT: vpextrw $5, %xmm3, 26(%rdi) 10054; AVX2-NEXT: LBB61_28: ## %else26 10055; AVX2-NEXT: vpextrb $14, %xmm0, %eax 10056; AVX2-NEXT: testb $1, %al 10057; AVX2-NEXT: je LBB61_30 10058; AVX2-NEXT: ## BB#29: ## %cond.store27 10059; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 10060; AVX2-NEXT: vpextrw $6, %xmm3, 28(%rdi) 10061; AVX2-NEXT: LBB61_30: ## %else28 10062; AVX2-NEXT: vpextrb $15, %xmm0, %eax 10063; AVX2-NEXT: testb $1, %al 10064; AVX2-NEXT: je LBB61_32 10065; AVX2-NEXT: ## BB#31: ## %cond.store29 10066; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 10067; AVX2-NEXT: vpextrw $7, %xmm1, 30(%rdi) 10068; AVX2-NEXT: LBB61_32: ## %else30 10069; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 10070; AVX2-NEXT: vpextrb $0, %xmm0, %eax 10071; AVX2-NEXT: testb $1, %al 10072; AVX2-NEXT: je LBB61_34 10073; AVX2-NEXT: ## BB#33: ## %cond.store31 10074; AVX2-NEXT: vmovd %xmm2, %eax 10075; AVX2-NEXT: movw %ax, 32(%rdi) 10076; AVX2-NEXT: LBB61_34: ## %else32 10077; AVX2-NEXT: vpextrb $1, %xmm0, %eax 10078; AVX2-NEXT: testb $1, %al 10079; AVX2-NEXT: je LBB61_36 10080; AVX2-NEXT: ## BB#35: ## %cond.store33 10081; AVX2-NEXT: vpextrw $1, %xmm2, 34(%rdi) 10082; AVX2-NEXT: LBB61_36: ## %else34 10083; AVX2-NEXT: vpextrb $2, %xmm0, %eax 10084; AVX2-NEXT: testb $1, %al 10085; AVX2-NEXT: je LBB61_38 10086; AVX2-NEXT: ## BB#37: ## %cond.store35 10087; AVX2-NEXT: vpextrw $2, %xmm2, 36(%rdi) 10088; AVX2-NEXT: LBB61_38: ## %else36 10089; AVX2-NEXT: vpextrb $3, %xmm0, %eax 10090; AVX2-NEXT: testb $1, %al 10091; AVX2-NEXT: je LBB61_40 10092; AVX2-NEXT: ## BB#39: ## %cond.store37 10093; 
AVX2-NEXT: vpextrw $3, %xmm2, 38(%rdi) 10094; AVX2-NEXT: LBB61_40: ## %else38 10095; AVX2-NEXT: vpextrb $4, %xmm0, %eax 10096; AVX2-NEXT: testb $1, %al 10097; AVX2-NEXT: je LBB61_42 10098; AVX2-NEXT: ## BB#41: ## %cond.store39 10099; AVX2-NEXT: vpextrw $4, %xmm2, 40(%rdi) 10100; AVX2-NEXT: LBB61_42: ## %else40 10101; AVX2-NEXT: vpextrb $5, %xmm0, %eax 10102; AVX2-NEXT: testb $1, %al 10103; AVX2-NEXT: je LBB61_44 10104; AVX2-NEXT: ## BB#43: ## %cond.store41 10105; AVX2-NEXT: vpextrw $5, %xmm2, 42(%rdi) 10106; AVX2-NEXT: LBB61_44: ## %else42 10107; AVX2-NEXT: vpextrb $6, %xmm0, %eax 10108; AVX2-NEXT: testb $1, %al 10109; AVX2-NEXT: je LBB61_46 10110; AVX2-NEXT: ## BB#45: ## %cond.store43 10111; AVX2-NEXT: vpextrw $6, %xmm2, 44(%rdi) 10112; AVX2-NEXT: LBB61_46: ## %else44 10113; AVX2-NEXT: vpextrb $7, %xmm0, %eax 10114; AVX2-NEXT: testb $1, %al 10115; AVX2-NEXT: je LBB61_48 10116; AVX2-NEXT: ## BB#47: ## %cond.store45 10117; AVX2-NEXT: vpextrw $7, %xmm2, 46(%rdi) 10118; AVX2-NEXT: LBB61_48: ## %else46 10119; AVX2-NEXT: vpextrb $8, %xmm0, %eax 10120; AVX2-NEXT: testb $1, %al 10121; AVX2-NEXT: je LBB61_50 10122; AVX2-NEXT: ## BB#49: ## %cond.store47 10123; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 10124; AVX2-NEXT: vmovd %xmm1, %eax 10125; AVX2-NEXT: movw %ax, 48(%rdi) 10126; AVX2-NEXT: LBB61_50: ## %else48 10127; AVX2-NEXT: vpextrb $9, %xmm0, %eax 10128; AVX2-NEXT: testb $1, %al 10129; AVX2-NEXT: je LBB61_52 10130; AVX2-NEXT: ## BB#51: ## %cond.store49 10131; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 10132; AVX2-NEXT: vpextrw $1, %xmm1, 50(%rdi) 10133; AVX2-NEXT: LBB61_52: ## %else50 10134; AVX2-NEXT: vpextrb $10, %xmm0, %eax 10135; AVX2-NEXT: testb $1, %al 10136; AVX2-NEXT: je LBB61_54 10137; AVX2-NEXT: ## BB#53: ## %cond.store51 10138; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 10139; AVX2-NEXT: vpextrw $2, %xmm1, 52(%rdi) 10140; AVX2-NEXT: LBB61_54: ## %else52 10141; AVX2-NEXT: vpextrb $11, %xmm0, %eax 10142; AVX2-NEXT: testb $1, %al 10143; AVX2-NEXT: je LBB61_56 10144; AVX2-NEXT: ## BB#55: ## %cond.store53 10145; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 10146; AVX2-NEXT: vpextrw $3, %xmm1, 54(%rdi) 10147; AVX2-NEXT: LBB61_56: ## %else54 10148; AVX2-NEXT: vpextrb $12, %xmm0, %eax 10149; AVX2-NEXT: testb $1, %al 10150; AVX2-NEXT: je LBB61_58 10151; AVX2-NEXT: ## BB#57: ## %cond.store55 10152; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 10153; AVX2-NEXT: vpextrw $4, %xmm1, 56(%rdi) 10154; AVX2-NEXT: LBB61_58: ## %else56 10155; AVX2-NEXT: vpextrb $13, %xmm0, %eax 10156; AVX2-NEXT: testb $1, %al 10157; AVX2-NEXT: je LBB61_60 10158; AVX2-NEXT: ## BB#59: ## %cond.store57 10159; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 10160; AVX2-NEXT: vpextrw $5, %xmm1, 58(%rdi) 10161; AVX2-NEXT: LBB61_60: ## %else58 10162; AVX2-NEXT: vpextrb $14, %xmm0, %eax 10163; AVX2-NEXT: testb $1, %al 10164; AVX2-NEXT: je LBB61_62 10165; AVX2-NEXT: ## BB#61: ## %cond.store59 10166; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 10167; AVX2-NEXT: vpextrw $6, %xmm1, 60(%rdi) 10168; AVX2-NEXT: LBB61_62: ## %else60 10169; AVX2-NEXT: vpextrb $15, %xmm0, %eax 10170; AVX2-NEXT: testb $1, %al 10171; AVX2-NEXT: je LBB61_64 10172; AVX2-NEXT: ## BB#63: ## %cond.store61 10173; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm0 10174; AVX2-NEXT: vpextrw $7, %xmm0, 62(%rdi) 10175; AVX2-NEXT: LBB61_64: ## %else62 10176; AVX2-NEXT: vzeroupper 10177; AVX2-NEXT: retq 10178; 10179; AVX512F-LABEL: test_mask_store_32xi16: 10180; AVX512F: ## BB#0: 10181; AVX512F-NEXT: vpextrb $0, %xmm0, %eax 10182; AVX512F-NEXT: testb $1, %al 10183; AVX512F-NEXT: je LBB61_2 10184; 
AVX512F-NEXT: ## BB#1: ## %cond.store 10185; AVX512F-NEXT: vmovd %xmm1, %eax 10186; AVX512F-NEXT: movw %ax, (%rdi) 10187; AVX512F-NEXT: LBB61_2: ## %else 10188; AVX512F-NEXT: vpextrb $1, %xmm0, %eax 10189; AVX512F-NEXT: testb $1, %al 10190; AVX512F-NEXT: je LBB61_4 10191; AVX512F-NEXT: ## BB#3: ## %cond.store1 10192; AVX512F-NEXT: vpextrw $1, %xmm1, 2(%rdi) 10193; AVX512F-NEXT: LBB61_4: ## %else2 10194; AVX512F-NEXT: vpextrb $2, %xmm0, %eax 10195; AVX512F-NEXT: testb $1, %al 10196; AVX512F-NEXT: je LBB61_6 10197; AVX512F-NEXT: ## BB#5: ## %cond.store3 10198; AVX512F-NEXT: vpextrw $2, %xmm1, 4(%rdi) 10199; AVX512F-NEXT: LBB61_6: ## %else4 10200; AVX512F-NEXT: vpextrb $3, %xmm0, %eax 10201; AVX512F-NEXT: testb $1, %al 10202; AVX512F-NEXT: je LBB61_8 10203; AVX512F-NEXT: ## BB#7: ## %cond.store5 10204; AVX512F-NEXT: vpextrw $3, %xmm1, 6(%rdi) 10205; AVX512F-NEXT: LBB61_8: ## %else6 10206; AVX512F-NEXT: vpextrb $4, %xmm0, %eax 10207; AVX512F-NEXT: testb $1, %al 10208; AVX512F-NEXT: je LBB61_10 10209; AVX512F-NEXT: ## BB#9: ## %cond.store7 10210; AVX512F-NEXT: vpextrw $4, %xmm1, 8(%rdi) 10211; AVX512F-NEXT: LBB61_10: ## %else8 10212; AVX512F-NEXT: vpextrb $5, %xmm0, %eax 10213; AVX512F-NEXT: testb $1, %al 10214; AVX512F-NEXT: je LBB61_12 10215; AVX512F-NEXT: ## BB#11: ## %cond.store9 10216; AVX512F-NEXT: vpextrw $5, %xmm1, 10(%rdi) 10217; AVX512F-NEXT: LBB61_12: ## %else10 10218; AVX512F-NEXT: vpextrb $6, %xmm0, %eax 10219; AVX512F-NEXT: testb $1, %al 10220; AVX512F-NEXT: je LBB61_14 10221; AVX512F-NEXT: ## BB#13: ## %cond.store11 10222; AVX512F-NEXT: vpextrw $6, %xmm1, 12(%rdi) 10223; AVX512F-NEXT: LBB61_14: ## %else12 10224; AVX512F-NEXT: vpextrb $7, %xmm0, %eax 10225; AVX512F-NEXT: testb $1, %al 10226; AVX512F-NEXT: je LBB61_16 10227; AVX512F-NEXT: ## BB#15: ## %cond.store13 10228; AVX512F-NEXT: vpextrw $7, %xmm1, 14(%rdi) 10229; AVX512F-NEXT: LBB61_16: ## %else14 10230; AVX512F-NEXT: vpextrb $8, %xmm0, %eax 10231; AVX512F-NEXT: testb $1, %al 10232; AVX512F-NEXT: je LBB61_18 10233; AVX512F-NEXT: ## BB#17: ## %cond.store15 10234; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 10235; AVX512F-NEXT: vmovd %xmm3, %eax 10236; AVX512F-NEXT: movw %ax, 16(%rdi) 10237; AVX512F-NEXT: LBB61_18: ## %else16 10238; AVX512F-NEXT: vpextrb $9, %xmm0, %eax 10239; AVX512F-NEXT: testb $1, %al 10240; AVX512F-NEXT: je LBB61_20 10241; AVX512F-NEXT: ## BB#19: ## %cond.store17 10242; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 10243; AVX512F-NEXT: vpextrw $1, %xmm3, 18(%rdi) 10244; AVX512F-NEXT: LBB61_20: ## %else18 10245; AVX512F-NEXT: vpextrb $10, %xmm0, %eax 10246; AVX512F-NEXT: testb $1, %al 10247; AVX512F-NEXT: je LBB61_22 10248; AVX512F-NEXT: ## BB#21: ## %cond.store19 10249; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 10250; AVX512F-NEXT: vpextrw $2, %xmm3, 20(%rdi) 10251; AVX512F-NEXT: LBB61_22: ## %else20 10252; AVX512F-NEXT: vpextrb $11, %xmm0, %eax 10253; AVX512F-NEXT: testb $1, %al 10254; AVX512F-NEXT: je LBB61_24 10255; AVX512F-NEXT: ## BB#23: ## %cond.store21 10256; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 10257; AVX512F-NEXT: vpextrw $3, %xmm3, 22(%rdi) 10258; AVX512F-NEXT: LBB61_24: ## %else22 10259; AVX512F-NEXT: vpextrb $12, %xmm0, %eax 10260; AVX512F-NEXT: testb $1, %al 10261; AVX512F-NEXT: je LBB61_26 10262; AVX512F-NEXT: ## BB#25: ## %cond.store23 10263; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 10264; AVX512F-NEXT: vpextrw $4, %xmm3, 24(%rdi) 10265; AVX512F-NEXT: LBB61_26: ## %else24 10266; AVX512F-NEXT: vpextrb $13, %xmm0, %eax 10267; AVX512F-NEXT: testb $1, %al 10268; AVX512F-NEXT: je 
LBB61_28 10269; AVX512F-NEXT: ## BB#27: ## %cond.store25 10270; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 10271; AVX512F-NEXT: vpextrw $5, %xmm3, 26(%rdi) 10272; AVX512F-NEXT: LBB61_28: ## %else26 10273; AVX512F-NEXT: vpextrb $14, %xmm0, %eax 10274; AVX512F-NEXT: testb $1, %al 10275; AVX512F-NEXT: je LBB61_30 10276; AVX512F-NEXT: ## BB#29: ## %cond.store27 10277; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 10278; AVX512F-NEXT: vpextrw $6, %xmm3, 28(%rdi) 10279; AVX512F-NEXT: LBB61_30: ## %else28 10280; AVX512F-NEXT: vpextrb $15, %xmm0, %eax 10281; AVX512F-NEXT: testb $1, %al 10282; AVX512F-NEXT: je LBB61_32 10283; AVX512F-NEXT: ## BB#31: ## %cond.store29 10284; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 10285; AVX512F-NEXT: vpextrw $7, %xmm1, 30(%rdi) 10286; AVX512F-NEXT: LBB61_32: ## %else30 10287; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 10288; AVX512F-NEXT: vpextrb $0, %xmm0, %eax 10289; AVX512F-NEXT: testb $1, %al 10290; AVX512F-NEXT: je LBB61_34 10291; AVX512F-NEXT: ## BB#33: ## %cond.store31 10292; AVX512F-NEXT: vmovd %xmm2, %eax 10293; AVX512F-NEXT: movw %ax, 32(%rdi) 10294; AVX512F-NEXT: LBB61_34: ## %else32 10295; AVX512F-NEXT: vpextrb $1, %xmm0, %eax 10296; AVX512F-NEXT: testb $1, %al 10297; AVX512F-NEXT: je LBB61_36 10298; AVX512F-NEXT: ## BB#35: ## %cond.store33 10299; AVX512F-NEXT: vpextrw $1, %xmm2, 34(%rdi) 10300; AVX512F-NEXT: LBB61_36: ## %else34 10301; AVX512F-NEXT: vpextrb $2, %xmm0, %eax 10302; AVX512F-NEXT: testb $1, %al 10303; AVX512F-NEXT: je LBB61_38 10304; AVX512F-NEXT: ## BB#37: ## %cond.store35 10305; AVX512F-NEXT: vpextrw $2, %xmm2, 36(%rdi) 10306; AVX512F-NEXT: LBB61_38: ## %else36 10307; AVX512F-NEXT: vpextrb $3, %xmm0, %eax 10308; AVX512F-NEXT: testb $1, %al 10309; AVX512F-NEXT: je LBB61_40 10310; AVX512F-NEXT: ## BB#39: ## %cond.store37 10311; AVX512F-NEXT: vpextrw $3, %xmm2, 38(%rdi) 10312; AVX512F-NEXT: LBB61_40: ## %else38 10313; AVX512F-NEXT: vpextrb $4, %xmm0, %eax 10314; AVX512F-NEXT: testb $1, %al 10315; AVX512F-NEXT: je LBB61_42 10316; AVX512F-NEXT: ## BB#41: ## %cond.store39 10317; AVX512F-NEXT: vpextrw $4, %xmm2, 40(%rdi) 10318; AVX512F-NEXT: LBB61_42: ## %else40 10319; AVX512F-NEXT: vpextrb $5, %xmm0, %eax 10320; AVX512F-NEXT: testb $1, %al 10321; AVX512F-NEXT: je LBB61_44 10322; AVX512F-NEXT: ## BB#43: ## %cond.store41 10323; AVX512F-NEXT: vpextrw $5, %xmm2, 42(%rdi) 10324; AVX512F-NEXT: LBB61_44: ## %else42 10325; AVX512F-NEXT: vpextrb $6, %xmm0, %eax 10326; AVX512F-NEXT: testb $1, %al 10327; AVX512F-NEXT: je LBB61_46 10328; AVX512F-NEXT: ## BB#45: ## %cond.store43 10329; AVX512F-NEXT: vpextrw $6, %xmm2, 44(%rdi) 10330; AVX512F-NEXT: LBB61_46: ## %else44 10331; AVX512F-NEXT: vpextrb $7, %xmm0, %eax 10332; AVX512F-NEXT: testb $1, %al 10333; AVX512F-NEXT: je LBB61_48 10334; AVX512F-NEXT: ## BB#47: ## %cond.store45 10335; AVX512F-NEXT: vpextrw $7, %xmm2, 46(%rdi) 10336; AVX512F-NEXT: LBB61_48: ## %else46 10337; AVX512F-NEXT: vpextrb $8, %xmm0, %eax 10338; AVX512F-NEXT: testb $1, %al 10339; AVX512F-NEXT: je LBB61_50 10340; AVX512F-NEXT: ## BB#49: ## %cond.store47 10341; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 10342; AVX512F-NEXT: vmovd %xmm1, %eax 10343; AVX512F-NEXT: movw %ax, 48(%rdi) 10344; AVX512F-NEXT: LBB61_50: ## %else48 10345; AVX512F-NEXT: vpextrb $9, %xmm0, %eax 10346; AVX512F-NEXT: testb $1, %al 10347; AVX512F-NEXT: je LBB61_52 10348; AVX512F-NEXT: ## BB#51: ## %cond.store49 10349; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 10350; AVX512F-NEXT: vpextrw $1, %xmm1, 50(%rdi) 10351; AVX512F-NEXT: LBB61_52: ## %else50 10352; 
AVX512F-NEXT: vpextrb $10, %xmm0, %eax 10353; AVX512F-NEXT: testb $1, %al 10354; AVX512F-NEXT: je LBB61_54 10355; AVX512F-NEXT: ## BB#53: ## %cond.store51 10356; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 10357; AVX512F-NEXT: vpextrw $2, %xmm1, 52(%rdi) 10358; AVX512F-NEXT: LBB61_54: ## %else52 10359; AVX512F-NEXT: vpextrb $11, %xmm0, %eax 10360; AVX512F-NEXT: testb $1, %al 10361; AVX512F-NEXT: je LBB61_56 10362; AVX512F-NEXT: ## BB#55: ## %cond.store53 10363; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 10364; AVX512F-NEXT: vpextrw $3, %xmm1, 54(%rdi) 10365; AVX512F-NEXT: LBB61_56: ## %else54 10366; AVX512F-NEXT: vpextrb $12, %xmm0, %eax 10367; AVX512F-NEXT: testb $1, %al 10368; AVX512F-NEXT: je LBB61_58 10369; AVX512F-NEXT: ## BB#57: ## %cond.store55 10370; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 10371; AVX512F-NEXT: vpextrw $4, %xmm1, 56(%rdi) 10372; AVX512F-NEXT: LBB61_58: ## %else56 10373; AVX512F-NEXT: vpextrb $13, %xmm0, %eax 10374; AVX512F-NEXT: testb $1, %al 10375; AVX512F-NEXT: je LBB61_60 10376; AVX512F-NEXT: ## BB#59: ## %cond.store57 10377; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 10378; AVX512F-NEXT: vpextrw $5, %xmm1, 58(%rdi) 10379; AVX512F-NEXT: LBB61_60: ## %else58 10380; AVX512F-NEXT: vpextrb $14, %xmm0, %eax 10381; AVX512F-NEXT: testb $1, %al 10382; AVX512F-NEXT: je LBB61_62 10383; AVX512F-NEXT: ## BB#61: ## %cond.store59 10384; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 10385; AVX512F-NEXT: vpextrw $6, %xmm1, 60(%rdi) 10386; AVX512F-NEXT: LBB61_62: ## %else60 10387; AVX512F-NEXT: vpextrb $15, %xmm0, %eax 10388; AVX512F-NEXT: testb $1, %al 10389; AVX512F-NEXT: je LBB61_64 10390; AVX512F-NEXT: ## BB#63: ## %cond.store61 10391; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm0 10392; AVX512F-NEXT: vpextrw $7, %xmm0, 62(%rdi) 10393; AVX512F-NEXT: LBB61_64: ## %else62 10394; AVX512F-NEXT: retq 10395; 10396; SKX-LABEL: test_mask_store_32xi16: 10397; SKX: ## BB#0: 10398; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 10399; SKX-NEXT: vpmovb2m %ymm0, %k1 10400; SKX-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1} 10401; SKX-NEXT: retq 10402 call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> %val, <32 x i16>* %addr, i32 4, <32 x i1>%mask) 10403 ret void 10404} 10405 10406declare void @llvm.masked.store.v32i16.p0v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>) 10407