1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s 3 4declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone 5define i32 @test_kortestz(i16 %a0, i16 %a1) { 6; CHECK-LABEL: test_kortestz: 7; CHECK: ## BB#0: 8; CHECK-NEXT: kmovw %esi, %k0 9; CHECK-NEXT: kmovw %edi, %k1 10; CHECK-NEXT: xorl %eax, %eax 11; CHECK-NEXT: kortestw %k0, %k1 12; CHECK-NEXT: sete %al 13; CHECK-NEXT: retq 14 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1) 15 ret i32 %res 16} 17 18declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone 19define i32 @test_kortestc(i16 %a0, i16 %a1) { 20; CHECK-LABEL: test_kortestc: 21; CHECK: ## BB#0: 22; CHECK-NEXT: kmovw %esi, %k0 23; CHECK-NEXT: kmovw %edi, %k1 24; CHECK-NEXT: kortestw %k0, %k1 25; CHECK-NEXT: sbbl %eax, %eax 26; CHECK-NEXT: andl $1, %eax 27; CHECK-NEXT: retq 28 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1) 29 ret i32 %res 30} 31 32declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone 33define i16 @test_kand(i16 %a0, i16 %a1) { 34; CHECK-LABEL: test_kand: 35; CHECK: ## BB#0: 36; CHECK-NEXT: movw $8, %ax 37; CHECK-NEXT: kmovw %eax, %k0 38; CHECK-NEXT: kmovw %edi, %k1 39; CHECK-NEXT: kandw %k0, %k1, %k0 40; CHECK-NEXT: kmovw %esi, %k1 41; CHECK-NEXT: kandw %k1, %k0, %k0 42; CHECK-NEXT: kmovw %k0, %eax 43; CHECK-NEXT: retq 44 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8) 45 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1) 46 ret i16 %t2 47} 48 49declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone 50define i16 @test_knot(i16 %a0) { 51; CHECK-LABEL: test_knot: 52; CHECK: ## BB#0: 53; CHECK-NEXT: kmovw %edi, %k0 54; CHECK-NEXT: knotw %k0, %k0 55; CHECK-NEXT: kmovw %k0, %eax 56; CHECK-NEXT: retq 57 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0) 58 ret i16 %res 59} 60 61declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone 62 63define i16 @unpckbw_test(i16 %a0, i16 %a1) { 64; CHECK-LABEL: unpckbw_test: 65; CHECK: ## BB#0: 66; CHECK-NEXT: kmovw %edi, %k0 67; CHECK-NEXT: kmovw %esi, %k1 68; CHECK-NEXT: kunpckbw %k1, %k0, %k0 69; CHECK-NEXT: kmovw %k0, %eax 70; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 71; CHECK-NEXT: retq 72 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1) 73 ret i16 %res 74} 75 76define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { 77; CHECK-LABEL: test_rcp_ps_512: 78; CHECK: ## BB#0: 79; CHECK-NEXT: vrcp14ps %zmm0, %zmm0 80; CHECK-NEXT: retq 81 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] 82 ret <16 x float> %res 83} 84declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone 85 86define <8 x double> @test_rcp_pd_512(<8 x double> %a0) { 87; CHECK-LABEL: test_rcp_pd_512: 88; CHECK: ## BB#0: 89; CHECK-NEXT: vrcp14pd %zmm0, %zmm0 90; CHECK-NEXT: retq 91 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1] 92 ret <8 x double> %res 93} 94declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone 95 96declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32) 97 98define <8 x double> @test7(<8 x double> %a) { 99; CHECK-LABEL: test7: 100; CHECK: ## BB#0: 101; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 102; CHECK-NEXT: retq 103 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4) 104 ret <8 x double>%res 105} 106 107declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32) 108 109define <16 x float> @test8(<16 x float> %a) { 110; CHECK-LABEL: test8: 111; CHECK: ## BB#0: 112; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0 113; CHECK-NEXT: retq 114 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4) 115 ret <16 x float>%res 116} 117 118define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) { 119; CHECK-LABEL: test_rsqrt_ps_512: 120; CHECK: ## BB#0: 121; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0 122; CHECK-NEXT: retq 123 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] 124 ret <16 x float> %res 125} 126declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone 127 128define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { 129; CHECK-LABEL: test_sqrt_pd_512: 130; CHECK: ## BB#0: 131; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 132; CHECK-NEXT: retq 133 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) 134 ret <8 x double> %res 135} 136declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone 137 138define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { 139; CHECK-LABEL: test_sqrt_ps_512: 140; CHECK: ## BB#0: 141; CHECK-NEXT: vsqrtps %zmm0, %zmm0 142; CHECK-NEXT: retq 143 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) 144 ret <16 x float> %res 145} 146define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) { 147; CHECK-LABEL: test_sqrt_round_ps_512: 148; CHECK: ## BB#0: 149; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 150; CHECK-NEXT: retq 151 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3) 152 ret <16 x float> %res 153} 154declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone 155 156define <8 x double> @test_getexp_pd_512(<8 x double> %a0) { 157; CHECK-LABEL: test_getexp_pd_512: 158; CHECK: ## BB#0: 159; CHECK-NEXT: vgetexppd %zmm0, %zmm0 160; CHECK-NEXT: retq 161 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) 162 ret <8 x double> %res 163} 164define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) { 165; CHECK-LABEL: test_getexp_round_pd_512: 166; CHECK: ## BB#0: 167; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0 168; CHECK-NEXT: retq 169 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) 170 ret <8 x double> %res 171} 172declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone 173 174define <16 x float> @test_getexp_ps_512(<16 x float> %a0) { 175; CHECK-LABEL: test_getexp_ps_512: 176; CHECK: ## BB#0: 177; CHECK-NEXT: vgetexpps %zmm0, %zmm0 178; CHECK-NEXT: retq 179 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) 180 ret <16 x float> %res 181} 182 183define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) { 184; CHECK-LABEL: test_getexp_round_ps_512: 185; CHECK: ## BB#0: 186; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0 187; CHECK-NEXT: retq 188 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) 189 ret <16 x float> %res 190} 191declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone 192 193declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 194 195define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 196; CHECK-LABEL: test_sqrt_ss: 197; CHECK: ## BB#0: 198; CHECK-NEXT: andl $1, %edi 199; CHECK-NEXT: kmovw %edi, %k1 200; CHECK-NEXT: vmovaps %zmm2, %zmm3 201; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1} 202; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} 203; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} 204; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0 205; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1 206; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 207; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 208; CHECK-NEXT: retq 209 %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 210 %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1) 211 %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2) 212 %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3) 213 214 %res.1 = fadd <4 x float> %res0, %res1 215 %res.2 = fadd <4 x float> %res2, %res3 216 %res = fadd <4 x float> %res.1, %res.2 217 ret <4 x float> %res 218} 219 220declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 221 222define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 223; CHECK-LABEL: test_sqrt_sd: 224; CHECK: ## BB#0: 225; CHECK-NEXT: andl $1, %edi 226; CHECK-NEXT: kmovw %edi, %k1 227; CHECK-NEXT: vmovaps %zmm2, %zmm3 228; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1} 229; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} 230; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} 231; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0 232; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1 233; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0 234; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 235; CHECK-NEXT: retq 236 %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 237 %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1) 238 %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2) 239 %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3) 240 241 %res.1 = fadd <2 x double> %res0, %res1 242 %res.2 = fadd <2 x double> %res2, %res3 243 %res = fadd <2 x double> %res.1, %res.2 244 ret <2 x double> %res 245} 246 247define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { 248; CHECK-LABEL: test_x86_sse2_cvtsd2si64: 249; CHECK: ## BB#0: 250; CHECK-NEXT: vcvtsd2si %xmm0, %rax 251; CHECK-NEXT: retq 252 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] 253 ret i64 %res 254} 255declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone 256 257define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { 258; CHECK-LABEL: test_x86_sse2_cvtsi642sd: 259; CHECK: ## BB#0: 260; CHECK-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 261; CHECK-NEXT: retq 262 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] 263 ret <2 x double> %res 264} 265declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone 266 267define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) { 268; CHECK-LABEL: test_x86_avx512_cvttsd2si64: 269; CHECK: ## BB#0: 270; CHECK-NEXT: vcvttsd2si %xmm0, %rcx 271; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %rax 272; CHECK-NEXT: addq %rcx, %rax 273; CHECK-NEXT: retq 274 %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ; 275 %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ; 276 %res2 = add i64 %res0, %res1 277 ret i64 %res2 278} 279declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone 280 281define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) { 282; CHECK-LABEL: test_x86_avx512_cvttsd2usi: 283; CHECK: ## BB#0: 284; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx 285; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %eax 286; CHECK-NEXT: addl %ecx, %eax 287; CHECK-NEXT: retq 288 %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ; 289 %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ; 290 %res2 = add i32 %res0, %res1 291 ret i32 %res2 292} 293declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone 294 295define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) { 296; CHECK-LABEL: test_x86_avx512_cvttsd2si: 297; CHECK: ## BB#0: 298; CHECK-NEXT: vcvttsd2si %xmm0, %ecx 299; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %eax 300; CHECK-NEXT: addl %ecx, %eax 301; CHECK-NEXT: retq 302 %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ; 303 %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ; 304 %res2 = add i32 %res0, %res1 305 ret i32 %res2 306} 307declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone 308 309 310 311define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) { 312; CHECK-LABEL: test_x86_avx512_cvttsd2usi64: 313; CHECK: ## BB#0: 314; CHECK-NEXT: vcvttsd2usi %xmm0, %rcx 315; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %rax 316; CHECK-NEXT: addq %rcx, %rax 317; CHECK-NEXT: retq 318 %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ; 319 %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ; 320 %res2 = add i64 %res0, %res1 321 ret i64 %res2 322} 323declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone 324 325define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { 326; CHECK-LABEL: test_x86_sse_cvtss2si64: 327; CHECK: ## BB#0: 328; CHECK-NEXT: vcvtss2si %xmm0, %rax 329; CHECK-NEXT: retq 330 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] 331 ret i64 %res 332} 333declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 334 335 336define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { 337; CHECK-LABEL: test_x86_sse_cvtsi642ss: 338; CHECK: ## BB#0: 339; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 340; CHECK-NEXT: retq 341 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] 342 ret <4 x float> %res 343} 344declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone 345 346 347define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) { 348; CHECK-LABEL: test_x86_avx512_cvttss2si: 349; CHECK: ## BB#0: 350; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx 351; CHECK-NEXT: vcvttss2si %xmm0, %eax 352; CHECK-NEXT: addl %ecx, %eax 353; CHECK-NEXT: retq 354 %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ; 355 %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ; 356 %res2 = add i32 %res0, %res1 357 ret i32 %res2 358} 359declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone 360 361define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) { 362; CHECK-LABEL: test_x86_avx512_cvttss2si64: 363; CHECK: ## BB#0: 364; CHECK-NEXT: vcvttss2si %xmm0, %rcx 365; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax 366; CHECK-NEXT: addq %rcx, %rax 367; CHECK-NEXT: retq 368 %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ; 369 %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ; 370 %res2 = add i64 %res0, %res1 371 ret i64 %res2 372} 373declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone 374 375define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) { 376; CHECK-LABEL: test_x86_avx512_cvttss2usi: 377; CHECK: ## BB#0: 378; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx 379; CHECK-NEXT: vcvttss2usi %xmm0, %eax 380; CHECK-NEXT: addl %ecx, %eax 381; CHECK-NEXT: retq 382 %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ; 383 %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ; 384 %res2 = add i32 %res0, %res1 385 ret i32 %res2 386} 387declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone 388 389define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) { 390; CHECK-LABEL: test_x86_avx512_cvttss2usi64: 391; CHECK: ## BB#0: 392; CHECK-NEXT: vcvttss2usi %xmm0, %rcx 393; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax 394; CHECK-NEXT: addq %rcx, %rax 395; CHECK-NEXT: retq 396 %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ; 397 %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ; 398 %res2 = add i64 %res0, %res1 399 ret i64 %res2 400} 401declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone 402 403define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { 404; CHECK-LABEL: test_x86_avx512_cvtsd2usi64: 405; CHECK: ## BB#0: 406; CHECK-NEXT: vcvtsd2usi %xmm0, %rcx 407; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rax 408; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rdx 409; CHECK-NEXT: addq %rcx, %rax 410; CHECK-NEXT: addq %rdx, %rax 411; CHECK-NEXT: retq 412 413 %res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4) 414 %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 3) 415 %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 1) 416 %res3 = add i64 %res, %res1 417 %res4 = add i64 %res3, %res2 418 ret i64 %res4 419} 420declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone 421 422define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) { 423; CHECK-LABEL: test_x86_avx512_cvtsd2si64: 424; CHECK: ## BB#0: 425; CHECK-NEXT: vcvtsd2si %xmm0, %rcx 426; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rax 427; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rdx 428; CHECK-NEXT: addq %rcx, %rax 429; CHECK-NEXT: addq %rdx, %rax 430; CHECK-NEXT: retq 431 432 %res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4) 433 %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 3) 434 %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 1) 435 %res3 = add i64 %res, %res1 436 %res4 = add i64 %res3, %res2 437 ret i64 %res4 438} 439declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone 440 441define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) { 442; CHECK-LABEL: test_x86_avx512_cvtss2usi64: 443; CHECK: ## BB#0: 444; CHECK-NEXT: vcvtss2usi %xmm0, %rcx 445; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rax 446; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rdx 447; CHECK-NEXT: addq %rcx, %rax 448; CHECK-NEXT: addq %rdx, %rax 449; CHECK-NEXT: retq 450 451 %res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4) 452 %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 3) 453 %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 1) 454 %res3 = add i64 %res, %res1 455 %res4 = add i64 %res3, %res2 456 ret i64 %res4 457} 458declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone 459 460define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) { 461; CHECK-LABEL: test_x86_avx512_cvtss2si64: 462; CHECK: ## BB#0: 463; CHECK-NEXT: vcvtss2si %xmm0, %rcx 464; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rax 465; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rdx 466; CHECK-NEXT: addq %rcx, %rax 467; CHECK-NEXT: addq %rdx, %rax 468; CHECK-NEXT: retq 469 470 %res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4) 471 %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 3) 472 %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 1) 473 %res3 = add i64 %res, %res1 474 %res4 = add i64 %res3, %res2 475 ret i64 %res4 476} 477declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone 478 479define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) { 480; CHECK-LABEL: test_x86_avx512_cvtsd2usi32: 481; CHECK: ## BB#0: 482; CHECK-NEXT: vcvtsd2usi %xmm0, %ecx 483; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %eax 484; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %edx 485; CHECK-NEXT: addl %ecx, %eax 486; CHECK-NEXT: addl %edx, %eax 487; CHECK-NEXT: retq 488 489 %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4) 490 %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 3) 491 %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 1) 492 %res3 = add i32 %res, %res1 493 %res4 = add i32 %res3, %res2 494 ret i32 %res4 495} 496declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone 497 498define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) { 499; CHECK-LABEL: test_x86_avx512_cvtsd2si32: 500; CHECK: ## BB#0: 501; CHECK-NEXT: vcvtsd2si %xmm0, %ecx 502; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %eax 503; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %edx 504; CHECK-NEXT: addl %ecx, %eax 505; CHECK-NEXT: addl %edx, %eax 506; CHECK-NEXT: retq 507 508 %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4) 509 %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 3) 510 %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 1) 511 %res3 = add i32 %res, %res1 512 %res4 = add i32 %res3, %res2 513 ret i32 %res4 514} 515declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone 516 517define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) { 518; CHECK-LABEL: test_x86_avx512_cvtss2usi32: 519; CHECK: ## BB#0: 520; CHECK-NEXT: vcvtss2usi %xmm0, %ecx 521; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %eax 522; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %edx 523; CHECK-NEXT: addl %ecx, %eax 524; CHECK-NEXT: addl %edx, %eax 525; CHECK-NEXT: retq 526 527 %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4) 528 %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 3) 529 %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 1) 530 %res3 = add i32 %res, %res1 531 %res4 = add i32 %res3, %res2 532 ret i32 %res4 533} 534declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone 535 536define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) { 537; CHECK-LABEL: test_x86_avx512_cvtss2si32: 538; CHECK: ## BB#0: 539; CHECK-NEXT: vcvtss2si %xmm0, %ecx 540; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %eax 541; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %edx 542; CHECK-NEXT: addl %ecx, %eax 543; CHECK-NEXT: addl %edx, %eax 544; CHECK-NEXT: retq 545 546 %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4) 547 %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 3) 548 %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 1) 549 %res3 = add i32 %res, %res1 550 %res4 = add i32 %res3, %res2 551 ret i32 %res4 552} 553declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone 554 555define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) { 556; CHECK-LABEL: test_x86_vcvtph2ps_512: 557; CHECK: ## BB#0: 558; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 559; CHECK-NEXT: retq 560 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) 561 ret <16 x float> %res 562} 563 564define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) { 565; CHECK-LABEL: test_x86_vcvtph2ps_512_sae: 566; CHECK: ## BB#0: 567; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 568; CHECK-NEXT: retq 569 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) 570 ret <16 x float> %res 571} 572 573define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) { 574; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk: 575; CHECK: ## BB#0: 576; CHECK-NEXT: kmovw %edi, %k1 577; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1} 578; CHECK-NEXT: vmovaps %zmm1, %zmm0 579; CHECK-NEXT: retq 580 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4) 581 ret <16 x float> %res 582} 583 584define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) { 585; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz: 586; CHECK: ## BB#0: 587; CHECK-NEXT: kmovw %edi, %k1 588; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} 589; CHECK-NEXT: retq 590 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8) 591 ret <16 x float> %res 592} 593 594define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) { 595; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz: 596; CHECK: ## BB#0: 597; CHECK-NEXT: kmovw %edi, %k1 598; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z} 599; CHECK-NEXT: retq 600 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4) 601 ret <16 x float> %res 602} 603 604declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly 605 606define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, <16 x i16> * %dst) { 607; CHECK-LABEL: test_x86_vcvtps2ph_256: 608; CHECK: ## BB#0: 609; CHECK-NEXT: kmovw %edi, %k1 610; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1} 611; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm2 {%k1} {z} 612; CHECK-NEXT: vcvtps2ph $2, %zmm0, (%rsi) 613; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm0 614; CHECK-NEXT: retq 615 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1) 616 %res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 %mask) 617 %res3 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> %src, i16 %mask) 618 store <16 x i16> %res1, <16 x i16> * %dst 619 %res = add <16 x i16> %res2, %res3 620 ret <16 x i16> %res 621} 622 623declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly 624 625define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) { 626; CHECK-LABEL: test_x86_vbroadcast_ss_512: 627; CHECK: ## BB#0: 628; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 629; CHECK-NEXT: retq 630 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1] 631 ret <16 x float> %res 632} 633declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly 634 635define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) { 636; CHECK-LABEL: test_x86_vbroadcast_sd_512: 637; CHECK: ## BB#0: 638; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 639; CHECK-NEXT: retq 640 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1] 641 ret <8 x double> %res 642} 643declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly 644 645define <16 x i32> @test_conflict_d(<16 x i32> %a) { 646; CHECK-LABEL: test_conflict_d: 647; CHECK: ## BB#0: 648; CHECK-NEXT: vpconflictd %zmm0, %zmm0 649; CHECK-NEXT: retq 650 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) 651 ret <16 x i32> %res 652} 653 654declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 655 656define <8 x i64> @test_conflict_q(<8 x i64> %a) { 657; CHECK-LABEL: test_conflict_q: 658; CHECK: ## BB#0: 659; CHECK-NEXT: vpconflictq %zmm0, %zmm0 660; CHECK-NEXT: retq 661 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) 662 ret <8 x i64> %res 663} 664 665declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 666 667define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { 668; CHECK-LABEL: test_maskz_conflict_d: 669; CHECK: ## BB#0: 670; CHECK-NEXT: kmovw %edi, %k1 671; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z} 672; CHECK-NEXT: retq 673 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) 674 ret <16 x i32> %res 675} 676 677define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 678; CHECK-LABEL: test_mask_conflict_q: 679; CHECK: ## BB#0: 680; CHECK-NEXT: kmovw %edi, %k1 681; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1} 682; CHECK-NEXT: vmovaps %zmm1, %zmm0 683; CHECK-NEXT: retq 684 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 685 ret <8 x i64> %res 686} 687 688define <16 x i32> @test_lzcnt_d(<16 x i32> %a) { 689; CHECK-LABEL: test_lzcnt_d: 690; CHECK: ## BB#0: 691; CHECK-NEXT: vplzcntd %zmm0, %zmm0 692; CHECK-NEXT: retq 693 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) 694 ret <16 x i32> %res 695} 696 697declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 698 699define <8 x i64> @test_lzcnt_q(<8 x i64> %a) { 700; CHECK-LABEL: test_lzcnt_q: 701; CHECK: ## BB#0: 702; CHECK-NEXT: vplzcntq %zmm0, %zmm0 703; CHECK-NEXT: retq 704 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) 705 ret <8 x i64> %res 706} 707 708declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 709 710 711define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 712; CHECK-LABEL: test_mask_lzcnt_d: 713; CHECK: ## BB#0: 714; CHECK-NEXT: kmovw %edi, %k1 715; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1} 716; CHECK-NEXT: vmovaps %zmm1, %zmm0 717; CHECK-NEXT: retq 718 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 719 ret <16 x i32> %res 720} 721 722define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 723; CHECK-LABEL: test_mask_lzcnt_q: 724; CHECK: ## BB#0: 725; CHECK-NEXT: kmovw %edi, %k1 726; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1} 727; CHECK-NEXT: vmovaps %zmm1, %zmm0 728; CHECK-NEXT: retq 729 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 730 ret <8 x i64> %res 731} 732 733 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) { 734; CHECK-LABEL: test_cmpps: 735; CHECK: ## BB#0: 736; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 737; CHECK-NEXT: kmovw %k0, %eax 738; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 739; CHECK-NEXT: retq 740 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8) 741 ret i16 %res 742 } 743 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32) 744 745 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) { 746; CHECK-LABEL: test_cmppd: 747; CHECK: ## BB#0: 748; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0 749; CHECK-NEXT: kmovw %k0, %eax 750; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 751; CHECK-NEXT: retq 752 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4) 753 ret i8 %res 754 } 755 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) 756 757 ; fp min - max 758define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) { 759; CHECK-LABEL: test_vmaxpd: 760; CHECK: ## BB#0: 761; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 762; CHECK-NEXT: retq 763 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, 764 <8 x double>zeroinitializer, i8 -1, i32 4) 765 ret <8 x double> %res 766} 767declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>, 768 <8 x double>, i8, i32) 769 770define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) { 771; CHECK-LABEL: test_vminpd: 772; CHECK: ## BB#0: 773; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0 774; CHECK-NEXT: retq 775 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, 776 <8 x double>zeroinitializer, i8 -1, i32 4) 777 ret <8 x double> %res 778} 779declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, 780 <8 x double>, i8, i32) 781 782 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) 783 784define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { 785; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512: 786; CHECK: ## BB#0: 787; CHECK-NEXT: kmovw %edi, %k1 788; CHECK-NEXT: vpabsd %zmm0, %zmm1 {%k1} 789; CHECK-NEXT: vpabsd %zmm0, %zmm0 790; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 791; CHECK-NEXT: retq 792 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) 793 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1) 794 %res2 = add <16 x i32> %res, %res1 795 ret <16 x i32> %res2 796} 797 798declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) 799 800define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { 801; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512: 802; CHECK: ## BB#0: 803; CHECK-NEXT: kmovw %edi, %k1 804; CHECK-NEXT: vpabsq %zmm0, %zmm1 {%k1} 805; CHECK-NEXT: vpabsq %zmm0, %zmm0 806; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 807; CHECK-NEXT: retq 808 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) 809 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1) 810 %res2 = add <8 x i64> %res, %res1 811 ret <8 x i64> %res2 812} 813 814define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) { 815; CHECK-LABEL: test_vptestmq: 816; CHECK: ## BB#0: 817; CHECK-NEXT: kmovw %edi, %k1 818; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1} 819; CHECK-NEXT: kmovw %k0, %ecx 820; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 821; CHECK-NEXT: kmovw %k0, %eax 822; CHECK-NEXT: addb %cl, %al 823; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 824; CHECK-NEXT: retq 825 %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1) 826 %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m) 827 %res2 = add i8 %res1, %res 828 ret i8 %res2 829} 830declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8) 831 832define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) { 833; CHECK-LABEL: test_vptestmd: 834; CHECK: ## BB#0: 835; CHECK-NEXT: kmovw %edi, %k1 836; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1} 837; CHECK-NEXT: kmovw %k0, %ecx 838; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 839; CHECK-NEXT: kmovw %k0, %eax 840; CHECK-NEXT: addl %ecx, %eax 841; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 842; CHECK-NEXT: retq 843 %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1) 844 %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m) 845 %res2 = add i16 %res1, %res 846 ret i16 %res2 847} 848declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16) 849 850define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) { 851; CHECK-LABEL: test_valign_q: 852; CHECK: ## BB#0: 853; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0 854; CHECK-NEXT: retq 855 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1) 856 ret <8 x i64> %res 857} 858 859define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) { 860; CHECK-LABEL: test_mask_valign_q: 861; CHECK: ## BB#0: 862; CHECK-NEXT: kmovw %edi, %k1 863; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} 864; CHECK-NEXT: vmovaps %zmm2, %zmm0 865; CHECK-NEXT: retq 866 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask) 867 ret <8 x i64> %res 868} 869 870declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8) 871 872define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 873; CHECK-LABEL: test_maskz_valign_d: 874; CHECK: ## BB#0: 875; CHECK-NEXT: kmovw %edi, %k1 876; CHECK-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} 877; CHECK-NEXT: retq 878 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask) 879 ret <16 x i32> %res 880} 881 882declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16) 883 884define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) { 885; CHECK-LABEL: test_mask_store_ss: 886; CHECK: ## BB#0: 887; CHECK-NEXT: kmovw %esi, %k1 888; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1} 889; CHECK-NEXT: retq 890 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask) 891 ret void 892} 893 894declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 ) 895 896define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 897; CHECK-LABEL: test_cmp_d_512: 898; CHECK: ## BB#0: 899; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 900; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k1 901; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k2 902; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k3 903; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 904; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k5 905; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k6 906; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k7 907; CHECK-NEXT: kmovw %k1, %eax 908; CHECK-NEXT: kmovw %k0, %ecx 909; CHECK-NEXT: vmovd %ecx, %xmm0 910; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 911; CHECK-NEXT: kmovw %k2, %eax 912; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 913; CHECK-NEXT: kmovw %k3, %eax 914; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 915; CHECK-NEXT: kmovw %k4, %eax 916; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 917; CHECK-NEXT: kmovw %k5, %eax 918; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 919; CHECK-NEXT: kmovw %k6, %eax 920; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 921; CHECK-NEXT: kmovw %k7, %eax 922; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 923; CHECK-NEXT: retq 924 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) 925 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 926 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) 927 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 928 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) 929 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 930 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) 931 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 932 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) 933 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 934 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) 935 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 936 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) 937 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 938 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) 939 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 940 ret <8 x i16> %vec7 941} 942 943define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 944; CHECK-LABEL: test_mask_cmp_d_512: 945; CHECK: ## BB#0: 946; CHECK-NEXT: kmovw %edi, %k1 947; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 948; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k2 {%k1} 949; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1} 950; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k4 {%k1} 951; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k5 {%k1} 952; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k6 {%k1} 953; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k7 {%k1} 954; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k1 {%k1} 955; CHECK-NEXT: kmovw %k2, %eax 956; CHECK-NEXT: kmovw %k0, %ecx 957; CHECK-NEXT: vmovd %ecx, %xmm0 958; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 959; CHECK-NEXT: kmovw %k3, %eax 960; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 961; CHECK-NEXT: kmovw %k4, %eax 962; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 963; CHECK-NEXT: kmovw %k5, %eax 964; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 965; CHECK-NEXT: kmovw %k6, %eax 966; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 967; CHECK-NEXT: kmovw %k7, %eax 968; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 969; CHECK-NEXT: kmovw %k1, %eax 970; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 971; CHECK-NEXT: retq 972 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) 973 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 974 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) 975 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 976 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) 977 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 978 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) 979 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 980 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) 981 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 982 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) 983 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 984 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) 985 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 986 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) 987 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 988 ret <8 x i16> %vec7 989} 990 991declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone 992 993define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 994; CHECK-LABEL: test_ucmp_d_512: 995; CHECK: ## BB#0: 996; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 997; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k1 998; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k2 999; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k3 1000; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k4 1001; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k5 1002; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k6 1003; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k7 1004; CHECK-NEXT: kmovw %k1, %eax 1005; CHECK-NEXT: kmovw %k0, %ecx 1006; CHECK-NEXT: vmovd %ecx, %xmm0 1007; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 1008; CHECK-NEXT: kmovw %k2, %eax 1009; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 1010; CHECK-NEXT: kmovw %k3, %eax 1011; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 1012; CHECK-NEXT: kmovw %k4, %eax 1013; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 1014; CHECK-NEXT: kmovw %k5, %eax 1015; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1016; CHECK-NEXT: kmovw %k6, %eax 1017; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 1018; CHECK-NEXT: kmovw %k7, %eax 1019; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 1020; CHECK-NEXT: retq 1021 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) 1022 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1023 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) 1024 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1025 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) 1026 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1027 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) 1028 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 1029 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) 1030 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1031 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) 1032 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1033 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) 1034 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1035 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) 1036 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 1037 ret <8 x i16> %vec7 1038} 1039 1040define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1041; CHECK-LABEL: test_mask_ucmp_d_512: 1042; CHECK: ## BB#0: 1043; CHECK-NEXT: kmovw %edi, %k1 1044; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 {%k1} 1045; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1} 1046; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1} 1047; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k4 {%k1} 1048; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k5 {%k1} 1049; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k6 {%k1} 1050; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k7 {%k1} 1051; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k1 {%k1} 1052; CHECK-NEXT: kmovw %k2, %eax 1053; CHECK-NEXT: kmovw %k0, %ecx 1054; CHECK-NEXT: vmovd %ecx, %xmm0 1055; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 1056; CHECK-NEXT: kmovw %k3, %eax 1057; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 1058; CHECK-NEXT: kmovw %k4, %eax 1059; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 1060; CHECK-NEXT: kmovw %k5, %eax 1061; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 1062; CHECK-NEXT: kmovw %k6, %eax 1063; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1064; CHECK-NEXT: kmovw %k7, %eax 1065; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 1066; CHECK-NEXT: kmovw %k1, %eax 1067; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 1068; CHECK-NEXT: retq 1069 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) 1070 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1071 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) 1072 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1073 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) 1074 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1075 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) 1076 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 1077 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) 1078 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1079 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) 1080 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1081 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) 1082 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1083 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) 1084 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 1085 ret <8 x i16> %vec7 1086} 1087 1088declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone 1089 1090define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { 1091; CHECK-LABEL: test_cmp_q_512: 1092; CHECK: ## BB#0: 1093; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 1094; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k1 1095; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k2 1096; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k3 1097; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 1098; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k5 1099; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k6 1100; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k7 1101; CHECK-NEXT: kmovw %k0, %eax 1102; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 1103; CHECK-NEXT: kmovw %k1, %eax 1104; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 1105; CHECK-NEXT: kmovw %k2, %eax 1106; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 1107; CHECK-NEXT: kmovw %k3, %eax 1108; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 1109; CHECK-NEXT: kmovw %k4, %eax 1110; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1111; CHECK-NEXT: kmovw %k5, %eax 1112; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1113; CHECK-NEXT: kmovw %k6, %eax 1114; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1115; CHECK-NEXT: kmovw %k7, %eax 1116; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1117; CHECK-NEXT: retq 1118 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) 1119 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1120 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) 1121 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1122 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) 1123 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1124 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) 1125 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1126 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1) 1127 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1128 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) 1129 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1130 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) 1131 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1132 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) 1133 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1134 ret <8 x i8> %vec7 1135} 1136 1137define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1138; CHECK-LABEL: test_mask_cmp_q_512: 1139; CHECK: ## BB#0: 1140; CHECK-NEXT: kmovw %edi, %k1 1141; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 1142; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k2 {%k1} 1143; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k3 {%k1} 1144; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k4 {%k1} 1145; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k5 {%k1} 1146; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k6 {%k1} 1147; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k7 {%k1} 1148; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k1 {%k1} 1149; CHECK-NEXT: kmovw %k0, %eax 1150; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 1151; CHECK-NEXT: kmovw %k2, %eax 1152; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 1153; CHECK-NEXT: kmovw %k3, %eax 1154; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 1155; CHECK-NEXT: kmovw %k4, %eax 1156; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 1157; CHECK-NEXT: kmovw %k5, %eax 1158; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1159; CHECK-NEXT: kmovw %k6, %eax 1160; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1161; CHECK-NEXT: kmovw %k7, %eax 1162; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1163; CHECK-NEXT: kmovw %k1, %eax 1164; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1165; CHECK-NEXT: retq 1166 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) 1167 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1168 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask) 1169 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1170 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask) 1171 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1172 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask) 1173 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1174 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask) 1175 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1176 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask) 1177 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1178 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask) 1179 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1180 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask) 1181 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1182 ret <8 x i8> %vec7 1183} 1184 1185declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone 1186 1187define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { 1188; CHECK-LABEL: test_ucmp_q_512: 1189; CHECK: ## BB#0: 1190; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 1191; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 1192; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k2 1193; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k3 1194; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k4 1195; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k5 1196; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k6 1197; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k7 1198; CHECK-NEXT: kmovw %k0, %eax 1199; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 1200; CHECK-NEXT: kmovw %k1, %eax 1201; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 1202; CHECK-NEXT: kmovw %k2, %eax 1203; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 1204; CHECK-NEXT: kmovw %k3, %eax 1205; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 1206; CHECK-NEXT: kmovw %k4, %eax 1207; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1208; CHECK-NEXT: kmovw %k5, %eax 1209; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1210; CHECK-NEXT: kmovw %k6, %eax 1211; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1212; CHECK-NEXT: kmovw %k7, %eax 1213; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1214; CHECK-NEXT: retq 1215 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) 1216 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1217 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) 1218 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1219 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) 1220 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1221 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) 1222 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1223 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1) 1224 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1225 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) 1226 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1227 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) 1228 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1229 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) 1230 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1231 ret <8 x i8> %vec7 1232} 1233 1234define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1235; CHECK-LABEL: test_mask_ucmp_q_512: 1236; CHECK: ## BB#0: 1237; CHECK-NEXT: kmovw %edi, %k1 1238; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 {%k1} 1239; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k2 {%k1} 1240; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k3 {%k1} 1241; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k4 {%k1} 1242; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k5 {%k1} 1243; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k6 {%k1} 1244; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k7 {%k1} 1245; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k1 {%k1} 1246; CHECK-NEXT: kmovw %k0, %eax 1247; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 1248; CHECK-NEXT: kmovw %k2, %eax 1249; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 1250; CHECK-NEXT: kmovw %k3, %eax 1251; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 1252; CHECK-NEXT: kmovw %k4, %eax 1253; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 1254; CHECK-NEXT: kmovw %k5, %eax 1255; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1256; CHECK-NEXT: kmovw %k6, %eax 1257; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1258; CHECK-NEXT: kmovw %k7, %eax 1259; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1260; CHECK-NEXT: kmovw %k1, %eax 1261; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1262; CHECK-NEXT: retq 1263 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) 1264 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1265 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask) 1266 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1267 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask) 1268 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1269 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask) 1270 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1271 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask) 1272 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1273 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask) 1274 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1275 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask) 1276 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1277 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask) 1278 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1279 ret <8 x i8> %vec7 1280} 1281 1282declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone 1283 1284define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { 1285; CHECK-LABEL: test_mask_vextractf32x4: 1286; CHECK: ## BB#0: 1287; CHECK-NEXT: kmovw %edi, %k1 1288; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} 1289; CHECK-NEXT: retq 1290 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask) 1291 ret <4 x float> %res 1292} 1293 1294declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8) 1295 1296define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) { 1297; CHECK-LABEL: test_mask_vextracti64x4: 1298; CHECK: ## BB#0: 1299; CHECK-NEXT: kmovw %edi, %k1 1300; CHECK-NEXT: vextracti64x4 $2, %zmm1, %ymm0 {%k1} 1301; CHECK-NEXT: retq 1302 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask) 1303 ret <4 x i64> %res 1304} 1305 1306declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8) 1307 1308define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { 1309; CHECK-LABEL: test_maskz_vextracti32x4: 1310; CHECK: ## BB#0: 1311; CHECK-NEXT: kmovw %edi, %k1 1312; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} 1313; CHECK-NEXT: retq 1314 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask) 1315 ret <4 x i32> %res 1316} 1317 1318declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8) 1319 1320define <4 x double> @test_vextractf64x4(<8 x double> %a) { 1321; CHECK-LABEL: test_vextractf64x4: 1322; CHECK: ## BB#0: 1323; CHECK-NEXT: vextractf64x4 $2, %zmm0, %ymm0 1324; CHECK-NEXT: retq 1325 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1) 1326 ret <4 x double> %res 1327} 1328 1329declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8) 1330 1331define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) { 1332; CHECK-LABEL: test_x86_avx512_psll_d: 1333; CHECK: ## BB#0: 1334; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 1335; CHECK-NEXT: retq 1336 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1337 ret <16 x i32> %res 1338} 1339 1340define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1341; CHECK-LABEL: test_x86_avx512_mask_psll_d: 1342; CHECK: ## BB#0: 1343; CHECK-NEXT: kmovw %edi, %k1 1344; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1} 1345; CHECK-NEXT: vmovaps %zmm2, %zmm0 1346; CHECK-NEXT: retq 1347 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1348 ret <16 x i32> %res 1349} 1350 1351define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1352; CHECK-LABEL: test_x86_avx512_maskz_psll_d: 1353; CHECK: ## BB#0: 1354; CHECK-NEXT: kmovw %edi, %k1 1355; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} 1356; CHECK-NEXT: retq 1357 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1358 ret <16 x i32> %res 1359} 1360 1361declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1362 1363define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) { 1364; CHECK-LABEL: test_x86_avx512_psll_q: 1365; CHECK: ## BB#0: 1366; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 1367; CHECK-NEXT: retq 1368 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1369 ret <8 x i64> %res 1370} 1371 1372define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1373; CHECK-LABEL: test_x86_avx512_mask_psll_q: 1374; CHECK: ## BB#0: 1375; CHECK-NEXT: kmovw %edi, %k1 1376; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1} 1377; CHECK-NEXT: vmovaps %zmm2, %zmm0 1378; CHECK-NEXT: retq 1379 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1380 ret <8 x i64> %res 1381} 1382 1383define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1384; CHECK-LABEL: test_x86_avx512_maskz_psll_q: 1385; CHECK: ## BB#0: 1386; CHECK-NEXT: kmovw %edi, %k1 1387; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} 1388; CHECK-NEXT: retq 1389 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1390 ret <8 x i64> %res 1391} 1392 1393declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1394 1395define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) { 1396; CHECK-LABEL: test_x86_avx512_psrl_d: 1397; CHECK: ## BB#0: 1398; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 1399; CHECK-NEXT: retq 1400 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1401 ret <16 x i32> %res 1402} 1403 1404define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1405; CHECK-LABEL: test_x86_avx512_mask_psrl_d: 1406; CHECK: ## BB#0: 1407; CHECK-NEXT: kmovw %edi, %k1 1408; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1} 1409; CHECK-NEXT: vmovaps %zmm2, %zmm0 1410; CHECK-NEXT: retq 1411 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1412 ret <16 x i32> %res 1413} 1414 1415define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1416; CHECK-LABEL: test_x86_avx512_maskz_psrl_d: 1417; CHECK: ## BB#0: 1418; CHECK-NEXT: kmovw %edi, %k1 1419; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} 1420; CHECK-NEXT: retq 1421 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1422 ret <16 x i32> %res 1423} 1424 1425declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1426 1427define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) { 1428; CHECK-LABEL: test_x86_avx512_psrl_q: 1429; CHECK: ## BB#0: 1430; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 1431; CHECK-NEXT: retq 1432 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1433 ret <8 x i64> %res 1434} 1435 1436define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1437; CHECK-LABEL: test_x86_avx512_mask_psrl_q: 1438; CHECK: ## BB#0: 1439; CHECK-NEXT: kmovw %edi, %k1 1440; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} 1441; CHECK-NEXT: vmovaps %zmm2, %zmm0 1442; CHECK-NEXT: retq 1443 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1444 ret <8 x i64> %res 1445} 1446 1447define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1448; CHECK-LABEL: test_x86_avx512_maskz_psrl_q: 1449; CHECK: ## BB#0: 1450; CHECK-NEXT: kmovw %edi, %k1 1451; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} 1452; CHECK-NEXT: retq 1453 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1454 ret <8 x i64> %res 1455} 1456 1457declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1458 1459define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) { 1460; CHECK-LABEL: test_x86_avx512_psra_d: 1461; CHECK: ## BB#0: 1462; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 1463; CHECK-NEXT: retq 1464 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1465 ret <16 x i32> %res 1466} 1467 1468define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1469; CHECK-LABEL: test_x86_avx512_mask_psra_d: 1470; CHECK: ## BB#0: 1471; CHECK-NEXT: kmovw %edi, %k1 1472; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1} 1473; CHECK-NEXT: vmovaps %zmm2, %zmm0 1474; CHECK-NEXT: retq 1475 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1476 ret <16 x i32> %res 1477} 1478 1479define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1480; CHECK-LABEL: test_x86_avx512_maskz_psra_d: 1481; CHECK: ## BB#0: 1482; CHECK-NEXT: kmovw %edi, %k1 1483; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} 1484; CHECK-NEXT: retq 1485 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1486 ret <16 x i32> %res 1487} 1488 1489declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1490 1491define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) { 1492; CHECK-LABEL: test_x86_avx512_psra_q: 1493; CHECK: ## BB#0: 1494; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 1495; CHECK-NEXT: retq 1496 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1497 ret <8 x i64> %res 1498} 1499 1500define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1501; CHECK-LABEL: test_x86_avx512_mask_psra_q: 1502; CHECK: ## BB#0: 1503; CHECK-NEXT: kmovw %edi, %k1 1504; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1} 1505; CHECK-NEXT: vmovaps %zmm2, %zmm0 1506; CHECK-NEXT: retq 1507 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1508 ret <8 x i64> %res 1509} 1510 1511define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1512; CHECK-LABEL: test_x86_avx512_maskz_psra_q: 1513; CHECK: ## BB#0: 1514; CHECK-NEXT: kmovw %edi, %k1 1515; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} 1516; CHECK-NEXT: retq 1517 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1518 ret <8 x i64> %res 1519} 1520 1521declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1522 1523define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) { 1524; CHECK-LABEL: test_x86_avx512_psllv_d: 1525; CHECK: ## BB#0: 1526; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 1527; CHECK-NEXT: retq 1528 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1529 ret <16 x i32> %res 1530} 1531 1532define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1533; CHECK-LABEL: test_x86_avx512_mask_psllv_d: 1534; CHECK: ## BB#0: 1535; CHECK-NEXT: kmovw %edi, %k1 1536; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} 1537; CHECK-NEXT: vmovaps %zmm2, %zmm0 1538; CHECK-NEXT: retq 1539 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1540 ret <16 x i32> %res 1541} 1542 1543define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1544; CHECK-LABEL: test_x86_avx512_maskz_psllv_d: 1545; CHECK: ## BB#0: 1546; CHECK-NEXT: kmovw %edi, %k1 1547; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} 1548; CHECK-NEXT: retq 1549 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1550 ret <16 x i32> %res 1551} 1552 1553declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1554 1555define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) { 1556; CHECK-LABEL: test_x86_avx512_psllv_q: 1557; CHECK: ## BB#0: 1558; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 1559; CHECK-NEXT: retq 1560 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1561 ret <8 x i64> %res 1562} 1563 1564define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1565; CHECK-LABEL: test_x86_avx512_mask_psllv_q: 1566; CHECK: ## BB#0: 1567; CHECK-NEXT: kmovw %edi, %k1 1568; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} 1569; CHECK-NEXT: vmovaps %zmm2, %zmm0 1570; CHECK-NEXT: retq 1571 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1572 ret <8 x i64> %res 1573} 1574 1575define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1576; CHECK-LABEL: test_x86_avx512_maskz_psllv_q: 1577; CHECK: ## BB#0: 1578; CHECK-NEXT: kmovw %edi, %k1 1579; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} 1580; CHECK-NEXT: retq 1581 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1582 ret <8 x i64> %res 1583} 1584 1585declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 1586 1587 1588define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) { 1589; CHECK-LABEL: test_x86_avx512_psrav_d: 1590; CHECK: ## BB#0: 1591; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 1592; CHECK-NEXT: retq 1593 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1594 ret <16 x i32> %res 1595} 1596 1597define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1598; CHECK-LABEL: test_x86_avx512_mask_psrav_d: 1599; CHECK: ## BB#0: 1600; CHECK-NEXT: kmovw %edi, %k1 1601; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1} 1602; CHECK-NEXT: vmovaps %zmm2, %zmm0 1603; CHECK-NEXT: retq 1604 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1605 ret <16 x i32> %res 1606} 1607 1608define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1609; CHECK-LABEL: test_x86_avx512_maskz_psrav_d: 1610; CHECK: ## BB#0: 1611; CHECK-NEXT: kmovw %edi, %k1 1612; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} 1613; CHECK-NEXT: retq 1614 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1615 ret <16 x i32> %res 1616} 1617 1618declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1619 1620define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) { 1621; CHECK-LABEL: test_x86_avx512_psrav_q: 1622; CHECK: ## BB#0: 1623; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 1624; CHECK-NEXT: retq 1625 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1626 ret <8 x i64> %res 1627} 1628 1629define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1630; CHECK-LABEL: test_x86_avx512_mask_psrav_q: 1631; CHECK: ## BB#0: 1632; CHECK-NEXT: kmovw %edi, %k1 1633; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1} 1634; CHECK-NEXT: vmovaps %zmm2, %zmm0 1635; CHECK-NEXT: retq 1636 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1637 ret <8 x i64> %res 1638} 1639 1640define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1641; CHECK-LABEL: test_x86_avx512_maskz_psrav_q: 1642; CHECK: ## BB#0: 1643; CHECK-NEXT: kmovw %edi, %k1 1644; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} 1645; CHECK-NEXT: retq 1646 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1647 ret <8 x i64> %res 1648} 1649 1650declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 1651 1652define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) { 1653; CHECK-LABEL: test_x86_avx512_psrlv_d: 1654; CHECK: ## BB#0: 1655; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 1656; CHECK-NEXT: retq 1657 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1658 ret <16 x i32> %res 1659} 1660 1661define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1662; CHECK-LABEL: test_x86_avx512_mask_psrlv_d: 1663; CHECK: ## BB#0: 1664; CHECK-NEXT: kmovw %edi, %k1 1665; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} 1666; CHECK-NEXT: vmovaps %zmm2, %zmm0 1667; CHECK-NEXT: retq 1668 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1669 ret <16 x i32> %res 1670} 1671 1672define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1673; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d: 1674; CHECK: ## BB#0: 1675; CHECK-NEXT: kmovw %edi, %k1 1676; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} 1677; CHECK-NEXT: retq 1678 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1679 ret <16 x i32> %res 1680} 1681 1682declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1683 1684define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) { 1685; CHECK-LABEL: test_x86_avx512_psrlv_q: 1686; CHECK: ## BB#0: 1687; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 1688; CHECK-NEXT: retq 1689 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1690 ret <8 x i64> %res 1691} 1692 1693define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1694; CHECK-LABEL: test_x86_avx512_mask_psrlv_q: 1695; CHECK: ## BB#0: 1696; CHECK-NEXT: kmovw %edi, %k1 1697; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} 1698; CHECK-NEXT: vmovaps %zmm2, %zmm0 1699; CHECK-NEXT: retq 1700 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1701 ret <8 x i64> %res 1702} 1703 1704define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1705; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q: 1706; CHECK: ## BB#0: 1707; CHECK-NEXT: kmovw %edi, %k1 1708; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} 1709; CHECK-NEXT: retq 1710 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1711 ret <8 x i64> %res 1712} 1713 1714declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 1715 1716define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) { 1717; CHECK-LABEL: test_x86_avx512_psrlv_q_memop: 1718; CHECK: ## BB#0: 1719; CHECK-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0 1720; CHECK-NEXT: retq 1721 %b = load <8 x i64>, <8 x i64>* %ptr 1722 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1723 ret <8 x i64> %res 1724} 1725 1726declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 1727declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 1728declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) 1729 1730define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) { 1731; CHECK-LABEL: test_vsubps_rn: 1732; CHECK: ## BB#0: 1733; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 1734; CHECK-NEXT: retq 1735 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 1736 <16 x float> zeroinitializer, i16 -1, i32 0) 1737 ret <16 x float> %res 1738} 1739 1740define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) { 1741; CHECK-LABEL: test_vsubps_rd: 1742; CHECK: ## BB#0: 1743; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 1744; CHECK-NEXT: retq 1745 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 1746 <16 x float> zeroinitializer, i16 -1, i32 1) 1747 ret <16 x float> %res 1748} 1749 1750define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) { 1751; CHECK-LABEL: test_vsubps_ru: 1752; CHECK: ## BB#0: 1753; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 1754; CHECK-NEXT: retq 1755 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 1756 <16 x float> zeroinitializer, i16 -1, i32 2) 1757 ret <16 x float> %res 1758} 1759 1760define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) { 1761; CHECK-LABEL: test_vsubps_rz: 1762; CHECK: ## BB#0: 1763; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 1764; CHECK-NEXT: retq 1765 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 1766 <16 x float> zeroinitializer, i16 -1, i32 3) 1767 ret <16 x float> %res 1768} 1769 1770define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) { 1771; CHECK-LABEL: test_vmulps_rn: 1772; CHECK: ## BB#0: 1773; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 1774; CHECK-NEXT: retq 1775 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1776 <16 x float> zeroinitializer, i16 -1, i32 0) 1777 ret <16 x float> %res 1778} 1779 1780define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) { 1781; CHECK-LABEL: test_vmulps_rd: 1782; CHECK: ## BB#0: 1783; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 1784; CHECK-NEXT: retq 1785 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1786 <16 x float> zeroinitializer, i16 -1, i32 1) 1787 ret <16 x float> %res 1788} 1789 1790define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) { 1791; CHECK-LABEL: test_vmulps_ru: 1792; CHECK: ## BB#0: 1793; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 1794; CHECK-NEXT: retq 1795 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1796 <16 x float> zeroinitializer, i16 -1, i32 2) 1797 ret <16 x float> %res 1798} 1799 1800define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) { 1801; CHECK-LABEL: test_vmulps_rz: 1802; CHECK: ## BB#0: 1803; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 1804; CHECK-NEXT: retq 1805 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1806 <16 x float> zeroinitializer, i16 -1, i32 3) 1807 ret <16 x float> %res 1808} 1809 1810;; mask float 1811define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1812; CHECK-LABEL: test_vmulps_mask_rn: 1813; CHECK: ## BB#0: 1814; CHECK-NEXT: kmovw %edi, %k1 1815; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1816; CHECK-NEXT: retq 1817 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1818 <16 x float> zeroinitializer, i16 %mask, i32 0) 1819 ret <16 x float> %res 1820} 1821 1822define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1823; CHECK-LABEL: test_vmulps_mask_rd: 1824; CHECK: ## BB#0: 1825; CHECK-NEXT: kmovw %edi, %k1 1826; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1827; CHECK-NEXT: retq 1828 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1829 <16 x float> zeroinitializer, i16 %mask, i32 1) 1830 ret <16 x float> %res 1831} 1832 1833define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1834; CHECK-LABEL: test_vmulps_mask_ru: 1835; CHECK: ## BB#0: 1836; CHECK-NEXT: kmovw %edi, %k1 1837; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1838; CHECK-NEXT: retq 1839 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1840 <16 x float> zeroinitializer, i16 %mask, i32 2) 1841 ret <16 x float> %res 1842} 1843 1844define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1845; CHECK-LABEL: test_vmulps_mask_rz: 1846; CHECK: ## BB#0: 1847; CHECK-NEXT: kmovw %edi, %k1 1848; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1849; CHECK-NEXT: retq 1850 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1851 <16 x float> zeroinitializer, i16 %mask, i32 3) 1852 ret <16 x float> %res 1853} 1854 1855;; With Passthru value 1856define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 1857; CHECK-LABEL: test_vmulps_mask_passthru_rn: 1858; CHECK: ## BB#0: 1859; CHECK-NEXT: kmovw %edi, %k1 1860; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} 1861; CHECK-NEXT: vmovaps %zmm2, %zmm0 1862; CHECK-NEXT: retq 1863 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1864 <16 x float> %passthru, i16 %mask, i32 0) 1865 ret <16 x float> %res 1866} 1867 1868define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 1869; CHECK-LABEL: test_vmulps_mask_passthru_rd: 1870; CHECK: ## BB#0: 1871; CHECK-NEXT: kmovw %edi, %k1 1872; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} 1873; CHECK-NEXT: vmovaps %zmm2, %zmm0 1874; CHECK-NEXT: retq 1875 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1876 <16 x float> %passthru, i16 %mask, i32 1) 1877 ret <16 x float> %res 1878} 1879 1880define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 1881; CHECK-LABEL: test_vmulps_mask_passthru_ru: 1882; CHECK: ## BB#0: 1883; CHECK-NEXT: kmovw %edi, %k1 1884; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 1885; CHECK-NEXT: vmovaps %zmm2, %zmm0 1886; CHECK-NEXT: retq 1887 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1888 <16 x float> %passthru, i16 %mask, i32 2) 1889 ret <16 x float> %res 1890} 1891 1892define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 1893; CHECK-LABEL: test_vmulps_mask_passthru_rz: 1894; CHECK: ## BB#0: 1895; CHECK-NEXT: kmovw %edi, %k1 1896; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 1897; CHECK-NEXT: vmovaps %zmm2, %zmm0 1898; CHECK-NEXT: retq 1899 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1900 <16 x float> %passthru, i16 %mask, i32 3) 1901 ret <16 x float> %res 1902} 1903 1904;; mask double 1905define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 1906; CHECK-LABEL: test_vmulpd_mask_rn: 1907; CHECK: ## BB#0: 1908; CHECK-NEXT: kmovw %edi, %k1 1909; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1910; CHECK-NEXT: retq 1911 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 1912 <8 x double> zeroinitializer, i8 %mask, i32 0) 1913 ret <8 x double> %res 1914} 1915 1916define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 1917; CHECK-LABEL: test_vmulpd_mask_rd: 1918; CHECK: ## BB#0: 1919; CHECK-NEXT: kmovw %edi, %k1 1920; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1921; CHECK-NEXT: retq 1922 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 1923 <8 x double> zeroinitializer, i8 %mask, i32 1) 1924 ret <8 x double> %res 1925} 1926 1927define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 1928; CHECK-LABEL: test_vmulpd_mask_ru: 1929; CHECK: ## BB#0: 1930; CHECK-NEXT: kmovw %edi, %k1 1931; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1932; CHECK-NEXT: retq 1933 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 1934 <8 x double> zeroinitializer, i8 %mask, i32 2) 1935 ret <8 x double> %res 1936} 1937 1938define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 1939; CHECK-LABEL: test_vmulpd_mask_rz: 1940; CHECK: ## BB#0: 1941; CHECK-NEXT: kmovw %edi, %k1 1942; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 1943; CHECK-NEXT: retq 1944 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 1945 <8 x double> zeroinitializer, i8 %mask, i32 3) 1946 ret <8 x double> %res 1947} 1948 1949define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 1950; CHECK-LABEL: test_mask_add_epi32_rr: 1951; CHECK: ## BB#0: 1952; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 1953; CHECK-NEXT: retq 1954 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1955 ret < 16 x i32> %res 1956} 1957 1958define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1959; CHECK-LABEL: test_mask_add_epi32_rrk: 1960; CHECK: ## BB#0: 1961; CHECK-NEXT: kmovw %edi, %k1 1962; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1} 1963; CHECK-NEXT: vmovaps %zmm2, %zmm0 1964; CHECK-NEXT: retq 1965 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1966 ret < 16 x i32> %res 1967} 1968 1969define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 1970; CHECK-LABEL: test_mask_add_epi32_rrkz: 1971; CHECK: ## BB#0: 1972; CHECK-NEXT: kmovw %edi, %k1 1973; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} 1974; CHECK-NEXT: retq 1975 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1976 ret < 16 x i32> %res 1977} 1978 1979define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 1980; CHECK-LABEL: test_mask_add_epi32_rm: 1981; CHECK: ## BB#0: 1982; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 1983; CHECK-NEXT: retq 1984 %b = load <16 x i32>, <16 x i32>* %ptr_b 1985 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1986 ret < 16 x i32> %res 1987} 1988 1989define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 1990; CHECK-LABEL: test_mask_add_epi32_rmk: 1991; CHECK: ## BB#0: 1992; CHECK-NEXT: kmovw %esi, %k1 1993; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1} 1994; CHECK-NEXT: vmovaps %zmm1, %zmm0 1995; CHECK-NEXT: retq 1996 %b = load <16 x i32>, <16 x i32>* %ptr_b 1997 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1998 ret < 16 x i32> %res 1999} 2000 2001define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 2002; CHECK-LABEL: test_mask_add_epi32_rmkz: 2003; CHECK: ## BB#0: 2004; CHECK-NEXT: kmovw %esi, %k1 2005; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} 2006; CHECK-NEXT: retq 2007 %b = load <16 x i32>, <16 x i32>* %ptr_b 2008 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2009 ret < 16 x i32> %res 2010} 2011 2012define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) { 2013; CHECK-LABEL: test_mask_add_epi32_rmb: 2014; CHECK: ## BB#0: 2015; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 2016; CHECK-NEXT: retq 2017 %q = load i32, i32* %ptr_b 2018 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2019 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2020 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2021 ret < 16 x i32> %res 2022} 2023 2024define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2025; CHECK-LABEL: test_mask_add_epi32_rmbk: 2026; CHECK: ## BB#0: 2027; CHECK-NEXT: kmovw %esi, %k1 2028; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} 2029; CHECK-NEXT: vmovaps %zmm1, %zmm0 2030; CHECK-NEXT: retq 2031 %q = load i32, i32* %ptr_b 2032 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2033 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2034 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2035 ret < 16 x i32> %res 2036} 2037 2038define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) { 2039; CHECK-LABEL: test_mask_add_epi32_rmbkz: 2040; CHECK: ## BB#0: 2041; CHECK-NEXT: kmovw %esi, %k1 2042; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 2043; CHECK-NEXT: retq 2044 %q = load i32, i32* %ptr_b 2045 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2046 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2047 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2048 ret < 16 x i32> %res 2049} 2050 2051declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 2052 2053define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 2054; CHECK-LABEL: test_mask_sub_epi32_rr: 2055; CHECK: ## BB#0: 2056; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 2057; CHECK-NEXT: retq 2058 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2059 ret < 16 x i32> %res 2060} 2061 2062define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 2063; CHECK-LABEL: test_mask_sub_epi32_rrk: 2064; CHECK: ## BB#0: 2065; CHECK-NEXT: kmovw %edi, %k1 2066; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} 2067; CHECK-NEXT: vmovaps %zmm2, %zmm0 2068; CHECK-NEXT: retq 2069 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2070 ret < 16 x i32> %res 2071} 2072 2073define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 2074; CHECK-LABEL: test_mask_sub_epi32_rrkz: 2075; CHECK: ## BB#0: 2076; CHECK-NEXT: kmovw %edi, %k1 2077; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} 2078; CHECK-NEXT: retq 2079 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2080 ret < 16 x i32> %res 2081} 2082 2083define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 2084; CHECK-LABEL: test_mask_sub_epi32_rm: 2085; CHECK: ## BB#0: 2086; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 2087; CHECK-NEXT: retq 2088 %b = load <16 x i32>, <16 x i32>* %ptr_b 2089 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2090 ret < 16 x i32> %res 2091} 2092 2093define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2094; CHECK-LABEL: test_mask_sub_epi32_rmk: 2095; CHECK: ## BB#0: 2096; CHECK-NEXT: kmovw %esi, %k1 2097; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1} 2098; CHECK-NEXT: vmovaps %zmm1, %zmm0 2099; CHECK-NEXT: retq 2100 %b = load <16 x i32>, <16 x i32>* %ptr_b 2101 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2102 ret < 16 x i32> %res 2103} 2104 2105define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 2106; CHECK-LABEL: test_mask_sub_epi32_rmkz: 2107; CHECK: ## BB#0: 2108; CHECK-NEXT: kmovw %esi, %k1 2109; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} 2110; CHECK-NEXT: retq 2111 %b = load <16 x i32>, <16 x i32>* %ptr_b 2112 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2113 ret < 16 x i32> %res 2114} 2115 2116define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) { 2117; CHECK-LABEL: test_mask_sub_epi32_rmb: 2118; CHECK: ## BB#0: 2119; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 2120; CHECK-NEXT: retq 2121 %q = load i32, i32* %ptr_b 2122 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2123 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2124 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2125 ret < 16 x i32> %res 2126} 2127 2128define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2129; CHECK-LABEL: test_mask_sub_epi32_rmbk: 2130; CHECK: ## BB#0: 2131; CHECK-NEXT: kmovw %esi, %k1 2132; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} 2133; CHECK-NEXT: vmovaps %zmm1, %zmm0 2134; CHECK-NEXT: retq 2135 %q = load i32, i32* %ptr_b 2136 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2137 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2138 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2139 ret < 16 x i32> %res 2140} 2141 2142define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) { 2143; CHECK-LABEL: test_mask_sub_epi32_rmbkz: 2144; CHECK: ## BB#0: 2145; CHECK-NEXT: kmovw %esi, %k1 2146; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 2147; CHECK-NEXT: retq 2148 %q = load i32, i32* %ptr_b 2149 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2150 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2151 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2152 ret < 16 x i32> %res 2153} 2154 2155declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 2156 2157define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) { 2158; CHECK-LABEL: test_mask_add_epi64_rr: 2159; CHECK: ## BB#0: 2160; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 2161; CHECK-NEXT: retq 2162 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2163 ret < 8 x i64> %res 2164} 2165 2166define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2167; CHECK-LABEL: test_mask_add_epi64_rrk: 2168; CHECK: ## BB#0: 2169; CHECK-NEXT: kmovw %edi, %k1 2170; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1} 2171; CHECK-NEXT: vmovaps %zmm2, %zmm0 2172; CHECK-NEXT: retq 2173 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2174 ret < 8 x i64> %res 2175} 2176 2177define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 2178; CHECK-LABEL: test_mask_add_epi64_rrkz: 2179; CHECK: ## BB#0: 2180; CHECK-NEXT: kmovw %edi, %k1 2181; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} 2182; CHECK-NEXT: retq 2183 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2184 ret < 8 x i64> %res 2185} 2186 2187define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) { 2188; CHECK-LABEL: test_mask_add_epi64_rm: 2189; CHECK: ## BB#0: 2190; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 2191; CHECK-NEXT: retq 2192 %b = load <8 x i64>, <8 x i64>* %ptr_b 2193 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2194 ret < 8 x i64> %res 2195} 2196 2197define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2198; CHECK-LABEL: test_mask_add_epi64_rmk: 2199; CHECK: ## BB#0: 2200; CHECK-NEXT: kmovw %esi, %k1 2201; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1} 2202; CHECK-NEXT: vmovaps %zmm1, %zmm0 2203; CHECK-NEXT: retq 2204 %b = load <8 x i64>, <8 x i64>* %ptr_b 2205 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2206 ret < 8 x i64> %res 2207} 2208 2209define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) { 2210; CHECK-LABEL: test_mask_add_epi64_rmkz: 2211; CHECK: ## BB#0: 2212; CHECK-NEXT: kmovw %esi, %k1 2213; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} 2214; CHECK-NEXT: retq 2215 %b = load <8 x i64>, <8 x i64>* %ptr_b 2216 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2217 ret < 8 x i64> %res 2218} 2219 2220define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) { 2221; CHECK-LABEL: test_mask_add_epi64_rmb: 2222; CHECK: ## BB#0: 2223; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 2224; CHECK-NEXT: retq 2225 %q = load i64, i64* %ptr_b 2226 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2227 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2228 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2229 ret < 8 x i64> %res 2230} 2231 2232define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2233; CHECK-LABEL: test_mask_add_epi64_rmbk: 2234; CHECK: ## BB#0: 2235; CHECK-NEXT: kmovw %esi, %k1 2236; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} 2237; CHECK-NEXT: vmovaps %zmm1, %zmm0 2238; CHECK-NEXT: retq 2239 %q = load i64, i64* %ptr_b 2240 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2241 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2242 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2243 ret < 8 x i64> %res 2244} 2245 2246define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) { 2247; CHECK-LABEL: test_mask_add_epi64_rmbkz: 2248; CHECK: ## BB#0: 2249; CHECK-NEXT: kmovw %esi, %k1 2250; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} 2251; CHECK-NEXT: retq 2252 %q = load i64, i64* %ptr_b 2253 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2254 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2255 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2256 ret < 8 x i64> %res 2257} 2258 2259declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2260 2261define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) { 2262; CHECK-LABEL: test_mask_sub_epi64_rr: 2263; CHECK: ## BB#0: 2264; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 2265; CHECK-NEXT: retq 2266 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2267 ret < 8 x i64> %res 2268} 2269 2270define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2271; CHECK-LABEL: test_mask_sub_epi64_rrk: 2272; CHECK: ## BB#0: 2273; CHECK-NEXT: kmovw %edi, %k1 2274; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1} 2275; CHECK-NEXT: vmovaps %zmm2, %zmm0 2276; CHECK-NEXT: retq 2277 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2278 ret < 8 x i64> %res 2279} 2280 2281define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 2282; CHECK-LABEL: test_mask_sub_epi64_rrkz: 2283; CHECK: ## BB#0: 2284; CHECK-NEXT: kmovw %edi, %k1 2285; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} 2286; CHECK-NEXT: retq 2287 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2288 ret < 8 x i64> %res 2289} 2290 2291define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) { 2292; CHECK-LABEL: test_mask_sub_epi64_rm: 2293; CHECK: ## BB#0: 2294; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 2295; CHECK-NEXT: retq 2296 %b = load <8 x i64>, <8 x i64>* %ptr_b 2297 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2298 ret < 8 x i64> %res 2299} 2300 2301define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2302; CHECK-LABEL: test_mask_sub_epi64_rmk: 2303; CHECK: ## BB#0: 2304; CHECK-NEXT: kmovw %esi, %k1 2305; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1} 2306; CHECK-NEXT: vmovaps %zmm1, %zmm0 2307; CHECK-NEXT: retq 2308 %b = load <8 x i64>, <8 x i64>* %ptr_b 2309 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2310 ret < 8 x i64> %res 2311} 2312 2313define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) { 2314; CHECK-LABEL: test_mask_sub_epi64_rmkz: 2315; CHECK: ## BB#0: 2316; CHECK-NEXT: kmovw %esi, %k1 2317; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} 2318; CHECK-NEXT: retq 2319 %b = load <8 x i64>, <8 x i64>* %ptr_b 2320 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2321 ret < 8 x i64> %res 2322} 2323 2324define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) { 2325; CHECK-LABEL: test_mask_sub_epi64_rmb: 2326; CHECK: ## BB#0: 2327; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 2328; CHECK-NEXT: retq 2329 %q = load i64, i64* %ptr_b 2330 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2331 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2332 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2333 ret < 8 x i64> %res 2334} 2335 2336define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2337; CHECK-LABEL: test_mask_sub_epi64_rmbk: 2338; CHECK: ## BB#0: 2339; CHECK-NEXT: kmovw %esi, %k1 2340; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} 2341; CHECK-NEXT: vmovaps %zmm1, %zmm0 2342; CHECK-NEXT: retq 2343 %q = load i64, i64* %ptr_b 2344 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2345 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2346 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2347 ret < 8 x i64> %res 2348} 2349 2350define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) { 2351; CHECK-LABEL: test_mask_sub_epi64_rmbkz: 2352; CHECK: ## BB#0: 2353; CHECK-NEXT: kmovw %esi, %k1 2354; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} 2355; CHECK-NEXT: retq 2356 %q = load i64, i64* %ptr_b 2357 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2358 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2359 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2360 ret < 8 x i64> %res 2361} 2362 2363declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2364 2365define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 2366; CHECK-LABEL: test_mask_mul_epi32_rr: 2367; CHECK: ## BB#0: 2368; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 2369; CHECK-NEXT: retq 2370 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2371 ret < 8 x i64> %res 2372} 2373 2374define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) { 2375; CHECK-LABEL: test_mask_mul_epi32_rrk: 2376; CHECK: ## BB#0: 2377; CHECK-NEXT: kmovw %edi, %k1 2378; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} 2379; CHECK-NEXT: vmovaps %zmm2, %zmm0 2380; CHECK-NEXT: retq 2381 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2382 ret < 8 x i64> %res 2383} 2384 2385define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) { 2386; CHECK-LABEL: test_mask_mul_epi32_rrkz: 2387; CHECK: ## BB#0: 2388; CHECK-NEXT: kmovw %edi, %k1 2389; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} 2390; CHECK-NEXT: retq 2391 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2392 ret < 8 x i64> %res 2393} 2394 2395define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 2396; CHECK-LABEL: test_mask_mul_epi32_rm: 2397; CHECK: ## BB#0: 2398; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 2399; CHECK-NEXT: retq 2400 %b = load <16 x i32>, <16 x i32>* %ptr_b 2401 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2402 ret < 8 x i64> %res 2403} 2404 2405define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2406; CHECK-LABEL: test_mask_mul_epi32_rmk: 2407; CHECK: ## BB#0: 2408; CHECK-NEXT: kmovw %esi, %k1 2409; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} 2410; CHECK-NEXT: vmovaps %zmm1, %zmm0 2411; CHECK-NEXT: retq 2412 %b = load <16 x i32>, <16 x i32>* %ptr_b 2413 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2414 ret < 8 x i64> %res 2415} 2416 2417define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) { 2418; CHECK-LABEL: test_mask_mul_epi32_rmkz: 2419; CHECK: ## BB#0: 2420; CHECK-NEXT: kmovw %esi, %k1 2421; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} 2422; CHECK-NEXT: retq 2423 %b = load <16 x i32>, <16 x i32>* %ptr_b 2424 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2425 ret < 8 x i64> %res 2426} 2427 2428define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) { 2429; CHECK-LABEL: test_mask_mul_epi32_rmb: 2430; CHECK: ## BB#0: 2431; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 2432; CHECK-NEXT: retq 2433 %q = load i64, i64* %ptr_b 2434 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2435 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2436 %b = bitcast <8 x i64> %b64 to <16 x i32> 2437 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2438 ret < 8 x i64> %res 2439} 2440 2441define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2442; CHECK-LABEL: test_mask_mul_epi32_rmbk: 2443; CHECK: ## BB#0: 2444; CHECK-NEXT: kmovw %esi, %k1 2445; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} 2446; CHECK-NEXT: vmovaps %zmm1, %zmm0 2447; CHECK-NEXT: retq 2448 %q = load i64, i64* %ptr_b 2449 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2450 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2451 %b = bitcast <8 x i64> %b64 to <16 x i32> 2452 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2453 ret < 8 x i64> %res 2454} 2455 2456define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) { 2457; CHECK-LABEL: test_mask_mul_epi32_rmbkz: 2458; CHECK: ## BB#0: 2459; CHECK-NEXT: kmovw %esi, %k1 2460; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} 2461; CHECK-NEXT: retq 2462 %q = load i64, i64* %ptr_b 2463 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2464 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2465 %b = bitcast <8 x i64> %b64 to <16 x i32> 2466 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2467 ret < 8 x i64> %res 2468} 2469 2470declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8) 2471 2472define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) { 2473; CHECK-LABEL: test_mask_mul_epu32_rr: 2474; CHECK: ## BB#0: 2475; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 2476; CHECK-NEXT: retq 2477 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2478 ret < 8 x i64> %res 2479} 2480 2481define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) { 2482; CHECK-LABEL: test_mask_mul_epu32_rrk: 2483; CHECK: ## BB#0: 2484; CHECK-NEXT: kmovw %edi, %k1 2485; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} 2486; CHECK-NEXT: vmovaps %zmm2, %zmm0 2487; CHECK-NEXT: retq 2488 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2489 ret < 8 x i64> %res 2490} 2491 2492define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) { 2493; CHECK-LABEL: test_mask_mul_epu32_rrkz: 2494; CHECK: ## BB#0: 2495; CHECK-NEXT: kmovw %edi, %k1 2496; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} 2497; CHECK-NEXT: retq 2498 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2499 ret < 8 x i64> %res 2500} 2501 2502define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 2503; CHECK-LABEL: test_mask_mul_epu32_rm: 2504; CHECK: ## BB#0: 2505; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 2506; CHECK-NEXT: retq 2507 %b = load <16 x i32>, <16 x i32>* %ptr_b 2508 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2509 ret < 8 x i64> %res 2510} 2511 2512define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2513; CHECK-LABEL: test_mask_mul_epu32_rmk: 2514; CHECK: ## BB#0: 2515; CHECK-NEXT: kmovw %esi, %k1 2516; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} 2517; CHECK-NEXT: vmovaps %zmm1, %zmm0 2518; CHECK-NEXT: retq 2519 %b = load <16 x i32>, <16 x i32>* %ptr_b 2520 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2521 ret < 8 x i64> %res 2522} 2523 2524define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) { 2525; CHECK-LABEL: test_mask_mul_epu32_rmkz: 2526; CHECK: ## BB#0: 2527; CHECK-NEXT: kmovw %esi, %k1 2528; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} 2529; CHECK-NEXT: retq 2530 %b = load <16 x i32>, <16 x i32>* %ptr_b 2531 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2532 ret < 8 x i64> %res 2533} 2534 2535define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) { 2536; CHECK-LABEL: test_mask_mul_epu32_rmb: 2537; CHECK: ## BB#0: 2538; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 2539; CHECK-NEXT: retq 2540 %q = load i64, i64* %ptr_b 2541 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2542 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2543 %b = bitcast <8 x i64> %b64 to <16 x i32> 2544 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2545 ret < 8 x i64> %res 2546} 2547 2548define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2549; CHECK-LABEL: test_mask_mul_epu32_rmbk: 2550; CHECK: ## BB#0: 2551; CHECK-NEXT: kmovw %esi, %k1 2552; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} 2553; CHECK-NEXT: vmovaps %zmm1, %zmm0 2554; CHECK-NEXT: retq 2555 %q = load i64, i64* %ptr_b 2556 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2557 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2558 %b = bitcast <8 x i64> %b64 to <16 x i32> 2559 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2560 ret < 8 x i64> %res 2561} 2562 2563define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) { 2564; CHECK-LABEL: test_mask_mul_epu32_rmbkz: 2565; CHECK: ## BB#0: 2566; CHECK-NEXT: kmovw %esi, %k1 2567; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} 2568; CHECK-NEXT: retq 2569 %q = load i64, i64* %ptr_b 2570 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2571 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2572 %b = bitcast <8 x i64> %b64 to <16 x i32> 2573 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2574 ret < 8 x i64> %res 2575} 2576 2577declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8) 2578 2579define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 2580; CHECK-LABEL: test_mask_mullo_epi32_rr_512: 2581; CHECK: ## BB#0: 2582; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 2583; CHECK-NEXT: retq 2584 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2585 ret <16 x i32> %res 2586} 2587 2588define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 2589; CHECK-LABEL: test_mask_mullo_epi32_rrk_512: 2590; CHECK: ## BB#0: 2591; CHECK-NEXT: kmovw %edi, %k1 2592; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1} 2593; CHECK-NEXT: vmovaps %zmm2, %zmm0 2594; CHECK-NEXT: retq 2595 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2596 ret < 16 x i32> %res 2597} 2598 2599define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 2600; CHECK-LABEL: test_mask_mullo_epi32_rrkz_512: 2601; CHECK: ## BB#0: 2602; CHECK-NEXT: kmovw %edi, %k1 2603; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} 2604; CHECK-NEXT: retq 2605 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2606 ret < 16 x i32> %res 2607} 2608 2609define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 2610; CHECK-LABEL: test_mask_mullo_epi32_rm_512: 2611; CHECK: ## BB#0: 2612; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 2613; CHECK-NEXT: retq 2614 %b = load <16 x i32>, <16 x i32>* %ptr_b 2615 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2616 ret < 16 x i32> %res 2617} 2618 2619define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2620; CHECK-LABEL: test_mask_mullo_epi32_rmk_512: 2621; CHECK: ## BB#0: 2622; CHECK-NEXT: kmovw %esi, %k1 2623; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1} 2624; CHECK-NEXT: vmovaps %zmm1, %zmm0 2625; CHECK-NEXT: retq 2626 %b = load <16 x i32>, <16 x i32>* %ptr_b 2627 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2628 ret < 16 x i32> %res 2629} 2630 2631define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 2632; CHECK-LABEL: test_mask_mullo_epi32_rmkz_512: 2633; CHECK: ## BB#0: 2634; CHECK-NEXT: kmovw %esi, %k1 2635; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} 2636; CHECK-NEXT: retq 2637 %b = load <16 x i32>, <16 x i32>* %ptr_b 2638 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2639 ret < 16 x i32> %res 2640} 2641 2642define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 2643; CHECK-LABEL: test_mask_mullo_epi32_rmb_512: 2644; CHECK: ## BB#0: 2645; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 2646; CHECK-NEXT: retq 2647 %q = load i32, i32* %ptr_b 2648 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2649 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2650 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2651 ret < 16 x i32> %res 2652} 2653 2654define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2655; CHECK-LABEL: test_mask_mullo_epi32_rmbk_512: 2656; CHECK: ## BB#0: 2657; CHECK-NEXT: kmovw %esi, %k1 2658; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} 2659; CHECK-NEXT: vmovaps %zmm1, %zmm0 2660; CHECK-NEXT: retq 2661 %q = load i32, i32* %ptr_b 2662 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2663 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2664 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2665 ret < 16 x i32> %res 2666} 2667 2668define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) { 2669; CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512: 2670; CHECK: ## BB#0: 2671; CHECK-NEXT: kmovw %esi, %k1 2672; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 2673; CHECK-NEXT: retq 2674 %q = load i32, i32* %ptr_b 2675 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2676 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2677 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2678 ret < 16 x i32> %res 2679} 2680 2681declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 2682 2683define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2684; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae: 2685; CHECK: ## BB#0: 2686; CHECK-NEXT: kmovw %edi, %k1 2687; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2688; CHECK-NEXT: retq 2689 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0) 2690 ret <16 x float> %res 2691} 2692define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2693; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae: 2694; CHECK: ## BB#0: 2695; CHECK-NEXT: kmovw %edi, %k1 2696; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2697; CHECK-NEXT: retq 2698 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1) 2699 ret <16 x float> %res 2700} 2701define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2702; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae: 2703; CHECK: ## BB#0: 2704; CHECK-NEXT: kmovw %edi, %k1 2705; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2706; CHECK-NEXT: retq 2707 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2) 2708 ret <16 x float> %res 2709} 2710 2711define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2712; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae: 2713; CHECK: ## BB#0: 2714; CHECK-NEXT: kmovw %edi, %k1 2715; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2716; CHECK-NEXT: retq 2717 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3) 2718 ret <16 x float> %res 2719} 2720 2721 2722define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2723; CHECK-LABEL: test_mm512_maskz_add_round_ps_current: 2724; CHECK: ## BB#0: 2725; CHECK-NEXT: kmovw %edi, %k1 2726; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} 2727; CHECK-NEXT: retq 2728 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) 2729 ret <16 x float> %res 2730} 2731 2732define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2733; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae: 2734; CHECK: ## BB#0: 2735; CHECK-NEXT: kmovw %edi, %k1 2736; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2737; CHECK-NEXT: vmovaps %zmm2, %zmm0 2738; CHECK-NEXT: retq 2739 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0) 2740 ret <16 x float> %res 2741} 2742define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2743; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae: 2744; CHECK: ## BB#0: 2745; CHECK-NEXT: kmovw %edi, %k1 2746; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2747; CHECK-NEXT: vmovaps %zmm2, %zmm0 2748; CHECK-NEXT: retq 2749 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1) 2750 ret <16 x float> %res 2751} 2752define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2753; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae: 2754; CHECK: ## BB#0: 2755; CHECK-NEXT: kmovw %edi, %k1 2756; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2757; CHECK-NEXT: vmovaps %zmm2, %zmm0 2758; CHECK-NEXT: retq 2759 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2) 2760 ret <16 x float> %res 2761} 2762 2763define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2764; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae: 2765; CHECK: ## BB#0: 2766; CHECK-NEXT: kmovw %edi, %k1 2767; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2768; CHECK-NEXT: vmovaps %zmm2, %zmm0 2769; CHECK-NEXT: retq 2770 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3) 2771 ret <16 x float> %res 2772} 2773 2774 2775define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2776; CHECK-LABEL: test_mm512_mask_add_round_ps_current: 2777; CHECK: ## BB#0: 2778; CHECK-NEXT: kmovw %edi, %k1 2779; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} 2780; CHECK-NEXT: vmovaps %zmm2, %zmm0 2781; CHECK-NEXT: retq 2782 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) 2783 ret <16 x float> %res 2784} 2785 2786 2787define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2788; CHECK-LABEL: test_mm512_add_round_ps_rn_sae: 2789; CHECK: ## BB#0: 2790; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 2791; CHECK-NEXT: retq 2792 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0) 2793 ret <16 x float> %res 2794} 2795define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2796; CHECK-LABEL: test_mm512_add_round_ps_rd_sae: 2797; CHECK: ## BB#0: 2798; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 2799; CHECK-NEXT: retq 2800 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1) 2801 ret <16 x float> %res 2802} 2803define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2804; CHECK-LABEL: test_mm512_add_round_ps_ru_sae: 2805; CHECK: ## BB#0: 2806; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 2807; CHECK-NEXT: retq 2808 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2) 2809 ret <16 x float> %res 2810} 2811 2812define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2813; CHECK-LABEL: test_mm512_add_round_ps_rz_sae: 2814; CHECK: ## BB#0: 2815; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 2816; CHECK-NEXT: retq 2817 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3) 2818 ret <16 x float> %res 2819} 2820 2821define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2822; CHECK-LABEL: test_mm512_add_round_ps_current: 2823; CHECK: ## BB#0: 2824; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 2825; CHECK-NEXT: retq 2826 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) 2827 ret <16 x float> %res 2828} 2829declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 2830 2831define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2832; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae: 2833; CHECK: ## BB#0: 2834; CHECK-NEXT: kmovw %edi, %k1 2835; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2836; CHECK-NEXT: vmovaps %zmm2, %zmm0 2837; CHECK-NEXT: retq 2838 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0) 2839 ret <16 x float> %res 2840} 2841define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2842; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae: 2843; CHECK: ## BB#0: 2844; CHECK-NEXT: kmovw %edi, %k1 2845; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2846; CHECK-NEXT: vmovaps %zmm2, %zmm0 2847; CHECK-NEXT: retq 2848 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1) 2849 ret <16 x float> %res 2850} 2851define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2852; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae: 2853; CHECK: ## BB#0: 2854; CHECK-NEXT: kmovw %edi, %k1 2855; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2856; CHECK-NEXT: vmovaps %zmm2, %zmm0 2857; CHECK-NEXT: retq 2858 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2) 2859 ret <16 x float> %res 2860} 2861 2862define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2863; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae: 2864; CHECK: ## BB#0: 2865; CHECK-NEXT: kmovw %edi, %k1 2866; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2867; CHECK-NEXT: vmovaps %zmm2, %zmm0 2868; CHECK-NEXT: retq 2869 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3) 2870 ret <16 x float> %res 2871} 2872 2873 2874define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2875; CHECK-LABEL: test_mm512_mask_sub_round_ps_current: 2876; CHECK: ## BB#0: 2877; CHECK-NEXT: kmovw %edi, %k1 2878; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} 2879; CHECK-NEXT: vmovaps %zmm2, %zmm0 2880; CHECK-NEXT: retq 2881 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) 2882 ret <16 x float> %res 2883} 2884 2885define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2886; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae: 2887; CHECK: ## BB#0: 2888; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 2889; CHECK-NEXT: retq 2890 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0) 2891 ret <16 x float> %res 2892} 2893define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2894; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae: 2895; CHECK: ## BB#0: 2896; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 2897; CHECK-NEXT: retq 2898 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1) 2899 ret <16 x float> %res 2900} 2901define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2902; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae: 2903; CHECK: ## BB#0: 2904; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 2905; CHECK-NEXT: retq 2906 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2) 2907 ret <16 x float> %res 2908} 2909 2910define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2911; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae: 2912; CHECK: ## BB#0: 2913; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 2914; CHECK-NEXT: retq 2915 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3) 2916 ret <16 x float> %res 2917} 2918 2919define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2920; CHECK-LABEL: test_mm512_sub_round_ps_current: 2921; CHECK: ## BB#0: 2922; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 2923; CHECK-NEXT: retq 2924 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) 2925 ret <16 x float> %res 2926} 2927 2928define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2929; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae: 2930; CHECK: ## BB#0: 2931; CHECK-NEXT: kmovw %edi, %k1 2932; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2933; CHECK-NEXT: retq 2934 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0) 2935 ret <16 x float> %res 2936} 2937define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2938; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae: 2939; CHECK: ## BB#0: 2940; CHECK-NEXT: kmovw %edi, %k1 2941; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2942; CHECK-NEXT: retq 2943 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1) 2944 ret <16 x float> %res 2945} 2946define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2947; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae: 2948; CHECK: ## BB#0: 2949; CHECK-NEXT: kmovw %edi, %k1 2950; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2951; CHECK-NEXT: retq 2952 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2) 2953 ret <16 x float> %res 2954} 2955 2956define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2957; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae: 2958; CHECK: ## BB#0: 2959; CHECK-NEXT: kmovw %edi, %k1 2960; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2961; CHECK-NEXT: retq 2962 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3) 2963 ret <16 x float> %res 2964} 2965 2966 2967define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2968; CHECK-LABEL: test_mm512_maskz_div_round_ps_current: 2969; CHECK: ## BB#0: 2970; CHECK-NEXT: kmovw %edi, %k1 2971; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} 2972; CHECK-NEXT: retq 2973 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) 2974 ret <16 x float> %res 2975} 2976 2977define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2978; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae: 2979; CHECK: ## BB#0: 2980; CHECK-NEXT: kmovw %edi, %k1 2981; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2982; CHECK-NEXT: vmovaps %zmm2, %zmm0 2983; CHECK-NEXT: retq 2984 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0) 2985 ret <16 x float> %res 2986} 2987define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2988; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae: 2989; CHECK: ## BB#0: 2990; CHECK-NEXT: kmovw %edi, %k1 2991; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2992; CHECK-NEXT: vmovaps %zmm2, %zmm0 2993; CHECK-NEXT: retq 2994 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1) 2995 ret <16 x float> %res 2996} 2997define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 2998; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae: 2999; CHECK: ## BB#0: 3000; CHECK-NEXT: kmovw %edi, %k1 3001; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3002; CHECK-NEXT: vmovaps %zmm2, %zmm0 3003; CHECK-NEXT: retq 3004 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2) 3005 ret <16 x float> %res 3006} 3007 3008define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3009; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae: 3010; CHECK: ## BB#0: 3011; CHECK-NEXT: kmovw %edi, %k1 3012; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3013; CHECK-NEXT: vmovaps %zmm2, %zmm0 3014; CHECK-NEXT: retq 3015 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3) 3016 ret <16 x float> %res 3017} 3018 3019 3020define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3021; CHECK-LABEL: test_mm512_mask_div_round_ps_current: 3022; CHECK: ## BB#0: 3023; CHECK-NEXT: kmovw %edi, %k1 3024; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} 3025; CHECK-NEXT: vmovaps %zmm2, %zmm0 3026; CHECK-NEXT: retq 3027 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) 3028 ret <16 x float> %res 3029} 3030 3031 3032define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3033; CHECK-LABEL: test_mm512_div_round_ps_rn_sae: 3034; CHECK: ## BB#0: 3035; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 3036; CHECK-NEXT: retq 3037 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0) 3038 ret <16 x float> %res 3039} 3040define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3041; CHECK-LABEL: test_mm512_div_round_ps_rd_sae: 3042; CHECK: ## BB#0: 3043; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 3044; CHECK-NEXT: retq 3045 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1) 3046 ret <16 x float> %res 3047} 3048define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3049; CHECK-LABEL: test_mm512_div_round_ps_ru_sae: 3050; CHECK: ## BB#0: 3051; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 3052; CHECK-NEXT: retq 3053 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2) 3054 ret <16 x float> %res 3055} 3056 3057define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3058; CHECK-LABEL: test_mm512_div_round_ps_rz_sae: 3059; CHECK: ## BB#0: 3060; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 3061; CHECK-NEXT: retq 3062 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3) 3063 ret <16 x float> %res 3064} 3065 3066define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3067; CHECK-LABEL: test_mm512_div_round_ps_current: 3068; CHECK: ## BB#0: 3069; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 3070; CHECK-NEXT: retq 3071 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) 3072 ret <16 x float> %res 3073} 3074declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 3075 3076define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3077; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae: 3078; CHECK: ## BB#0: 3079; CHECK-NEXT: kmovw %edi, %k1 3080; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3081; CHECK-NEXT: retq 3082 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8) 3083 ret <16 x float> %res 3084} 3085 3086define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3087; CHECK-LABEL: test_mm512_maskz_min_round_ps_current: 3088; CHECK: ## BB#0: 3089; CHECK-NEXT: kmovw %edi, %k1 3090; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} 3091; CHECK-NEXT: retq 3092 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) 3093 ret <16 x float> %res 3094} 3095 3096define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3097; CHECK-LABEL: test_mm512_mask_min_round_ps_sae: 3098; CHECK: ## BB#0: 3099; CHECK-NEXT: kmovw %edi, %k1 3100; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} 3101; CHECK-NEXT: vmovaps %zmm2, %zmm0 3102; CHECK-NEXT: retq 3103 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8) 3104 ret <16 x float> %res 3105} 3106 3107define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3108; CHECK-LABEL: test_mm512_mask_min_round_ps_current: 3109; CHECK: ## BB#0: 3110; CHECK-NEXT: kmovw %edi, %k1 3111; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} 3112; CHECK-NEXT: vmovaps %zmm2, %zmm0 3113; CHECK-NEXT: retq 3114 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) 3115 ret <16 x float> %res 3116} 3117 3118define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3119; CHECK-LABEL: test_mm512_min_round_ps_sae: 3120; CHECK: ## BB#0: 3121; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 3122; CHECK-NEXT: retq 3123 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8) 3124 ret <16 x float> %res 3125} 3126 3127define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3128; CHECK-LABEL: test_mm512_min_round_ps_current: 3129; CHECK: ## BB#0: 3130; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 3131; CHECK-NEXT: retq 3132 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) 3133 ret <16 x float> %res 3134} 3135declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 3136 3137define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3138; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae: 3139; CHECK: ## BB#0: 3140; CHECK-NEXT: kmovw %edi, %k1 3141; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 3142; CHECK-NEXT: retq 3143 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8) 3144 ret <16 x float> %res 3145} 3146 3147define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3148; CHECK-LABEL: test_mm512_maskz_max_round_ps_current: 3149; CHECK: ## BB#0: 3150; CHECK-NEXT: kmovw %edi, %k1 3151; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} 3152; CHECK-NEXT: retq 3153 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) 3154 ret <16 x float> %res 3155} 3156 3157define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3158; CHECK-LABEL: test_mm512_mask_max_round_ps_sae: 3159; CHECK: ## BB#0: 3160; CHECK-NEXT: kmovw %edi, %k1 3161; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} 3162; CHECK-NEXT: vmovaps %zmm2, %zmm0 3163; CHECK-NEXT: retq 3164 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8) 3165 ret <16 x float> %res 3166} 3167 3168define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { 3169; CHECK-LABEL: test_mm512_mask_max_round_ps_current: 3170; CHECK: ## BB#0: 3171; CHECK-NEXT: kmovw %edi, %k1 3172; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} 3173; CHECK-NEXT: vmovaps %zmm2, %zmm0 3174; CHECK-NEXT: retq 3175 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) 3176 ret <16 x float> %res 3177} 3178 3179define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3180; CHECK-LABEL: test_mm512_max_round_ps_sae: 3181; CHECK: ## BB#0: 3182; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 3183; CHECK-NEXT: retq 3184 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8) 3185 ret <16 x float> %res 3186} 3187 3188define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 3189; CHECK-LABEL: test_mm512_max_round_ps_current: 3190; CHECK: ## BB#0: 3191; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 3192; CHECK-NEXT: retq 3193 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) 3194 ret <16 x float> %res 3195} 3196declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 3197 3198declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 3199 3200define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3201; CHECK-LABEL: test_mask_add_ss_rn: 3202; CHECK: ## BB#0: 3203; CHECK-NEXT: andl $1, %edi 3204; CHECK-NEXT: kmovw %edi, %k1 3205; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3206; CHECK-NEXT: vmovaps %zmm2, %zmm0 3207; CHECK-NEXT: retq 3208 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0) 3209 ret <4 x float> %res 3210} 3211 3212define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3213; CHECK-LABEL: test_mask_add_ss_rd: 3214; CHECK: ## BB#0: 3215; CHECK-NEXT: andl $1, %edi 3216; CHECK-NEXT: kmovw %edi, %k1 3217; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3218; CHECK-NEXT: vmovaps %zmm2, %zmm0 3219; CHECK-NEXT: retq 3220 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1) 3221 ret <4 x float> %res 3222} 3223 3224define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3225; CHECK-LABEL: test_mask_add_ss_ru: 3226; CHECK: ## BB#0: 3227; CHECK-NEXT: andl $1, %edi 3228; CHECK-NEXT: kmovw %edi, %k1 3229; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3230; CHECK-NEXT: vmovaps %zmm2, %zmm0 3231; CHECK-NEXT: retq 3232 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2) 3233 ret <4 x float> %res 3234} 3235 3236define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3237; CHECK-LABEL: test_mask_add_ss_rz: 3238; CHECK: ## BB#0: 3239; CHECK-NEXT: andl $1, %edi 3240; CHECK-NEXT: kmovw %edi, %k1 3241; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3242; CHECK-NEXT: vmovaps %zmm2, %zmm0 3243; CHECK-NEXT: retq 3244 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3) 3245 ret <4 x float> %res 3246} 3247 3248define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3249; CHECK-LABEL: test_mask_add_ss_current: 3250; CHECK: ## BB#0: 3251; CHECK-NEXT: andl $1, %edi 3252; CHECK-NEXT: kmovw %edi, %k1 3253; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1} 3254; CHECK-NEXT: vmovaps %zmm2, %zmm0 3255; CHECK-NEXT: retq 3256 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 3257 ret <4 x float> %res 3258} 3259 3260define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 3261; CHECK-LABEL: test_maskz_add_ss_rn: 3262; CHECK: ## BB#0: 3263; CHECK-NEXT: andl $1, %edi 3264; CHECK-NEXT: kmovw %edi, %k1 3265; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 3266; CHECK-NEXT: retq 3267 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0) 3268 ret <4 x float> %res 3269} 3270 3271define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) { 3272; CHECK-LABEL: test_add_ss_rn: 3273; CHECK: ## BB#0: 3274; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 3275; CHECK-NEXT: retq 3276 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0) 3277 ret <4 x float> %res 3278} 3279 3280declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 3281 3282define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3283; CHECK-LABEL: test_mask_add_sd_rn: 3284; CHECK: ## BB#0: 3285; CHECK-NEXT: andl $1, %edi 3286; CHECK-NEXT: kmovw %edi, %k1 3287; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3288; CHECK-NEXT: vmovaps %zmm2, %zmm0 3289; CHECK-NEXT: retq 3290 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0) 3291 ret <2 x double> %res 3292} 3293 3294define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3295; CHECK-LABEL: test_mask_add_sd_rd: 3296; CHECK: ## BB#0: 3297; CHECK-NEXT: andl $1, %edi 3298; CHECK-NEXT: kmovw %edi, %k1 3299; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3300; CHECK-NEXT: vmovaps %zmm2, %zmm0 3301; CHECK-NEXT: retq 3302 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1) 3303 ret <2 x double> %res 3304} 3305 3306define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3307; CHECK-LABEL: test_mask_add_sd_ru: 3308; CHECK: ## BB#0: 3309; CHECK-NEXT: andl $1, %edi 3310; CHECK-NEXT: kmovw %edi, %k1 3311; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3312; CHECK-NEXT: vmovaps %zmm2, %zmm0 3313; CHECK-NEXT: retq 3314 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2) 3315 ret <2 x double> %res 3316} 3317 3318define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3319; CHECK-LABEL: test_mask_add_sd_rz: 3320; CHECK: ## BB#0: 3321; CHECK-NEXT: andl $1, %edi 3322; CHECK-NEXT: kmovw %edi, %k1 3323; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3324; CHECK-NEXT: vmovaps %zmm2, %zmm0 3325; CHECK-NEXT: retq 3326 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3) 3327 ret <2 x double> %res 3328} 3329 3330define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3331; CHECK-LABEL: test_mask_add_sd_current: 3332; CHECK: ## BB#0: 3333; CHECK-NEXT: andl $1, %edi 3334; CHECK-NEXT: kmovw %edi, %k1 3335; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1} 3336; CHECK-NEXT: vmovaps %zmm2, %zmm0 3337; CHECK-NEXT: retq 3338 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 3339 ret <2 x double> %res 3340} 3341 3342define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 3343; CHECK-LABEL: test_maskz_add_sd_rn: 3344; CHECK: ## BB#0: 3345; CHECK-NEXT: andl $1, %edi 3346; CHECK-NEXT: kmovw %edi, %k1 3347; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 3348; CHECK-NEXT: retq 3349 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0) 3350 ret <2 x double> %res 3351} 3352 3353define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) { 3354; CHECK-LABEL: test_add_sd_rn: 3355; CHECK: ## BB#0: 3356; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 3357; CHECK-NEXT: retq 3358 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0) 3359 ret <2 x double> %res 3360} 3361 3362declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 3363 3364define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3365; CHECK-LABEL: test_mask_max_ss_sae: 3366; CHECK: ## BB#0: 3367; CHECK-NEXT: andl $1, %edi 3368; CHECK-NEXT: kmovw %edi, %k1 3369; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1} 3370; CHECK-NEXT: vmovaps %zmm2, %zmm0 3371; CHECK-NEXT: retq 3372 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) 3373 ret <4 x float> %res 3374} 3375 3376define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 3377; CHECK-LABEL: test_maskz_max_ss_sae: 3378; CHECK: ## BB#0: 3379; CHECK-NEXT: andl $1, %edi 3380; CHECK-NEXT: kmovw %edi, %k1 3381; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 3382; CHECK-NEXT: retq 3383 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8) 3384 ret <4 x float> %res 3385} 3386 3387define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) { 3388; CHECK-LABEL: test_max_ss_sae: 3389; CHECK: ## BB#0: 3390; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 3391; CHECK-NEXT: retq 3392 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8) 3393 ret <4 x float> %res 3394} 3395 3396define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3397; CHECK-LABEL: test_mask_max_ss: 3398; CHECK: ## BB#0: 3399; CHECK-NEXT: andl $1, %edi 3400; CHECK-NEXT: kmovw %edi, %k1 3401; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1} 3402; CHECK-NEXT: vmovaps %zmm2, %zmm0 3403; CHECK-NEXT: retq 3404 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 3405 ret <4 x float> %res 3406} 3407 3408define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 3409; CHECK-LABEL: test_maskz_max_ss: 3410; CHECK: ## BB#0: 3411; CHECK-NEXT: andl $1, %edi 3412; CHECK-NEXT: kmovw %edi, %k1 3413; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z} 3414; CHECK-NEXT: retq 3415 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4) 3416 ret <4 x float> %res 3417} 3418 3419define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) { 3420; CHECK-LABEL: test_max_ss: 3421; CHECK: ## BB#0: 3422; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 3423; CHECK-NEXT: retq 3424 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4) 3425 ret <4 x float> %res 3426} 3427declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 3428 3429define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3430; CHECK-LABEL: test_mask_max_sd_sae: 3431; CHECK: ## BB#0: 3432; CHECK-NEXT: andl $1, %edi 3433; CHECK-NEXT: kmovw %edi, %k1 3434; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1} 3435; CHECK-NEXT: vmovaps %zmm2, %zmm0 3436; CHECK-NEXT: retq 3437 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) 3438 ret <2 x double> %res 3439} 3440 3441define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 3442; CHECK-LABEL: test_maskz_max_sd_sae: 3443; CHECK: ## BB#0: 3444; CHECK-NEXT: andl $1, %edi 3445; CHECK-NEXT: kmovw %edi, %k1 3446; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 3447; CHECK-NEXT: retq 3448 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8) 3449 ret <2 x double> %res 3450} 3451 3452define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) { 3453; CHECK-LABEL: test_max_sd_sae: 3454; CHECK: ## BB#0: 3455; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 3456; CHECK-NEXT: retq 3457 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8) 3458 ret <2 x double> %res 3459} 3460 3461define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3462; CHECK-LABEL: test_mask_max_sd: 3463; CHECK: ## BB#0: 3464; CHECK-NEXT: andl $1, %edi 3465; CHECK-NEXT: kmovw %edi, %k1 3466; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1} 3467; CHECK-NEXT: vmovaps %zmm2, %zmm0 3468; CHECK-NEXT: retq 3469 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 3470 ret <2 x double> %res 3471} 3472 3473define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 3474; CHECK-LABEL: test_maskz_max_sd: 3475; CHECK: ## BB#0: 3476; CHECK-NEXT: andl $1, %edi 3477; CHECK-NEXT: kmovw %edi, %k1 3478; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} 3479; CHECK-NEXT: retq 3480 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4) 3481 ret <2 x double> %res 3482} 3483 3484define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) { 3485; CHECK-LABEL: test_max_sd: 3486; CHECK: ## BB#0: 3487; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 3488; CHECK-NEXT: retq 3489 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4) 3490 ret <2 x double> %res 3491} 3492 3493define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) { 3494; CHECK-LABEL: test_x86_avx512_cvtsi2sd32: 3495; CHECK: ## BB#0: 3496; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0 3497; CHECK-NEXT: retq 3498 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1] 3499 ret <2 x double> %res 3500} 3501declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone 3502 3503define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) { 3504; CHECK-LABEL: test_x86_avx512_cvtsi2sd64: 3505; CHECK: ## BB#0: 3506; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0 3507; CHECK-NEXT: retq 3508 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1] 3509 ret <2 x double> %res 3510} 3511declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone 3512 3513define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) { 3514; CHECK-LABEL: test_x86_avx512_cvtsi2ss32: 3515; CHECK: ## BB#0: 3516; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0 3517; CHECK-NEXT: retq 3518 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1] 3519 ret <4 x float> %res 3520} 3521declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone 3522 3523define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) { 3524; CHECK-LABEL: test_x86_avx512_cvtsi2ss64: 3525; CHECK: ## BB#0: 3526; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0 3527; CHECK-NEXT: retq 3528 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1] 3529 ret <4 x float> %res 3530} 3531declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone 3532 3533define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b) 3534; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss: 3535; CHECK: ## BB#0: 3536; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0 3537; CHECK-NEXT: retq 3538{ 3539 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] 3540 ret <4 x float> %res 3541} 3542 3543define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr) 3544; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem: 3545; CHECK: ## BB#0: 3546; CHECK-NEXT: movl (%rdi), %eax 3547; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0 3548; CHECK-NEXT: retq 3549{ 3550 %b = load i32, i32* %ptr 3551 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] 3552 ret <4 x float> %res 3553} 3554 3555define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) 3556; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss: 3557; CHECK: ## BB#0: 3558; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 3559; CHECK-NEXT: retq 3560{ 3561 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] 3562 ret <4 x float> %res 3563} 3564 3565define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr) 3566; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem: 3567; CHECK: ## BB#0: 3568; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0 3569; CHECK-NEXT: retq 3570{ 3571 %b = load i32, i32* %ptr 3572 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] 3573 ret <4 x float> %res 3574} 3575declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone 3576 3577define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b) 3578; CHECK-LABEL: _mm_cvt_roundu64_ss: 3579; CHECK: ## BB#0: 3580; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0 3581; CHECK-NEXT: retq 3582{ 3583 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1] 3584 ret <4 x float> %res 3585} 3586 3587define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b) 3588; CHECK-LABEL: _mm_cvtu64_ss: 3589; CHECK: ## BB#0: 3590; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0 3591; CHECK-NEXT: retq 3592{ 3593 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1] 3594 ret <4 x float> %res 3595} 3596declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone 3597 3598define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b) 3599; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd: 3600; CHECK: ## BB#0: 3601; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 3602; CHECK-NEXT: retq 3603{ 3604 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1] 3605 ret <2 x double> %res 3606} 3607declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone 3608 3609define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b) 3610; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd: 3611; CHECK: ## BB#0: 3612; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0 3613; CHECK-NEXT: retq 3614{ 3615 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1] 3616 ret <2 x double> %res 3617} 3618 3619define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b) 3620; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd: 3621; CHECK: ## BB#0: 3622; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 3623; CHECK-NEXT: retq 3624{ 3625 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1] 3626 ret <2 x double> %res 3627} 3628declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone 3629 3630define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) { 3631; CHECK-LABEL: test_vpmaxq: 3632; CHECK: ## BB#0: 3633; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 3634; CHECK-NEXT: retq 3635 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1, 3636 <8 x i64>zeroinitializer, i8 -1) 3637 ret <8 x i64> %res 3638} 3639declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 3640 3641define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) { 3642; CHECK-LABEL: test_vpminud: 3643; CHECK: ## BB#0: 3644; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0 3645; CHECK-NEXT: retq 3646 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1, 3647 <16 x i32>zeroinitializer, i16 -1) 3648 ret <16 x i32> %res 3649} 3650declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 3651 3652define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) { 3653; CHECK-LABEL: test_vpmaxsd: 3654; CHECK: ## BB#0: 3655; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 3656; CHECK-NEXT: retq 3657 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1, 3658 <16 x i32>zeroinitializer, i16 -1) 3659 ret <16 x i32> %res 3660} 3661declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 3662 3663define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 3664; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_512: 3665; CHECK: ## BB#0: 3666; CHECK-NEXT: kmovw %edi, %k1 3667; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} 3668; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 3669; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 3670; CHECK-NEXT: retq 3671 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 3672 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 3673 %res2 = add <16 x i32> %res, %res1 3674 ret <16 x i32> %res2 3675} 3676 3677define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 3678; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_512: 3679; CHECK: ## BB#0: 3680; CHECK-NEXT: kmovw %edi, %k1 3681; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} 3682; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 3683; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 3684; CHECK-NEXT: retq 3685 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 3686 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 3687 %res2 = add <8 x i64> %res, %res1 3688 ret <8 x i64> %res2 3689} 3690 3691declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 3692 3693define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 3694; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_512: 3695; CHECK: ## BB#0: 3696; CHECK-NEXT: kmovw %edi, %k1 3697; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1} 3698; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm0 3699; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 3700; CHECK-NEXT: retq 3701 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 3702 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 3703 %res2 = add <16 x i32> %res, %res1 3704 ret <16 x i32> %res2 3705} 3706 3707declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 3708 3709define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 3710; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_512: 3711; CHECK: ## BB#0: 3712; CHECK-NEXT: kmovw %edi, %k1 3713; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} 3714; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0 3715; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 3716; CHECK-NEXT: retq 3717 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 3718 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 3719 %res2 = add <8 x i64> %res, %res1 3720 ret <8 x i64> %res2 3721} 3722 3723declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 3724 3725define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 3726; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_512: 3727; CHECK: ## BB#0: 3728; CHECK-NEXT: kmovw %edi, %k1 3729; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1} 3730; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm0 3731; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 3732; CHECK-NEXT: retq 3733 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 3734 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 3735 %res2 = add <16 x i32> %res, %res1 3736 ret <16 x i32> %res2 3737} 3738 3739declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 3740 3741define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 3742; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_512: 3743; CHECK: ## BB#0: 3744; CHECK-NEXT: kmovw %edi, %k1 3745; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1} 3746; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm0 3747; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 3748; CHECK-NEXT: retq 3749 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 3750 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 3751 %res2 = add <8 x i64> %res, %res1 3752 ret <8 x i64> %res2 3753} 3754 3755define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 3756; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_512: 3757; CHECK: ## BB#0: 3758; CHECK-NEXT: kmovw %edi, %k1 3759; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1} 3760; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0 3761; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 3762; CHECK-NEXT: retq 3763 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 3764 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 3765 %res2 = add <16 x i32> %res, %res1 3766 ret <16 x i32> %res2 3767} 3768 3769declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 3770 3771define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 3772; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_512: 3773; CHECK: ## BB#0: 3774; CHECK-NEXT: kmovw %edi, %k1 3775; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1} 3776; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm0 3777; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 3778; CHECK-NEXT: retq 3779 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 3780 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 3781 %res2 = add <8 x i64> %res, %res1 3782 ret <8 x i64> %res2 3783} 3784 3785declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 3786 3787define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { 3788; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512: 3789; CHECK: ## BB#0: 3790; CHECK-NEXT: kmovw %esi, %k1 3791; CHECK-NEXT: vmovaps %zmm1, %zmm3 3792; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1} 3793; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1 3794; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0 3795; CHECK-NEXT: retq 3796 %x2 = load <16 x i32>, <16 x i32>* %x2p 3797 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 3798 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) 3799 %res2 = add <16 x i32> %res, %res1 3800 ret <16 x i32> %res2 3801} 3802 3803declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8) 3804 3805define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { 3806; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512: 3807; CHECK: ## BB#0: 3808; CHECK-NEXT: kmovw %edi, %k1 3809; CHECK-NEXT: vmovaps %zmm1, %zmm3 3810; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1} 3811; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 3812; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 3813; CHECK-NEXT: retq 3814 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) 3815 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) 3816 %res2 = fadd <8 x double> %res, %res1 3817 ret <8 x double> %res2 3818} 3819 3820declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16) 3821 3822define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { 3823; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512: 3824; CHECK: ## BB#0: 3825; CHECK-NEXT: kmovw %edi, %k1 3826; CHECK-NEXT: vmovaps %zmm1, %zmm3 3827; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1} 3828; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 3829; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 3830; CHECK-NEXT: retq 3831 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) 3832 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) 3833 %res2 = fadd <16 x float> %res, %res1 3834 ret <16 x float> %res2 3835} 3836 3837declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 3838 3839define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 3840; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512: 3841; CHECK: ## BB#0: 3842; CHECK-NEXT: kmovw %edi, %k1 3843; CHECK-NEXT: vmovaps %zmm1, %zmm3 3844; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1} 3845; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 3846; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0 3847; CHECK-NEXT: retq 3848 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 3849 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 3850 %res2 = add <8 x i64> %res, %res1 3851 ret <8 x i64> %res2 3852} 3853 3854declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 3855 3856define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) { 3857; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512: 3858; CHECK: ## BB#0: 3859; CHECK-NEXT: kmovw %esi, %k1 3860; CHECK-NEXT: vmovaps %zmm1, %zmm2 3861; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z} 3862; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1 3863; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0 3864; CHECK-NEXT: retq 3865 %x2 = load <16 x i32>, <16 x i32>* %x2p 3866 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 3867 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1) 3868 %res2 = add <16 x i32> %res, %res1 3869 ret <16 x i32> %res2 3870} 3871 3872declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8) 3873 3874define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) { 3875; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512: 3876; CHECK: ## BB#0: 3877; CHECK-NEXT: kmovw %esi, %k1 3878; CHECK-NEXT: vmovaps %zmm1, %zmm2 3879; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z} 3880; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1 3881; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0 3882; CHECK-NEXT: retq 3883 %x2s = load double, double* %x2ptr 3884 %x2ins = insertelement <8 x double> undef, double %x2s, i32 0 3885 %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer 3886 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) 3887 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1) 3888 %res2 = fadd <8 x double> %res, %res1 3889 ret <8 x double> %res2 3890} 3891 3892declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) 3893 3894define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 3895; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512: 3896; CHECK: ## BB#0: 3897; CHECK-NEXT: kmovw %edi, %k1 3898; CHECK-NEXT: vmovaps %zmm1, %zmm3 3899; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z} 3900; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 3901; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 3902; CHECK-NEXT: retq 3903 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) 3904 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) 3905 %res2 = fadd <16 x float> %res, %res1 3906 ret <16 x float> %res2 3907} 3908 3909 3910declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 3911 3912define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 3913; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512: 3914; CHECK: ## BB#0: 3915; CHECK-NEXT: kmovw %edi, %k1 3916; CHECK-NEXT: vmovaps %zmm1, %zmm3 3917; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z} 3918; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 3919; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0 3920; CHECK-NEXT: retq 3921 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 3922 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 3923 %res2 = add <8 x i64> %res, %res1 3924 ret <8 x i64> %res2 3925} 3926 3927declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 3928 3929define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 3930; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512: 3931; CHECK: ## BB#0: 3932; CHECK-NEXT: kmovw %edi, %k1 3933; CHECK-NEXT: vmovaps %zmm1, %zmm3 3934; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1} 3935; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 3936; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0 3937; CHECK-NEXT: retq 3938 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 3939 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 3940 %res2 = add <16 x i32> %res, %res1 3941 ret <16 x i32> %res2 3942} 3943 3944declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) 3945define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { 3946; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_512: 3947; CHECK: ## BB#0: 3948; CHECK-NEXT: kmovw %edi, %k1 3949; CHECK-NEXT: vscalefpd {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3950; CHECK-NEXT: vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0 3951; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 3952; CHECK-NEXT: retq 3953 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3) 3954 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) 3955 %res2 = fadd <8 x double> %res, %res1 3956 ret <8 x double> %res2 3957} 3958 3959declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 3960define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 3961; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_512: 3962; CHECK: ## BB#0: 3963; CHECK-NEXT: kmovw %edi, %k1 3964; CHECK-NEXT: vscalefps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 3965; CHECK-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0 3966; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 3967; CHECK-NEXT: retq 3968 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2) 3969 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 3970 %res2 = fadd <16 x float> %res, %res1 3971 ret <16 x float> %res2 3972} 3973 3974declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8) 3975 3976define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 3977; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512: 3978; CHECK: ## BB#0: 3979; CHECK-NEXT: kmovw %edi, %k1 3980; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1} 3981; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z} 3982; CHECK-NEXT: vpmovqb %zmm0, %xmm0 3983; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 3984; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 3985; CHECK-NEXT: retq 3986 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 3987 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 3988 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 3989 %res3 = add <16 x i8> %res0, %res1 3990 %res4 = add <16 x i8> %res3, %res2 3991 ret <16 x i8> %res4 3992} 3993 3994declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8) 3995 3996define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 3997; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512: 3998; CHECK: ## BB#0: 3999; CHECK-NEXT: kmovw %esi, %k1 4000; CHECK-NEXT: vpmovqb %zmm0, (%rdi) 4001; CHECK-NEXT: vpmovqb %zmm0, (%rdi) {%k1} 4002; CHECK-NEXT: retq 4003 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4004 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4005 ret void 4006} 4007 4008declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8) 4009 4010define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 4011; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512: 4012; CHECK: ## BB#0: 4013; CHECK-NEXT: kmovw %edi, %k1 4014; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1} 4015; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z} 4016; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 4017; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4018; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4019; CHECK-NEXT: retq 4020 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 4021 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 4022 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 4023 %res3 = add <16 x i8> %res0, %res1 4024 %res4 = add <16 x i8> %res3, %res2 4025 ret <16 x i8> %res4 4026} 4027 4028declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8) 4029 4030define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4031; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512: 4032; CHECK: ## BB#0: 4033; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) 4034; CHECK-NEXT: kmovw %esi, %k1 4035; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1} 4036; CHECK-NEXT: retq 4037 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4038 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4039 ret void 4040} 4041 4042declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8) 4043 4044define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 4045; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512: 4046; CHECK: ## BB#0: 4047; CHECK-NEXT: kmovw %edi, %k1 4048; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1} 4049; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z} 4050; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 4051; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4052; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4053; CHECK-NEXT: retq 4054 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 4055 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 4056 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 4057 %res3 = add <16 x i8> %res0, %res1 4058 %res4 = add <16 x i8> %res3, %res2 4059 ret <16 x i8> %res4 4060} 4061 4062declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8) 4063 4064define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4065; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512: 4066; CHECK: ## BB#0: 4067; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) 4068; CHECK-NEXT: kmovw %esi, %k1 4069; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1} 4070; CHECK-NEXT: retq 4071 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4072 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4073 ret void 4074} 4075 4076declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8) 4077 4078define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 4079; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512: 4080; CHECK: ## BB#0: 4081; CHECK-NEXT: kmovw %edi, %k1 4082; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1} 4083; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z} 4084; CHECK-NEXT: vpmovqw %zmm0, %xmm0 4085; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 4086; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 4087; CHECK-NEXT: retq 4088 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 4089 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 4090 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 4091 %res3 = add <8 x i16> %res0, %res1 4092 %res4 = add <8 x i16> %res3, %res2 4093 ret <8 x i16> %res4 4094} 4095 4096declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8) 4097 4098define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4099; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512: 4100; CHECK: ## BB#0: 4101; CHECK-NEXT: kmovw %esi, %k1 4102; CHECK-NEXT: vpmovqw %zmm0, (%rdi) 4103; CHECK-NEXT: vpmovqw %zmm0, (%rdi) {%k1} 4104; CHECK-NEXT: retq 4105 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4106 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4107 ret void 4108} 4109 4110declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8) 4111 4112define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 4113; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512: 4114; CHECK: ## BB#0: 4115; CHECK-NEXT: kmovw %edi, %k1 4116; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1} 4117; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z} 4118; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 4119; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 4120; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 4121; CHECK-NEXT: retq 4122 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 4123 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 4124 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 4125 %res3 = add <8 x i16> %res0, %res1 4126 %res4 = add <8 x i16> %res3, %res2 4127 ret <8 x i16> %res4 4128} 4129 4130declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8) 4131 4132define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4133; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512: 4134; CHECK: ## BB#0: 4135; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) 4136; CHECK-NEXT: kmovw %esi, %k1 4137; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1} 4138; CHECK-NEXT: retq 4139 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4140 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4141 ret void 4142} 4143 4144declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8) 4145 4146define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 4147; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512: 4148; CHECK: ## BB#0: 4149; CHECK-NEXT: kmovw %edi, %k1 4150; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1} 4151; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z} 4152; CHECK-NEXT: vpmovusqw %zmm0, %xmm0 4153; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 4154; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 4155; CHECK-NEXT: retq 4156 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 4157 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 4158 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 4159 %res3 = add <8 x i16> %res0, %res1 4160 %res4 = add <8 x i16> %res3, %res2 4161 ret <8 x i16> %res4 4162} 4163 4164declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8) 4165 4166define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4167; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512: 4168; CHECK: ## BB#0: 4169; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) 4170; CHECK-NEXT: kmovw %esi, %k1 4171; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1} 4172; CHECK-NEXT: retq 4173 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4174 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4175 ret void 4176} 4177 4178declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8) 4179 4180define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { 4181; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512: 4182; CHECK: ## BB#0: 4183; CHECK-NEXT: kmovw %edi, %k1 4184; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1} 4185; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z} 4186; CHECK-NEXT: vpmovqd %zmm0, %ymm0 4187; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 4188; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 4189; CHECK-NEXT: retq 4190 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) 4191 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) 4192 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) 4193 %res3 = add <8 x i32> %res0, %res1 4194 %res4 = add <8 x i32> %res3, %res2 4195 ret <8 x i32> %res4 4196} 4197 4198declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8) 4199 4200define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4201; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512: 4202; CHECK: ## BB#0: 4203; CHECK-NEXT: kmovw %esi, %k1 4204; CHECK-NEXT: vpmovqd %zmm0, (%rdi) 4205; CHECK-NEXT: vpmovqd %zmm0, (%rdi) {%k1} 4206; CHECK-NEXT: retq 4207 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4208 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4209 ret void 4210} 4211 4212declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8) 4213 4214define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { 4215; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512: 4216; CHECK: ## BB#0: 4217; CHECK-NEXT: kmovw %edi, %k1 4218; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1} 4219; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z} 4220; CHECK-NEXT: vpmovsqd %zmm0, %ymm0 4221; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 4222; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 4223; CHECK-NEXT: retq 4224 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) 4225 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) 4226 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) 4227 %res3 = add <8 x i32> %res0, %res1 4228 %res4 = add <8 x i32> %res3, %res2 4229 ret <8 x i32> %res4 4230} 4231 4232declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8) 4233 4234define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4235; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512: 4236; CHECK: ## BB#0: 4237; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) 4238; CHECK-NEXT: kmovw %esi, %k1 4239; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1} 4240; CHECK-NEXT: retq 4241 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4242 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4243 ret void 4244} 4245 4246declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8) 4247 4248define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { 4249; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512: 4250; CHECK: ## BB#0: 4251; CHECK-NEXT: kmovw %edi, %k1 4252; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1} 4253; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z} 4254; CHECK-NEXT: vpmovusqd %zmm0, %ymm0 4255; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 4256; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 4257; CHECK-NEXT: retq 4258 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) 4259 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) 4260 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) 4261 %res3 = add <8 x i32> %res0, %res1 4262 %res4 = add <8 x i32> %res3, %res2 4263 ret <8 x i32> %res4 4264} 4265 4266declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8) 4267 4268define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4269; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512: 4270; CHECK: ## BB#0: 4271; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) 4272; CHECK-NEXT: kmovw %esi, %k1 4273; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1} 4274; CHECK-NEXT: retq 4275 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4276 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4277 ret void 4278} 4279 4280declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16) 4281 4282define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { 4283; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512: 4284; CHECK: ## BB#0: 4285; CHECK-NEXT: kmovw %edi, %k1 4286; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1} 4287; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z} 4288; CHECK-NEXT: vpmovdb %zmm0, %xmm0 4289; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4290; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4291; CHECK-NEXT: retq 4292 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) 4293 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) 4294 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) 4295 %res3 = add <16 x i8> %res0, %res1 4296 %res4 = add <16 x i8> %res3, %res2 4297 ret <16 x i8> %res4 4298} 4299 4300declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16) 4301 4302define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 4303; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512: 4304; CHECK: ## BB#0: 4305; CHECK-NEXT: kmovw %esi, %k1 4306; CHECK-NEXT: vpmovdb %zmm0, (%rdi) 4307; CHECK-NEXT: vpmovdb %zmm0, (%rdi) {%k1} 4308; CHECK-NEXT: retq 4309 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 4310 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 4311 ret void 4312} 4313 4314declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16) 4315 4316define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { 4317; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512: 4318; CHECK: ## BB#0: 4319; CHECK-NEXT: kmovw %edi, %k1 4320; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1} 4321; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z} 4322; CHECK-NEXT: vpmovsdb %zmm0, %xmm0 4323; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4324; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4325; CHECK-NEXT: retq 4326 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) 4327 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) 4328 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) 4329 %res3 = add <16 x i8> %res0, %res1 4330 %res4 = add <16 x i8> %res3, %res2 4331 ret <16 x i8> %res4 4332} 4333 4334declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16) 4335 4336define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 4337; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512: 4338; CHECK: ## BB#0: 4339; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) 4340; CHECK-NEXT: kmovw %esi, %k1 4341; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1} 4342; CHECK-NEXT: retq 4343 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 4344 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 4345 ret void 4346} 4347 4348declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16) 4349 4350define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { 4351; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512: 4352; CHECK: ## BB#0: 4353; CHECK-NEXT: kmovw %edi, %k1 4354; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1} 4355; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z} 4356; CHECK-NEXT: vpmovusdb %zmm0, %xmm0 4357; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4358; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4359; CHECK-NEXT: retq 4360 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) 4361 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) 4362 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) 4363 %res3 = add <16 x i8> %res0, %res1 4364 %res4 = add <16 x i8> %res3, %res2 4365 ret <16 x i8> %res4 4366} 4367 4368declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16) 4369 4370define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 4371; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512: 4372; CHECK: ## BB#0: 4373; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) 4374; CHECK-NEXT: kmovw %esi, %k1 4375; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1} 4376; CHECK-NEXT: retq 4377 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 4378 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 4379 ret void 4380} 4381 4382declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16) 4383 4384define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { 4385; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512: 4386; CHECK: ## BB#0: 4387; CHECK-NEXT: kmovw %edi, %k1 4388; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1} 4389; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z} 4390; CHECK-NEXT: vpmovdw %zmm0, %ymm0 4391; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 4392; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 4393; CHECK-NEXT: retq 4394 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) 4395 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) 4396 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) 4397 %res3 = add <16 x i16> %res0, %res1 4398 %res4 = add <16 x i16> %res3, %res2 4399 ret <16 x i16> %res4 4400} 4401 4402declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16) 4403 4404define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 4405; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512: 4406; CHECK: ## BB#0: 4407; CHECK-NEXT: kmovw %esi, %k1 4408; CHECK-NEXT: vpmovdw %zmm0, (%rdi) 4409; CHECK-NEXT: vpmovdw %zmm0, (%rdi) {%k1} 4410; CHECK-NEXT: retq 4411 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 4412 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 4413 ret void 4414} 4415 4416declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16) 4417 4418define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { 4419; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512: 4420; CHECK: ## BB#0: 4421; CHECK-NEXT: kmovw %edi, %k1 4422; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1} 4423; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z} 4424; CHECK-NEXT: vpmovsdw %zmm0, %ymm0 4425; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 4426; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 4427; CHECK-NEXT: retq 4428 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) 4429 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) 4430 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) 4431 %res3 = add <16 x i16> %res0, %res1 4432 %res4 = add <16 x i16> %res3, %res2 4433 ret <16 x i16> %res4 4434} 4435 4436declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16) 4437 4438define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 4439; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512: 4440; CHECK: ## BB#0: 4441; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) 4442; CHECK-NEXT: kmovw %esi, %k1 4443; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1} 4444; CHECK-NEXT: retq 4445 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 4446 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 4447 ret void 4448} 4449 4450declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16) 4451 4452define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { 4453; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512: 4454; CHECK: ## BB#0: 4455; CHECK-NEXT: kmovw %edi, %k1 4456; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1} 4457; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z} 4458; CHECK-NEXT: vpmovusdw %zmm0, %ymm0 4459; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 4460; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 4461; CHECK-NEXT: retq 4462 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) 4463 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) 4464 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) 4465 %res3 = add <16 x i16> %res0, %res1 4466 %res4 = add <16 x i16> %res3, %res2 4467 ret <16 x i16> %res4 4468} 4469 4470declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16) 4471 4472define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 4473; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512: 4474; CHECK: ## BB#0: 4475; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) 4476; CHECK-NEXT: kmovw %esi, %k1 4477; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1} 4478; CHECK-NEXT: retq 4479 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 4480 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 4481 ret void 4482} 4483 4484declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) 4485 4486define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { 4487; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512: 4488; CHECK: ## BB#0: 4489; CHECK-NEXT: kmovw %edi, %k1 4490; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} 4491; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 4492; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 4493; CHECK-NEXT: retq 4494 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) 4495 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) 4496 %res2 = fadd <8 x double> %res, %res1 4497 ret <8 x double> %res2 4498} 4499 4500declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32) 4501 4502define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { 4503; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512: 4504; CHECK: ## BB#0: 4505; CHECK-NEXT: kmovw %edi, %k1 4506; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} 4507; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 4508; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 4509; CHECK-NEXT: retq 4510 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) 4511 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) 4512 %res2 = fadd <16 x float> %res, %res1 4513 ret <16 x float> %res2 4514} 4515 4516declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32) 4517 4518define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 4519; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512: 4520; CHECK: ## BB#0: 4521; CHECK-NEXT: kmovw %edi, %k1 4522; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1} 4523; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0 4524; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 4525; CHECK-NEXT: retq 4526 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) 4527 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0) 4528 %res2 = add <8 x i32> %res, %res1 4529 ret <8 x i32> %res2 4530} 4531 4532declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32) 4533 4534define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) { 4535; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512: 4536; CHECK: ## BB#0: 4537; CHECK-NEXT: kmovw %edi, %k1 4538; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1} 4539; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0 4540; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 4541; CHECK-NEXT: retq 4542 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4) 4543 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2) 4544 %res2 = fadd <8 x float> %res, %res1 4545 ret <8 x float> %res2 4546} 4547 4548declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32) 4549 4550define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 4551; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512: 4552; CHECK: ## BB#0: 4553; CHECK-NEXT: kmovw %edi, %k1 4554; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1} 4555; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0 4556; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 4557; CHECK-NEXT: retq 4558 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2) 4559 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0) 4560 %res2 = add <8 x i32> %res, %res1 4561 ret <8 x i32> %res2 4562} 4563 4564declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32) 4565 4566define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 4567; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512: 4568; CHECK: ## BB#0: 4569; CHECK-NEXT: kmovw %edi, %k1 4570; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1} 4571; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0 4572; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 4573; CHECK-NEXT: retq 4574 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2) 4575 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0) 4576 %res2 = add <16 x i32> %res, %res1 4577 ret <16 x i32> %res2 4578} 4579 4580declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32) 4581 4582define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) { 4583; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512: 4584; CHECK: ## BB#0: 4585; CHECK-NEXT: kmovw %edi, %k1 4586; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1} 4587; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0 4588; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 4589; CHECK-NEXT: retq 4590 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4) 4591 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8) 4592 %res2 = fadd <8 x double> %res, %res1 4593 ret <8 x double> %res2 4594} 4595 4596declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32) 4597 4598define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 4599; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512: 4600; CHECK: ## BB#0: 4601; CHECK-NEXT: kmovw %edi, %k1 4602; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1} 4603; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0 4604; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 4605; CHECK-NEXT: retq 4606 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2) 4607 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0) 4608 %res2 = add <16 x i32> %res, %res1 4609 ret <16 x i32> %res2 4610} 4611 4612declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32) 4613 4614define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 4615; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512: 4616; CHECK: ## BB#0: 4617; CHECK-NEXT: kmovw %edi, %k1 4618; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1} 4619; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0 4620; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 4621; CHECK-NEXT: retq 4622 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) 4623 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) 4624 %res2 = add <8 x i32> %res, %res1 4625 ret <8 x i32> %res2 4626} 4627 4628declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8) 4629 4630define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { 4631; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512: 4632; CHECK: ## BB#0: 4633; CHECK-NEXT: kmovw %edi, %k1 4634; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} 4635; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 4636; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 4637; CHECK-NEXT: retq 4638 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) 4639 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) 4640 %res2 = fadd <8 x double> %res, %res1 4641 ret <8 x double> %res2 4642} 4643 4644 4645declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32) 4646 4647define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { 4648; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512: 4649; CHECK: ## BB#0: 4650; CHECK-NEXT: kmovw %edi, %k1 4651; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} 4652; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 4653; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 4654; CHECK-NEXT: retq 4655 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) 4656 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) 4657 %res2 = fadd <16 x float> %res, %res1 4658 ret <16 x float> %res2 4659} 4660 4661declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32) 4662 4663define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 4664; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512: 4665; CHECK: ## BB#0: 4666; CHECK-NEXT: kmovw %edi, %k1 4667; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1} 4668; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0 4669; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 4670; CHECK-NEXT: retq 4671 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) 4672 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) 4673 %res2 = add <8 x i32> %res, %res1 4674 ret <8 x i32> %res2 4675} 4676 4677declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32) 4678 4679define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 4680; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512: 4681; CHECK: ## BB#0: 4682; CHECK-NEXT: kmovw %edi, %k1 4683; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1} 4684; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0 4685; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 4686; CHECK-NEXT: retq 4687 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4) 4688 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8) 4689 %res2 = add <16 x i32> %res, %res1 4690 ret <16 x i32> %res2 4691} 4692 4693declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32) 4694 4695define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 4696; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512: 4697; CHECK: ## BB#0: 4698; CHECK-NEXT: kmovw %edi, %k1 4699; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1} 4700; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0 4701; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 4702; CHECK-NEXT: retq 4703 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4) 4704 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8) 4705 %res2 = add <16 x i32> %res, %res1 4706 ret <16 x i32> %res2 4707} 4708 4709declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 4710 4711define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 4712; CHECK-LABEL: test_getexp_ss: 4713; CHECK: ## BB#0: 4714; CHECK-NEXT: andl $1, %edi 4715; CHECK-NEXT: kmovw %edi, %k1 4716; CHECK-NEXT: vmovaps %zmm2, %zmm3 4717; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1} 4718; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1} 4719; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} 4720; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0 4721; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1 4722; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 4723; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 4724; CHECK-NEXT: retq 4725 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 4726 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) 4727 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8) 4728 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8) 4729 4730 %res.1 = fadd <4 x float> %res0, %res1 4731 %res.2 = fadd <4 x float> %res2, %res3 4732 %res = fadd <4 x float> %res.1, %res.2 4733 ret <4 x float> %res 4734} 4735 4736declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 4737 4738define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 4739; CHECK-LABEL: test_getexp_sd: 4740; CHECK: ## BB#0: 4741; CHECK-NEXT: andl $1, %edi 4742; CHECK-NEXT: kmovw %edi, %k1 4743; CHECK-NEXT: vmovaps %zmm2, %zmm3 4744; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1} 4745; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4 4746; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1} 4747; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 4748; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1 4749; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0 4750; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 4751; CHECK-NEXT: retq 4752 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 4753 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) 4754 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8) 4755 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4) 4756 4757 %res.1 = fadd <2 x double> %res0, %res1 4758 %res.2 = fadd <2 x double> %res2, %res3 4759 %res = fadd <2 x double> %res.1, %res.2 4760 ret <2 x double> %res 4761} 4762 4763declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32) 4764 4765define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) { 4766; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd: 4767; CHECK: ## BB#0: 4768; CHECK-NEXT: andl $1, %edi 4769; CHECK-NEXT: kmovw %edi, %k1 4770; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1} 4771; CHECK-NEXT: kmovw %k0, %eax 4772; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill> 4773; CHECK-NEXT: retq 4774 4775 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8) 4776 ret i8 %res4 4777} 4778 4779define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) { 4780; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all: 4781; CHECK: ## BB#0: 4782; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0 4783; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1 4784; CHECK-NEXT: korw %k0, %k1, %k0 4785; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1 4786; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2 4787; CHECK-NEXT: korw %k1, %k2, %k1 4788; CHECK-NEXT: andl $1, %edi 4789; CHECK-NEXT: kmovw %edi, %k2 4790; CHECK-NEXT: kandw %k2, %k1, %k1 4791; CHECK-NEXT: korw %k1, %k0, %k0 4792; CHECK-NEXT: kmovw %k0, %eax 4793; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill> 4794; CHECK-NEXT: retq 4795 4796 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4) 4797 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8) 4798 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4) 4799 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8) 4800 4801 %res11 = or i8 %res1, %res2 4802 %res12 = or i8 %res3, %res4 4803 %res13 = or i8 %res11, %res12 4804 ret i8 %res13 4805} 4806 4807declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32) 4808 4809define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) { 4810; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss: 4811; CHECK: ## BB#0: 4812; CHECK-NEXT: andl $1, %edi 4813; CHECK-NEXT: kmovw %edi, %k1 4814; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1} 4815; CHECK-NEXT: kmovw %k0, %eax 4816; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill> 4817; CHECK-NEXT: retq 4818 4819 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4) 4820 ret i8 %res2 4821} 4822 4823 4824define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) { 4825; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all: 4826; CHECK: ## BB#0: 4827; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1 4828; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1} 4829; CHECK-NEXT: andl $1, %edi 4830; CHECK-NEXT: kmovw %edi, %k1 4831; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k2 {%k1} 4832; CHECK-NEXT: kmovw %k2, %ecx 4833; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1} 4834; CHECK-NEXT: kmovw %k1, %eax 4835; CHECK-NEXT: kmovw %k0, %edx 4836; CHECK-NEXT: andb %cl, %al 4837; CHECK-NEXT: andb %dl, %al 4838; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill> 4839; CHECK-NEXT: retq 4840 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4) 4841 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8) 4842 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4) 4843 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8) 4844 4845 %res11 = and i8 %res1, %res2 4846 %res12 = and i8 %res3, %res4 4847 %res13 = and i8 %res11, %res12 4848 ret i8 %res13 4849} 4850 4851declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16) 4852 4853define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) { 4854; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4: 4855; CHECK: ## BB#0: 4856; CHECK-NEXT: kmovw %edi, %k1 4857; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 4858; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 4859; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 4860; CHECK-NEXT: retq 4861 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4) 4862 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1) 4863 %res2 = fadd <16 x float> %res, %res1 4864 ret <16 x float> %res2 4865} 4866 4867declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8) 4868 4869define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) { 4870; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2: 4871; CHECK: ## BB#0: 4872; CHECK-NEXT: kmovw %edi, %k1 4873; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1] 4874; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1] 4875; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1] 4876; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 4877; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 4878; CHECK-NEXT: retq 4879 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4) 4880 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1) 4881 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4) 4882 4883 %res3 = fadd <8 x double> %res, %res1 4884 %res4 = fadd <8 x double> %res3, %res2 4885 ret <8 x double> %res4 4886} 4887 4888declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16) 4889 4890define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) { 4891; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4: 4892; CHECK: ## BB#0: 4893; CHECK-NEXT: kmovw %edi, %k1 4894; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 4895; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 4896; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4897; CHECK-NEXT: retq 4898 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4) 4899 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1) 4900 %res2 = add <16 x i32> %res, %res1 4901 ret <16 x i32> %res2 4902} 4903 4904declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8) 4905 4906define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) { 4907; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2: 4908; CHECK: ## BB#0: 4909; CHECK-NEXT: kmovw %edi, %k1 4910; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1] 4911; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1] 4912; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4913; CHECK-NEXT: retq 4914 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4) 4915 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1) 4916 %res2 = add <8 x i64> %res, %res1 4917 ret <8 x i64> %res2 4918} 4919 4920declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32) 4921 4922define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { 4923; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512: 4924; CHECK: ## BB#0: 4925; CHECK-NEXT: kmovw %edi, %k1 4926; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1} 4927; CHECK-NEXT: vgetmantpd $11, {sae}, %zmm0, %zmm0 4928; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 4929; CHECK-NEXT: retq 4930 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4) 4931 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8) 4932 %res2 = fadd <8 x double> %res, %res1 4933 ret <8 x double> %res2 4934} 4935 4936declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32) 4937 4938define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { 4939; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512: 4940; CHECK: ## BB#0: 4941; CHECK-NEXT: kmovw %edi, %k1 4942; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1} 4943; CHECK-NEXT: vgetmantps $11, {sae}, %zmm0, %zmm0 4944; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 4945; CHECK-NEXT: retq 4946 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4) 4947 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8) 4948 %res2 = fadd <16 x float> %res, %res1 4949 ret <16 x float> %res2 4950} 4951 4952declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32) 4953 4954define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 4955; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd: 4956; CHECK: ## BB#0: 4957; CHECK-NEXT: andl $1, %edi 4958; CHECK-NEXT: kmovw %edi, %k1 4959; CHECK-NEXT: vmovaps %zmm2, %zmm3 4960; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1} 4961; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z} 4962; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5 4963; CHECK-NEXT: vgetmantsd $11, {sae}, %xmm1, %xmm0, %xmm2 {%k1} 4964; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0 4965; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1 4966; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 4967; CHECK-NEXT: retq 4968 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4) 4969 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4) 4970 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8) 4971 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4) 4972 %res11 = fadd <2 x double> %res, %res1 4973 %res12 = fadd <2 x double> %res2, %res3 4974 %res13 = fadd <2 x double> %res11, %res12 4975 ret <2 x double> %res13 4976} 4977 4978declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32) 4979 4980define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 4981; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss: 4982; CHECK: ## BB#0: 4983; CHECK-NEXT: andl $1, %edi 4984; CHECK-NEXT: kmovw %edi, %k1 4985; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1} 4986; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z} 4987; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4 4988; CHECK-NEXT: vgetmantss $11, {sae}, %xmm1, %xmm0, %xmm0 4989; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1 4990; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 4991; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 4992; CHECK-NEXT: retq 4993 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4) 4994 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4) 4995 %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8) 4996 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4) 4997 %res11 = fadd <4 x float> %res, %res1 4998 %res12 = fadd <4 x float> %res2, %res3 4999 %res13 = fadd <4 x float> %res11, %res12 5000 ret <4 x float> %res13 5001} 5002 5003declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8) 5004 5005define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) { 5006; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512: 5007; CHECK: ## BB#0: 5008; CHECK-NEXT: kmovw %edi, %k1 5009; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6] 5010; CHECK-NEXT: vshufpd {{.*#+}} zmm3 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6] 5011; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6] 5012; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 5013; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 5014; CHECK-NEXT: retq 5015 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4) 5016 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1) 5017 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4) 5018 5019 %res3 = fadd <8 x double> %res, %res1 5020 %res4 = fadd <8 x double> %res3, %res2 5021 ret <8 x double> %res4 5022} 5023 5024declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16) 5025 5026define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) { 5027; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512: 5028; CHECK: ## BB#0: 5029; CHECK-NEXT: kmovw %edi, %k1 5030; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12] 5031; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12] 5032; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 5033; CHECK-NEXT: retq 5034 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4) 5035 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1) 5036 %res2 = fadd <16 x float> %res, %res1 5037 ret <16 x float> %res2 5038} 5039 5040declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8) 5041 5042define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { 5043; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512: 5044; CHECK: ## BB#0: 5045; CHECK-NEXT: kmovw %edi, %k1 5046; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} 5047; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z} 5048; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 5049; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1 5050; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 5051; CHECK-NEXT: retq 5052 %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) 5053 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3) 5054 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) 5055 %res3 = fadd <8 x double> %res, %res1 5056 %res4 = fadd <8 x double> %res2, %res3 5057 ret <8 x double> %res4 5058} 5059 5060declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16) 5061 5062define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { 5063; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512: 5064; CHECK: ## BB#0: 5065; CHECK-NEXT: kmovw %edi, %k1 5066; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} 5067; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z} 5068; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0 5069; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1 5070; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 5071; CHECK-NEXT: retq 5072 %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) 5073 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3) 5074 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) 5075 %res3 = fadd <16 x float> %res, %res1 5076 %res4 = fadd <16 x float> %res2, %res3 5077 ret <16 x float> %res4 5078} 5079 5080declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16) 5081 5082define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) { 5083; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512: 5084; CHECK: ## BB#0: 5085; CHECK-NEXT: kmovw %edi, %k1 5086; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} 5087; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z} 5088; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 5089; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 5090; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 5091; CHECK-NEXT: retq 5092 %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4) 5093 %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1) 5094 %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4) 5095 %res3 = fadd <16 x float> %res, %res1 5096 %res4 = fadd <16 x float> %res2, %res3 5097 ret <16 x float> %res4 5098} 5099 5100declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i16) 5101 5102define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) { 5103; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512: 5104; CHECK: ## BB#0: 5105; CHECK-NEXT: kmovw %edi, %k1 5106; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} 5107; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z} 5108; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 5109; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 5110; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 5111; CHECK-NEXT: retq 5112 %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4) 5113 %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1) 5114 %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4) 5115 %res3 = add <16 x i32> %res, %res1 5116 %res4 = add <16 x i32> %res2, %res3 5117 ret <16 x i32> %res4 5118} 5119 5120declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8) 5121 5122define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) { 5123; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512: 5124; CHECK: ## BB#0: 5125; CHECK-NEXT: kmovw %edi, %k1 5126; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} 5127; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z} 5128; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 5129; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 5130; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 5131; CHECK-NEXT: retq 5132 %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4) 5133 %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1) 5134 %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4) 5135 %res3 = fadd <8 x double> %res, %res1 5136 %res4 = fadd <8 x double> %res2, %res3 5137 ret <8 x double> %res4 5138} 5139 5140declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8) 5141 5142define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) { 5143; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512: 5144; CHECK: ## BB#0: 5145; CHECK-NEXT: kmovw %edi, %k1 5146; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} 5147; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z} 5148; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 5149; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 5150; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 5151; CHECK-NEXT: retq 5152 %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4) 5153 %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1) 5154 %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4) 5155 %res3 = add <8 x i64> %res, %res1 5156 %res4 = add <8 x i64> %res2, %res3 5157 ret <8 x i64> %res4 5158} 5159 5160declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x float>, <2 x double>, i8, i32) 5161 5162define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) { 5163; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round: 5164; CHECK: ## BB#0: 5165; CHECK-NEXT: andl $1, %edi 5166; CHECK-NEXT: kmovw %edi, %k1 5167; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1} 5168; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0 5169; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 5170; CHECK-NEXT: retq 5171 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4) 5172 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8) 5173 %res2 = fadd <2 x double> %res, %res1 5174 ret <2 x double> %res2 5175} 5176 5177declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x double>, <4 x float>, i8, i32) 5178 5179define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) { 5180; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round: 5181; CHECK: ## BB#0: 5182; CHECK-NEXT: andl $1, %edi 5183; CHECK-NEXT: kmovw %edi, %k1 5184; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 5185; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0 5186; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 5187; CHECK-NEXT: retq 5188 %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3) 5189 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8) 5190 %res2 = fadd <4 x float> %res, %res1 5191 ret <4 x float> %res2 5192} 5193 5194declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) 5195 5196define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { 5197; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512: 5198; CHECK: ## BB#0: 5199; CHECK-NEXT: kmovw %edi, %k1 5200; CHECK-NEXT: vmovaps %zmm0, %zmm3 5201; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} 5202; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 5203; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 5204; CHECK-NEXT: retq 5205 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) 5206 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) 5207 %res2 = add <16 x i32> %res, %res1 5208 ret <16 x i32> %res2 5209} 5210 5211declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) 5212 5213define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { 5214; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512: 5215; CHECK: ## BB#0: 5216; CHECK-NEXT: kmovw %edi, %k1 5217; CHECK-NEXT: vmovaps %zmm0, %zmm3 5218; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z} 5219; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 5220; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 5221; CHECK-NEXT: retq 5222 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) 5223 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) 5224 %res2 = add <16 x i32> %res, %res1 5225 ret <16 x i32> %res2 5226} 5227 5228declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8) 5229 5230define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { 5231; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512: 5232; CHECK: ## BB#0: 5233; CHECK-NEXT: kmovw %edi, %k1 5234; CHECK-NEXT: vmovaps %zmm0, %zmm3 5235; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} 5236; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 5237; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 5238; CHECK-NEXT: retq 5239 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) 5240 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) 5241 %res2 = add <8 x i64> %res, %res1 5242 ret <8 x i64> %res2 5243} 5244 5245declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8) 5246 5247define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { 5248; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512: 5249; CHECK: ## BB#0: 5250; CHECK-NEXT: kmovw %edi, %k1 5251; CHECK-NEXT: vmovaps %zmm0, %zmm3 5252; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z} 5253; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 5254; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 5255; CHECK-NEXT: retq 5256 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) 5257 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) 5258 %res2 = add <8 x i64> %res, %res1 5259 ret <8 x i64> %res2 5260} 5261 5262define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { 5263; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae: 5264; CHECK: ## BB#0: 5265; CHECK-NEXT: vcmpeqsd {sae}, %xmm1, %xmm0, %k0 5266; CHECK-NEXT: kmovw %k0, %eax 5267; CHECK-NEXT: retq 5268 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8) 5269 ret i32 %res 5270} 5271 5272define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { 5273; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae: 5274; CHECK: ## BB#0: 5275; CHECK-NEXT: vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0 5276; CHECK-NEXT: kmovw %k0, %eax 5277; CHECK-NEXT: retq 5278 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8) 5279 ret i32 %res 5280} 5281 5282define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) { 5283; CHECK-LABEL: test_x86_avx512_comi_sd_eq: 5284; CHECK: ## BB#0: 5285; CHECK-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 5286; CHECK-NEXT: kmovw %k0, %eax 5287; CHECK-NEXT: retq 5288 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4) 5289 ret i32 %res 5290} 5291 5292define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) { 5293; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq: 5294; CHECK: ## BB#0: 5295; CHECK-NEXT: vcmpeq_uqsd %xmm1, %xmm0, %k0 5296; CHECK-NEXT: kmovw %k0, %eax 5297; CHECK-NEXT: retq 5298 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4) 5299 ret i32 %res 5300} 5301 5302define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { 5303; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae: 5304; CHECK: ## BB#0: 5305; CHECK-NEXT: vcmpltsd {sae}, %xmm1, %xmm0, %k0 5306; CHECK-NEXT: kmovw %k0, %eax 5307; CHECK-NEXT: retq 5308 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8) 5309 ret i32 %res 5310} 5311 5312define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { 5313; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae: 5314; CHECK: ## BB#0: 5315; CHECK-NEXT: vcmpngesd {sae}, %xmm1, %xmm0, %k0 5316; CHECK-NEXT: kmovw %k0, %eax 5317; CHECK-NEXT: retq 5318 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8) 5319 ret i32 %res 5320} 5321 5322define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) { 5323; CHECK-LABEL: test_x86_avx512_comi_sd_lt: 5324; CHECK: ## BB#0: 5325; CHECK-NEXT: vcmpltsd %xmm1, %xmm0, %k0 5326; CHECK-NEXT: kmovw %k0, %eax 5327; CHECK-NEXT: retq 5328 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4) 5329 ret i32 %res 5330} 5331 5332define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) { 5333; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt: 5334; CHECK: ## BB#0: 5335; CHECK-NEXT: vcmpngesd %xmm1, %xmm0, %k0 5336; CHECK-NEXT: kmovw %k0, %eax 5337; CHECK-NEXT: retq 5338 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4) 5339 ret i32 %res 5340} 5341 5342declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32) 5343 5344define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) { 5345; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt: 5346; CHECK: ## BB#0: 5347; CHECK-NEXT: vcmpngess %xmm1, %xmm0, %k0 5348; CHECK-NEXT: kmovw %k0, %eax 5349; CHECK-NEXT: retq 5350 %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4) 5351 ret i32 %res 5352} 5353 5354declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32) 5355declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8) 5356 5357define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 5358; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk: 5359; CHECK: ## BB#0: 5360; CHECK-NEXT: andl $1, %edi 5361; CHECK-NEXT: kmovw %edi, %k1 5362; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1} 5363; CHECK-NEXT: vmovaps %zmm2, %zmm0 5364; CHECK-NEXT: retq 5365 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 5366 ret <4 x float> %res 5367} 5368 5369define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 5370; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz: 5371; CHECK: ## BB#0: 5372; CHECK-NEXT: andl $1, %edi 5373; CHECK-NEXT: kmovw %edi, %k1 5374; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} 5375; CHECK-NEXT: retq 5376 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2) 5377 ret <4 x float> %res 5378} 5379 5380define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 5381; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr: 5382; CHECK: ## BB#0: 5383; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 5384; CHECK-NEXT: retq 5385 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1) 5386 ret <4 x float> %res 5387} 5388 5389declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8) 5390define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) { 5391; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr: 5392; CHECK: ## BB#0: 5393; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 5394; CHECK-NEXT: retq 5395 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1) 5396 ret <2 x double> %res 5397} 5398 5399define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) { 5400; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz: 5401; CHECK: ## BB#0: 5402; CHECK-NEXT: andl $1, %edi 5403; CHECK-NEXT: kmovw %edi, %k1 5404; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} 5405; CHECK-NEXT: retq 5406 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2) 5407 ret <2 x double> %res 5408} 5409 5410define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 5411; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk: 5412; CHECK: ## BB#0: 5413; CHECK-NEXT: andl $1, %edi 5414; CHECK-NEXT: kmovw %edi, %k1 5415; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1} 5416; CHECK-NEXT: vmovaps %zmm2, %zmm0 5417; CHECK-NEXT: retq 5418 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 5419 ret <2 x double> %res 5420} 5421 5422declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16) 5423 5424define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) { 5425; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512: 5426; CHECK: ## BB#0: 5427; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 5428; CHECK-NEXT: kmovw %edi, %k1 5429; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 5430; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 5431; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 5432; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 5433; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 5434; CHECK-NEXT: retq 5435 5436 %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1) 5437 %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask) 5438 %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask) 5439 %res4 = fadd <16 x float> %res1, %res2 5440 %res5 = fadd <16 x float> %res3, %res4 5441 ret <16 x float> %res5 5442} 5443 5444declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8) 5445 5446define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) { 5447; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512: 5448; CHECK: ## BB#0: 5449; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> 5450; CHECK-NEXT: kmovw %edi, %k1 5451; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3] 5452; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3] 5453; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3] 5454; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 5455; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 5456; CHECK-NEXT: retq 5457 5458 %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1) 5459 %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask) 5460 %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask) 5461 %res4 = fadd <8 x double> %res1, %res2 5462 %res5 = fadd <8 x double> %res3, %res4 5463 ret <8 x double> %res5 5464} 5465 5466declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16) 5467 5468define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) { 5469; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512: 5470; CHECK: ## BB#0: 5471; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 5472; CHECK-NEXT: kmovw %edi, %k1 5473; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 5474; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 5475; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 5476; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 5477; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 5478; CHECK-NEXT: retq 5479 5480 %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1) 5481 %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) 5482 %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask) 5483 %res4 = add <16 x i32> %res1, %res2 5484 %res5 = add <16 x i32> %res3, %res4 5485 ret <16 x i32> %res5 5486} 5487 5488declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8) 5489 5490define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) { 5491; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512: 5492; CHECK: ## BB#0: 5493; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> 5494; CHECK-NEXT: kmovw %edi, %k1 5495; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3] 5496; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3] 5497; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3] 5498; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 5499; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 5500; CHECK-NEXT: retq 5501 5502 %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1) 5503 %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) 5504 %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask) 5505 %res4 = add <8 x i64> %res1, %res2 5506 %res5 = add <8 x i64> %res3, %res4 5507 ret <8 x i64> %res5 5508} 5509 5510declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i32, <8 x i64>, i8) 5511 5512define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { 5513; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_512: 5514; CHECK: ## BB#0: 5515; CHECK-NEXT: kmovw %esi, %k1 5516; CHECK-NEXT: vpsrlq $255, %zmm0, %zmm1 {%k1} 5517; CHECK-NEXT: vpsrlq $255, %zmm0, %zmm2 {%k1} {z} 5518; CHECK-NEXT: vpsrlq $255, %zmm0, %zmm0 5519; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5520; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0 5521; CHECK-NEXT: retq 5522 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 255, <8 x i64> %x2, i8 %x3) 5523 %res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 255, <8 x i64> %x2, i8 -1) 5524 %res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 255, <8 x i64> zeroinitializer, i8 %x3) 5525 %res3 = add <8 x i64> %res, %res1 5526 %res4 = add <8 x i64> %res3, %res2 5527 ret <8 x i64> %res4 5528} 5529 5530declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16) 5531 5532define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { 5533; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_512: 5534; CHECK: ## BB#0: 5535; CHECK-NEXT: kmovw %esi, %k1 5536; CHECK-NEXT: vpsrld $255, %zmm0, %zmm1 {%k1} 5537; CHECK-NEXT: vpsrld $255, %zmm0, %zmm2 {%k1} {z} 5538; CHECK-NEXT: vpsrld $255, %zmm0, %zmm0 5539; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5540; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm0 5541; CHECK-NEXT: retq 5542 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 255, <16 x i32> %x2, i16 %x3) 5543 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 255, <16 x i32> %x2, i16 -1) 5544 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 255, <16 x i32> zeroinitializer, i16 %x3) 5545 %res3 = add <16 x i32> %res, %res1 5546 %res4 = add <16 x i32> %res3, %res2 5547 ret <16 x i32> %res4 5548} 5549 5550declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i32, <16 x i32>, i16) 5551 5552define <16 x i32>@test_int_x86_avx512_mask_psra_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { 5553; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_512: 5554; CHECK: ## BB#0: 5555; CHECK-NEXT: kmovw %esi, %k1 5556; CHECK-NEXT: vpsrad $3, %zmm0, %zmm1 {%k1} 5557; CHECK-NEXT: vpsrad $3, %zmm0, %zmm2 {%k1} {z} 5558; CHECK-NEXT: vpsrad $3, %zmm0, %zmm0 5559; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 5560; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5561; CHECK-NEXT: retq 5562 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3) 5563 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3) 5564 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1) 5565 %res3 = add <16 x i32> %res, %res1 5566 %res4 = add <16 x i32> %res3, %res2 5567 ret <16 x i32> %res4 5568} 5569 5570declare <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64>, i32, <8 x i64>, i8) 5571 5572define <8 x i64>@test_int_x86_avx512_mask_psra_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { 5573; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_512: 5574; CHECK: ## BB#0: 5575; CHECK-NEXT: kmovw %esi, %k1 5576; CHECK-NEXT: vpsraq $3, %zmm0, %zmm1 {%k1} 5577; CHECK-NEXT: vpsraq $3, %zmm0, %zmm2 {%k1} {z} 5578; CHECK-NEXT: vpsraq $3, %zmm0, %zmm0 5579; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 5580; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5581; CHECK-NEXT: retq 5582 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3) 5583 %res1 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3) 5584 %res2 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1) 5585 %res3 = add <8 x i64> %res, %res1 5586 %res4 = add <8 x i64> %res3, %res2 5587 ret <8 x i64> %res4 5588} 5589 5590declare <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32>, i32, <16 x i32>, i16) 5591 5592define <16 x i32>@test_int_x86_avx512_mask_psll_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { 5593; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_512: 5594; CHECK: ## BB#0: 5595; CHECK-NEXT: kmovw %esi, %k1 5596; CHECK-NEXT: vpslld $3, %zmm0, %zmm1 {%k1} 5597; CHECK-NEXT: vpslld $3, %zmm0, %zmm2 {%k1} {z} 5598; CHECK-NEXT: vpslld $3, %zmm0, %zmm0 5599; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 5600; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5601; CHECK-NEXT: retq 5602 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3) 5603 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3) 5604 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1) 5605 %res3 = add <16 x i32> %res, %res1 5606 %res4 = add <16 x i32> %res3, %res2 5607 ret <16 x i32> %res4 5608} 5609 5610declare <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64>, i32, <8 x i64>, i8) 5611 5612define <8 x i64>@test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { 5613; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_512: 5614; CHECK: ## BB#0: 5615; CHECK-NEXT: kmovw %esi, %k1 5616; CHECK-NEXT: vpsllq $3, %zmm0, %zmm1 {%k1} 5617; CHECK-NEXT: vpsllq $3, %zmm0, %zmm2 {%k1} {z} 5618; CHECK-NEXT: vpsllq $3, %zmm0, %zmm0 5619; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 5620; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5621; CHECK-NEXT: retq 5622 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3) 5623 %res1 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3) 5624 %res2 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1) 5625 %res3 = add <8 x i64> %res, %res1 5626 %res4 = add <8 x i64> %res3, %res2 5627 ret <8 x i64> %res4 5628} 5629 5630declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 5631 5632define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 5633; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_512: 5634; CHECK: ## BB#0: 5635; CHECK-NEXT: kmovw %edi, %k1 5636; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} 5637; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3 {%k1} {z} 5638; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0 5639; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1 5640; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5641; CHECK-NEXT: retq 5642 %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 5643 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3) 5644 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 5645 %res3 = add <16 x i32> %res, %res1 5646 %res4 = add <16 x i32> %res3, %res2 5647 ret <16 x i32> %res4 5648} 5649 5650declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 5651 5652define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 5653; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_512: 5654; CHECK: ## BB#0: 5655; CHECK-NEXT: kmovw %edi, %k1 5656; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} 5657; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3 {%k1} {z} 5658; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 5659; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1 5660; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5661; CHECK-NEXT: retq 5662 %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 5663 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3) 5664 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 5665 %res3 = add <8 x i64> %res, %res1 5666 %res4 = add <8 x i64> %res3, %res2 5667 ret <8 x i64> %res4 5668} 5669 5670declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i32, <16 x i32>, i16) 5671 5672define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { 5673; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_512: 5674; CHECK: ## BB#0: 5675; CHECK-NEXT: kmovw %esi, %k1 5676; CHECK-NEXT: vprold $3, %zmm0, %zmm1 {%k1} 5677; CHECK-NEXT: vprold $3, %zmm0, %zmm2 {%k1} {z} 5678; CHECK-NEXT: vprold $3, %zmm0, %zmm0 5679; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 5680; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5681; CHECK-NEXT: retq 5682 %res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3) 5683 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3) 5684 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1) 5685 %res3 = add <16 x i32> %res, %res1 5686 %res4 = add <16 x i32> %res3, %res2 5687 ret <16 x i32> %res4 5688} 5689 5690declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i32, <8 x i64>, i8) 5691 5692define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { 5693; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_512: 5694; CHECK: ## BB#0: 5695; CHECK-NEXT: kmovw %esi, %k1 5696; CHECK-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} 5697; CHECK-NEXT: vprolq $3, %zmm0, %zmm2 {%k1} {z} 5698; CHECK-NEXT: vprolq $3, %zmm0, %zmm0 5699; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 5700; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5701; CHECK-NEXT: retq 5702 %res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3) 5703 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3) 5704 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1) 5705 %res3 = add <8 x i64> %res, %res1 5706 %res4 = add <8 x i64> %res3, %res2 5707 ret <8 x i64> %res4 5708} 5709 5710declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16) 5711 5712define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) { 5713; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512: 5714; CHECK: ## BB#0: 5715; CHECK-NEXT: kmovw %edi, %k1 5716; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 5717; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 5718; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 5719; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 5720; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5721; CHECK-NEXT: retq 5722 %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) 5723 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2) 5724 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1) 5725 %res3 = add <16 x i32> %res, %res1 5726 %res4 = add <16 x i32> %res3, %res2 5727 ret <16 x i32> %res4 5728} 5729 5730declare <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8>, <8 x i64>, i8) 5731 5732define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) { 5733; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512: 5734; CHECK: ## BB#0: 5735; CHECK-NEXT: kmovw %edi, %k1 5736; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 5737; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm2 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 5738; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 5739; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 5740; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5741; CHECK-NEXT: retq 5742 %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) 5743 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2) 5744 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1) 5745 %res3 = add <8 x i64> %res, %res1 5746 %res4 = add <8 x i64> %res3, %res2 5747 ret <8 x i64> %res4 5748} 5749 5750declare <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32>, <8 x i64>, i8) 5751 5752define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) { 5753; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512: 5754; CHECK: ## BB#0: 5755; CHECK-NEXT: kmovw %edi, %k1 5756; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 5757; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm2 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 5758; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 5759; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 5760; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5761; CHECK-NEXT: retq 5762 %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) 5763 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2) 5764 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1) 5765 %res3 = add <8 x i64> %res, %res1 5766 %res4 = add <8 x i64> %res3, %res2 5767 ret <8 x i64> %res4 5768} 5769 5770declare <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16>, <16 x i32>, i16) 5771 5772define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) { 5773; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512: 5774; CHECK: ## BB#0: 5775; CHECK-NEXT: kmovw %edi, %k1 5776; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 5777; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm2 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 5778; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 5779; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 5780; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5781; CHECK-NEXT: retq 5782 %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) 5783 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2) 5784 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1) 5785 %res3 = add <16 x i32> %res, %res1 5786 %res4 = add <16 x i32> %res3, %res2 5787 ret <16 x i32> %res4 5788} 5789 5790declare <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16>, <8 x i64>, i8) 5791 5792define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) { 5793; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512: 5794; CHECK: ## BB#0: 5795; CHECK-NEXT: kmovw %edi, %k1 5796; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 5797; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 5798; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 5799; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 5800; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5801; CHECK-NEXT: retq 5802 %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) 5803 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2) 5804 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1) 5805 %res3 = add <8 x i64> %res, %res1 5806 %res4 = add <8 x i64> %res3, %res2 5807 ret <8 x i64> %res4 5808} 5809 5810declare <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8>, <16 x i32>, i16) 5811 5812define <16 x i32>@test_int_x86_avx512_mask_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) { 5813; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512: 5814; CHECK: ## BB#0: 5815; CHECK-NEXT: kmovw %edi, %k1 5816; CHECK-NEXT: vpmovsxbd %xmm0, %zmm1 {%k1} 5817; CHECK-NEXT: vpmovsxbd %xmm0, %zmm2 {%k1} {z} 5818; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 5819; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 5820; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5821; CHECK-NEXT: retq 5822 %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) 5823 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2) 5824 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1) 5825 %res3 = add <16 x i32> %res, %res1 5826 %res4 = add <16 x i32> %res3, %res2 5827 ret <16 x i32> %res4 5828} 5829 5830declare <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8>, <8 x i64>, i8) 5831 5832define <8 x i64>@test_int_x86_avx512_mask_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) { 5833; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512: 5834; CHECK: ## BB#0: 5835; CHECK-NEXT: kmovw %edi, %k1 5836; CHECK-NEXT: vpmovsxbq %xmm0, %zmm1 {%k1} 5837; CHECK-NEXT: vpmovsxbq %xmm0, %zmm2 {%k1} {z} 5838; CHECK-NEXT: vpmovsxbq %xmm0, %zmm0 5839; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 5840; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5841; CHECK-NEXT: retq 5842 %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) 5843 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2) 5844 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1) 5845 %res3 = add <8 x i64> %res, %res1 5846 %res4 = add <8 x i64> %res3, %res2 5847 ret <8 x i64> %res4 5848} 5849 5850declare <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32>, <8 x i64>, i8) 5851 5852define <8 x i64>@test_int_x86_avx512_mask_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) { 5853; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512: 5854; CHECK: ## BB#0: 5855; CHECK-NEXT: kmovw %edi, %k1 5856; CHECK-NEXT: vpmovsxdq %ymm0, %zmm1 {%k1} 5857; CHECK-NEXT: vpmovsxdq %ymm0, %zmm2 {%k1} {z} 5858; CHECK-NEXT: vpmovsxdq %ymm0, %zmm0 5859; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 5860; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5861; CHECK-NEXT: retq 5862 %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) 5863 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2) 5864 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1) 5865 %res3 = add <8 x i64> %res, %res1 5866 %res4 = add <8 x i64> %res3, %res2 5867 ret <8 x i64> %res4 5868} 5869 5870 5871declare <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16>, <16 x i32>, i16) 5872 5873define <16 x i32>@test_int_x86_avx512_mask_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) { 5874; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512: 5875; CHECK: ## BB#0: 5876; CHECK-NEXT: kmovw %edi, %k1 5877; CHECK-NEXT: vpmovsxwd %ymm0, %zmm1 {%k1} 5878; CHECK-NEXT: vpmovsxwd %ymm0, %zmm2 {%k1} {z} 5879; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0 5880; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 5881; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5882; CHECK-NEXT: retq 5883 %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) 5884 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2) 5885 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1) 5886 %res3 = add <16 x i32> %res, %res1 5887 %res4 = add <16 x i32> %res3, %res2 5888 ret <16 x i32> %res4 5889} 5890 5891 5892declare <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16>, <8 x i64>, i8) 5893 5894define <8 x i64>@test_int_x86_avx512_mask_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) { 5895; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512: 5896; CHECK: ## BB#0: 5897; CHECK-NEXT: kmovw %edi, %k1 5898; CHECK-NEXT: vpmovsxwq %xmm0, %zmm1 {%k1} 5899; CHECK-NEXT: vpmovsxwq %xmm0, %zmm2 {%k1} {z} 5900; CHECK-NEXT: vpmovsxwq %xmm0, %zmm0 5901; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 5902; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5903; CHECK-NEXT: retq 5904 %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) 5905 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2) 5906 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1) 5907 %res3 = add <8 x i64> %res, %res1 5908 %res4 = add <8 x i64> %res3, %res2 5909 ret <8 x i64> %res4 5910} 5911 5912declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) 5913 5914define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { 5915; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512: 5916; CHECK: ## BB#0: 5917; CHECK-NEXT: kmovw %edi, %k1 5918; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1} 5919; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3 {%k1} {z} 5920; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 5921; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1 5922; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5923; CHECK-NEXT: retq 5924 %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) 5925 %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3) 5926 %res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) 5927 %res3 = fadd <8 x double> %res, %res1 5928 %res4 = fadd <8 x double> %res3, %res2 5929 ret <8 x double> %res4 5930} 5931 5932declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 5933 5934define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 5935; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_512: 5936; CHECK: ## BB#0: 5937; CHECK-NEXT: kmovw %edi, %k1 5938; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1} 5939; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3 {%k1} {z} 5940; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 5941; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1 5942; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 5943; CHECK-NEXT: retq 5944 %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 5945 %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3) 5946 %res2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 5947 %res3 = add <8 x i64> %res, %res1 5948 %res4 = add <8 x i64> %res3, %res2 5949 ret <8 x i64> %res4 5950} 5951 5952declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16) 5953 5954define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { 5955; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512: 5956; CHECK: ## BB#0: 5957; CHECK-NEXT: kmovw %edi, %k1 5958; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1} 5959; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3 {%k1} {z} 5960; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 5961; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1 5962; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 5963; CHECK-NEXT: retq 5964 %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) 5965 %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3) 5966 %res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) 5967 %res3 = fadd <16 x float> %res, %res1 5968 %res4 = fadd <16 x float> %res3, %res2 5969 ret <16 x float> %res4 5970} 5971 5972declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 5973 5974define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 5975; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_512: 5976; CHECK: ## BB#0: 5977; CHECK-NEXT: kmovw %edi, %k1 5978; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1} 5979; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3 {%k1} {z} 5980; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 5981; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1 5982; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5983; CHECK-NEXT: retq 5984 %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 5985 %res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3) 5986 %res2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 5987 %res3 = add <16 x i32> %res, %res1 5988 %res4 = add <16 x i32> %res3, %res2 5989 ret <16 x i32> %res4 5990} 5991 5992declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32) 5993 5994define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) { 5995; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512: 5996; CHECK: ## BB#0: 5997; CHECK-NEXT: kmovw %edi, %k1 5998; CHECK-NEXT: vmovaps %zmm0, %zmm3 5999; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1} 6000; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 6001; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z} 6002; CHECK-NEXT: vfixupimmpd $3, {sae}, %zmm2, %zmm1, %zmm0 6003; CHECK-NEXT: vaddpd %zmm4, %zmm3, %zmm1 6004; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 6005; CHECK-NEXT: retq 6006 %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4) 6007 %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4) 6008 %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8) 6009 %res3 = fadd <8 x double> %res, %res1 6010 %res4 = fadd <8 x double> %res3, %res2 6011 ret <8 x double> %res4 6012} 6013 6014declare <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32) 6015 6016define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) { 6017; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512: 6018; CHECK: ## BB#0: 6019; CHECK-NEXT: kmovw %edi, %k1 6020; CHECK-NEXT: vmovaps %zmm0, %zmm3 6021; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z} 6022; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 6023; CHECK-NEXT: vmovaps %zmm0, %zmm5 6024; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z} 6025; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0 6026; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1 6027; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 6028; CHECK-NEXT: retq 6029 %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4) 6030 %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4) 6031 %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8) 6032 %res3 = fadd <8 x double> %res, %res1 6033 %res4 = fadd <8 x double> %res3, %res2 6034 ret <8 x double> %res4 6035} 6036 6037declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32) 6038 6039define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { 6040; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ss: 6041; CHECK: ## BB#0: 6042; CHECK-NEXT: andl $1, %edi 6043; CHECK-NEXT: kmovw %edi, %k1 6044; CHECK-NEXT: vmovaps %zmm0, %zmm3 6045; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} 6046; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 6047; CHECK-NEXT: vmovaps %zmm0, %zmm5 6048; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1} 6049; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 6050; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1 6051; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 6052; CHECK-NEXT: retq 6053 %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) 6054 %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4) 6055 %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8) 6056 %res3 = fadd <4 x float> %res, %res1 6057 %res4 = fadd <4 x float> %res3, %res2 6058 ret <4 x float> %res4 6059} 6060 6061declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32) 6062 6063define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { 6064; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ss: 6065; CHECK: ## BB#0: 6066; CHECK-NEXT: andl $1, %edi 6067; CHECK-NEXT: kmovw %edi, %k1 6068; CHECK-NEXT: vmovaps %zmm0, %zmm3 6069; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z} 6070; CHECK-NEXT: vmovaps %zmm0, %zmm4 6071; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4 6072; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 6073; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} 6074; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 6075; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 6076; CHECK-NEXT: retq 6077 %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) 6078 %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8) 6079 %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 4) 6080 %res3 = fadd <4 x float> %res, %res1 6081 %res4 = fadd <4 x float> %res3, %res2 6082 ret <4 x float> %res4 6083} 6084 6085declare <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32) 6086 6087define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) { 6088; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512: 6089; CHECK: ## BB#0: 6090; CHECK-NEXT: kmovw %edi, %k1 6091; CHECK-NEXT: vmovaps %zmm0, %zmm3 6092; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} 6093; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 6094; CHECK-NEXT: vmovaps %zmm0, %zmm5 6095; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1} 6096; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 6097; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1 6098; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 6099; CHECK-NEXT: retq 6100 %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) 6101 %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4) 6102 %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8) 6103 %res3 = fadd <16 x float> %res, %res1 6104 %res4 = fadd <16 x float> %res3, %res2 6105 ret <16 x float> %res4 6106} 6107 6108declare <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32) 6109 6110define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) { 6111; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512: 6112; CHECK: ## BB#0: 6113; CHECK-NEXT: kmovw %edi, %k1 6114; CHECK-NEXT: vmovaps %zmm0, %zmm3 6115; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z} 6116; CHECK-NEXT: vmovaps %zmm0, %zmm4 6117; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4 6118; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 6119; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} 6120; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 6121; CHECK-NEXT: vaddps %zmm4, %zmm0, %zmm0 6122; CHECK-NEXT: retq 6123 %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) 6124 %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 8) 6125 %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4) 6126 %res3 = fadd <16 x float> %res, %res1 6127 %res4 = fadd <16 x float> %res3, %res2 6128 ret <16 x float> %res4 6129} 6130 6131declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32) 6132 6133define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { 6134; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_sd: 6135; CHECK: ## BB#0: 6136; CHECK-NEXT: andl $1, %edi 6137; CHECK-NEXT: kmovw %edi, %k1 6138; CHECK-NEXT: vmovaps %zmm0, %zmm3 6139; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} 6140; CHECK-NEXT: vmovaps %zmm0, %zmm4 6141; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4 6142; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 6143; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} 6144; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 6145; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0 6146; CHECK-NEXT: retq 6147 %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) 6148 %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) 6149 %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 -1, i32 4) 6150 %res3 = fadd <2 x double> %res, %res1 6151 %res4 = fadd <2 x double> %res3, %res2 6152 ret <2 x double> %res4 6153} 6154 6155declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32) 6156 6157define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { 6158; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_sd: 6159; CHECK: ## BB#0: 6160; CHECK-NEXT: andl $1, %edi 6161; CHECK-NEXT: kmovw %edi, %k1 6162; CHECK-NEXT: vmovaps %zmm0, %zmm3 6163; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} 6164; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 6165; CHECK-NEXT: vmovaps %zmm0, %zmm5 6166; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z} 6167; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} 6168; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1 6169; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 6170; CHECK-NEXT: retq 6171 %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) 6172 %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) 6173 %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8) 6174 %res3 = fadd <2 x double> %res, %res1 6175 %res4 = fadd <2 x double> %res3, %res2 6176 ret <2 x double> %res4 6177} 6178 6179declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2) 6180 6181define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { 6182; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_512: 6183; CHECK: ## BB#0: 6184; CHECK-NEXT: kmovw %edi, %k1 6185; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 {%k1} 6186; CHECK-NEXT: kmovw %k0, %ecx 6187; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 6188; CHECK-NEXT: kmovw %k0, %eax 6189; CHECK-NEXT: addl %ecx, %eax 6190; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 6191; CHECK-NEXT: retq 6192 %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) 6193 %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16-1) 6194 %res2 = add i16 %res, %res1 6195 ret i16 %res2 6196} 6197 6198declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2) 6199 6200define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { 6201; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_512: 6202; CHECK: ## BB#0: 6203; CHECK-NEXT: kmovw %edi, %k1 6204; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 {%k1} 6205; CHECK-NEXT: kmovw %k0, %ecx 6206; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 6207; CHECK-NEXT: kmovw %k0, %eax 6208; CHECK-NEXT: addb %cl, %al 6209; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 6210; CHECK-NEXT: retq 6211 %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) 6212 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1) 6213 %res2 = add i8 %res, %res1 6214 ret i8 %res2 6215} 6216 6217define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) { 6218; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512: 6219; CHECK: ## BB#0: 6220; CHECK-NEXT: kmovw %esi, %k1 6221; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} 6222; CHECK-NEXT: vpbroadcastd %edi, %zmm1 {%k1} {z} 6223; CHECK-NEXT: vpbroadcastd %edi, %zmm2 6224; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 6225; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 6226; CHECK-NEXT: retq 6227 %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1) 6228 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask) 6229 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask) 6230 %res3 = add <16 x i32> %res, %res1 6231 %res4 = add <16 x i32> %res2, %res3 6232 ret <16 x i32> %res4 6233} 6234 6235declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16) 6236 6237define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) { 6238; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512: 6239; CHECK: ## BB#0: 6240; CHECK-NEXT: kmovw %esi, %k1 6241; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} 6242; CHECK-NEXT: vpbroadcastq %rdi, %zmm1 {%k1} {z} 6243; CHECK-NEXT: vpbroadcastq %rdi, %zmm2 6244; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 6245; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 6246; CHECK-NEXT: retq 6247 %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1) 6248 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask) 6249 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask) 6250 %res3 = add <8 x i64> %res, %res1 6251 %res4 = add <8 x i64> %res2, %res3 6252 ret <8 x i64> %res4 6253} 6254declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8) 6255 6256declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) 6257 6258define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ 6259; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_sd: 6260; CHECK: ## BB#0: 6261; CHECK-NEXT: andl $1, %edi 6262; CHECK-NEXT: kmovw %edi, %k1 6263; CHECK-NEXT: vmovaps %zmm0, %zmm3 6264; CHECK-NEXT: vfmadd132sd %xmm1, %xmm2, %xmm3 {%k1} 6265; CHECK-NEXT: vmovaps %zmm1, %zmm4 6266; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm4 6267; CHECK-NEXT: vmovaps %zmm0, %zmm5 6268; CHECK-NEXT: vfmadd132sd {rz-sae}, %xmm1, %xmm2, %xmm5 {%k1} 6269; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1 6270; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0 6271; CHECK-NEXT: vaddpd %xmm5, %xmm1, %xmm1 6272; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 6273; CHECK-NEXT: retq 6274 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4) 6275 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) 6276 %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3) 6277 %res3 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3) 6278 %res4 = fadd <2 x double> %res, %res1 6279 %res5 = fadd <2 x double> %res2, %res3 6280 %res6 = fadd <2 x double> %res4, %res5 6281 ret <2 x double> %res6 6282} 6283 6284declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) 6285 6286define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ 6287; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss: 6288; CHECK: ## BB#0: 6289; CHECK-NEXT: andl $1, %edi 6290; CHECK-NEXT: kmovw %edi, %k1 6291; CHECK-NEXT: vmovaps %zmm0, %zmm3 6292; CHECK-NEXT: vfmadd132ss %xmm1, %xmm2, %xmm3 {%k1} 6293; CHECK-NEXT: vmovaps %zmm1, %zmm4 6294; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm4 6295; CHECK-NEXT: vmovaps %zmm0, %zmm5 6296; CHECK-NEXT: vfmadd132ss {rz-sae}, %xmm1, %xmm2, %xmm5 {%k1} 6297; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm0, %xmm1 6298; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0 6299; CHECK-NEXT: vaddps %xmm5, %xmm1, %xmm1 6300; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 6301; CHECK-NEXT: retq 6302 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4) 6303 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) 6304 %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3) 6305 %res3 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3) 6306 %res4 = fadd <4 x float> %res, %res1 6307 %res5 = fadd <4 x float> %res2, %res3 6308 %res6 = fadd <4 x float> %res4, %res5 6309 ret <4 x float> %res6 6310} 6311 6312declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) 6313 6314define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ 6315; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_sd: 6316; CHECK: ## BB#0: 6317; CHECK-NEXT: andl $1, %edi 6318; CHECK-NEXT: kmovw %edi, %k1 6319; CHECK-NEXT: vmovaps %zmm1, %zmm3 6320; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm3 {%k1} {z} 6321; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1 {%k1} {z} 6322; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 6323; CHECK-NEXT: retq 6324 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) 6325 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3) 6326 %res2 = fadd <2 x double> %res, %res1 6327 ret <2 x double> %res2 6328} 6329 6330declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) 6331 6332define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ 6333; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss: 6334; CHECK: ## BB#0: 6335; CHECK-NEXT: andl $1, %edi 6336; CHECK-NEXT: kmovw %edi, %k1 6337; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 {%k1} {z} 6338; CHECK-NEXT: vmovaps %zmm1, %zmm0 6339; CHECK-NEXT: retq 6340 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) 6341 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3) 6342 %res2 = fadd <4 x float> %res, %res1 6343 ret <4 x float> %res 6344} 6345declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) 6346 6347define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ 6348; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd: 6349; CHECK: ## BB#0: 6350; CHECK-NEXT: andl $1, %edi 6351; CHECK-NEXT: kmovw %edi, %k1 6352; CHECK-NEXT: vmovaps %zmm2, %zmm3 6353; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1} 6354; CHECK-NEXT: vmovaps %zmm1, %zmm4 6355; CHECK-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm4 6356; CHECK-NEXT: vmovaps %zmm2, %zmm5 6357; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1} 6358; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm0, %xmm1 6359; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0 6360; CHECK-NEXT: vaddpd %xmm5, %xmm1, %xmm1 6361; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 6362; CHECK-NEXT: retq 6363 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4) 6364 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) 6365 %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3) 6366 %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3) 6367 %res4 = fadd <2 x double> %res, %res1 6368 %res5 = fadd <2 x double> %res2, %res3 6369 %res6 = fadd <2 x double> %res4, %res5 6370 ret <2 x double> %res6 6371} 6372 6373declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) 6374 6375define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ 6376; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss: 6377; CHECK: ## BB#0: 6378; CHECK-NEXT: andl $1, %edi 6379; CHECK-NEXT: kmovw %edi, %k1 6380; CHECK-NEXT: vmovaps %zmm2, %zmm3 6381; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1} 6382; CHECK-NEXT: vmovaps %zmm1, %zmm4 6383; CHECK-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm4 6384; CHECK-NEXT: vmovaps %zmm2, %zmm5 6385; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1} 6386; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm0, %xmm1 6387; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0 6388; CHECK-NEXT: vaddps %xmm5, %xmm1, %xmm1 6389; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 6390; CHECK-NEXT: retq 6391 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4) 6392 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) 6393 %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3) 6394 %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3) 6395 %res4 = fadd <4 x float> %res, %res1 6396 %res5 = fadd <4 x float> %res2, %res3 6397 %res6 = fadd <4 x float> %res4, %res5 6398 ret <4 x float> %res6 6399} 6400 6401define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) { 6402; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm: 6403; CHECK: ## BB#0: 6404; CHECK-NEXT: andl $1, %esi 6405; CHECK-NEXT: kmovw %esi, %k1 6406; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1} 6407; CHECK-NEXT: vmovaps %zmm1, %zmm0 6408; CHECK-NEXT: retq 6409 %q = load float, float* %ptr_b 6410 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 6411 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4) 6412 ret < 4 x float> %res 6413} 6414 6415define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) { 6416; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm: 6417; CHECK: ## BB#0: 6418; CHECK-NEXT: andl $1, %esi 6419; CHECK-NEXT: kmovw %esi, %k1 6420; CHECK-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1} 6421; CHECK-NEXT: retq 6422 %q = load float, float* %ptr_b 6423 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 6424 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0,<4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4) 6425 ret < 4 x float> %res 6426} 6427 6428 6429define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) { 6430; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm: 6431; CHECK: ## BB#0: 6432; CHECK-NEXT: kxorw %k0, %k0, %k1 6433; CHECK-NEXT: vfmadd213ss (%rdi), %xmm0, %xmm1 {%k1} {z} 6434; CHECK-NEXT: vmovaps %zmm1, %zmm0 6435; CHECK-NEXT: retq 6436 %q = load float, float* %ptr_b 6437 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 6438 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %vecinit.i, i8 0, i32 4) 6439 ret < 4 x float> %res 6440} 6441