; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl -mattr=+avx512ifma | FileCheck %s
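;
; VPMADD52HUQ and VPMADD52LUQ multiply the low 52 bits of the unsigned
; 64-bit elements of their two source operands and accumulate the high
; (HUQ) or low (LUQ) 52 bits of the 104-bit product into the 64-bit
; destination elements. Each test below exercises the merge-masked
; (mask.*) or zero-masked (maskz.*) intrinsic with a variable mask, with
; zeroed inputs, and with an all-ones mask.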

declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

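; Merge-masked forms of the low-half instruction, VPMADD52LUQ.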
declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

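; Zero-masked (maskz) forms of VPMADD52LUQ: elements whose mask bit is
; clear are zeroed rather than merged from the passthru operand.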
declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}