; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s

; Scalar FMA intrinsics with one operand read directly from memory.
;
; Every function below follows the same recipe:
;   1. load one scalar through %a and one through %b,
;   2. put each scalar in lane 0 of a vector whose remaining lanes are 0.0,
;   3. call one of the llvm.x86.fma.* scalar intrinsics declared below,
;   4. store lane 0 of the result back through %a.
;
; The function-name suffix spells the operand order handed to the intrinsic:
; "aab" passes (a, a, b) and "aba" passes (a, b, a).
;
; With the Windows x64 triple used on the command line above, the first
; pointer argument (%a) is in rcx and the second (%b) is in rdx. The expected
; output for each function is therefore exactly four instructions: a scalar
; load from (%rcx), a single FMA instruction that takes (%rdx) as its memory
; operand, a scalar store to (%rcx), and the return. The "aab" functions
; expect the 213 instruction form and the "aba" functions the 132 form.

attributes #0 = { nounwind }

declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)

; ---------------------------------------------------------------------------
; Single precision: <4 x float> operands, lanes 1-3 set to 0.0.
; ---------------------------------------------------------------------------

; vfmadd.ss with operands (a, a, b); b is the expected memory operand.
define void @fmadd_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmadd_aab_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load float, float* %a
  %a.v0 = insertelement <4 x float> undef, float %a.elt, i32 0
  %a.v1 = insertelement <4 x float> %a.v0, float 0.000000e+00, i32 1
  %a.v2 = insertelement <4 x float> %a.v1, float 0.000000e+00, i32 2
  %a.vec = insertelement <4 x float> %a.v2, float 0.000000e+00, i32 3

  %b.elt = load float, float* %b
  %b.v0 = insertelement <4 x float> undef, float %b.elt, i32 0
  %b.v1 = insertelement <4 x float> %b.v0, float 0.000000e+00, i32 1
  %b.v2 = insertelement <4 x float> %b.v1, float 0.000000e+00, i32 2
  %b.vec = insertelement <4 x float> %b.v2, float 0.000000e+00, i32 3

  %res.vec = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a.vec, <4 x float> %a.vec, <4 x float> %b.vec)

  %res.elt = extractelement <4 x float> %res.vec, i32 0
  store float %res.elt, float* %a
  ret void
}

; vfmadd.ss with operands (a, b, a); b is the expected memory operand.
define void @fmadd_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmadd_aba_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load float, float* %a
  %a.v0 = insertelement <4 x float> undef, float %a.elt, i32 0
  %a.v1 = insertelement <4 x float> %a.v0, float 0.000000e+00, i32 1
  %a.v2 = insertelement <4 x float> %a.v1, float 0.000000e+00, i32 2
  %a.vec = insertelement <4 x float> %a.v2, float 0.000000e+00, i32 3

  %b.elt = load float, float* %b
  %b.v0 = insertelement <4 x float> undef, float %b.elt, i32 0
  %b.v1 = insertelement <4 x float> %b.v0, float 0.000000e+00, i32 1
  %b.v2 = insertelement <4 x float> %b.v1, float 0.000000e+00, i32 2
  %b.vec = insertelement <4 x float> %b.v2, float 0.000000e+00, i32 3

  %res.vec = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a.vec, <4 x float> %b.vec, <4 x float> %a.vec)

  %res.elt = extractelement <4 x float> %res.vec, i32 0
  store float %res.elt, float* %a
  ret void
}

; vfmsub.ss with operands (a, a, b); b is the expected memory operand.
define void @fmsub_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmsub_aab_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load float, float* %a
  %a.v0 = insertelement <4 x float> undef, float %a.elt, i32 0
  %a.v1 = insertelement <4 x float> %a.v0, float 0.000000e+00, i32 1
  %a.v2 = insertelement <4 x float> %a.v1, float 0.000000e+00, i32 2
  %a.vec = insertelement <4 x float> %a.v2, float 0.000000e+00, i32 3

  %b.elt = load float, float* %b
  %b.v0 = insertelement <4 x float> undef, float %b.elt, i32 0
  %b.v1 = insertelement <4 x float> %b.v0, float 0.000000e+00, i32 1
  %b.v2 = insertelement <4 x float> %b.v1, float 0.000000e+00, i32 2
  %b.vec = insertelement <4 x float> %b.v2, float 0.000000e+00, i32 3

  %res.vec = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a.vec, <4 x float> %a.vec, <4 x float> %b.vec)

  %res.elt = extractelement <4 x float> %res.vec, i32 0
  store float %res.elt, float* %a
  ret void
}

; vfmsub.ss with operands (a, b, a); b is the expected memory operand.
define void @fmsub_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmsub_aba_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load float, float* %a
  %a.v0 = insertelement <4 x float> undef, float %a.elt, i32 0
  %a.v1 = insertelement <4 x float> %a.v0, float 0.000000e+00, i32 1
  %a.v2 = insertelement <4 x float> %a.v1, float 0.000000e+00, i32 2
  %a.vec = insertelement <4 x float> %a.v2, float 0.000000e+00, i32 3

  %b.elt = load float, float* %b
  %b.v0 = insertelement <4 x float> undef, float %b.elt, i32 0
  %b.v1 = insertelement <4 x float> %b.v0, float 0.000000e+00, i32 1
  %b.v2 = insertelement <4 x float> %b.v1, float 0.000000e+00, i32 2
  %b.vec = insertelement <4 x float> %b.v2, float 0.000000e+00, i32 3

  %res.vec = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a.vec, <4 x float> %b.vec, <4 x float> %a.vec)

  %res.elt = extractelement <4 x float> %res.vec, i32 0
  store float %res.elt, float* %a
  ret void
}

; vfnmadd.ss with operands (a, a, b); b is the expected memory operand.
define void @fnmadd_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmadd_aab_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load float, float* %a
  %a.v0 = insertelement <4 x float> undef, float %a.elt, i32 0
  %a.v1 = insertelement <4 x float> %a.v0, float 0.000000e+00, i32 1
  %a.v2 = insertelement <4 x float> %a.v1, float 0.000000e+00, i32 2
  %a.vec = insertelement <4 x float> %a.v2, float 0.000000e+00, i32 3

  %b.elt = load float, float* %b
  %b.v0 = insertelement <4 x float> undef, float %b.elt, i32 0
  %b.v1 = insertelement <4 x float> %b.v0, float 0.000000e+00, i32 1
  %b.v2 = insertelement <4 x float> %b.v1, float 0.000000e+00, i32 2
  %b.vec = insertelement <4 x float> %b.v2, float 0.000000e+00, i32 3

  %res.vec = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a.vec, <4 x float> %a.vec, <4 x float> %b.vec)

  %res.elt = extractelement <4 x float> %res.vec, i32 0
  store float %res.elt, float* %a
  ret void
}

; vfnmadd.ss with operands (a, b, a); b is the expected memory operand.
define void @fnmadd_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmadd_aba_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load float, float* %a
  %a.v0 = insertelement <4 x float> undef, float %a.elt, i32 0
  %a.v1 = insertelement <4 x float> %a.v0, float 0.000000e+00, i32 1
  %a.v2 = insertelement <4 x float> %a.v1, float 0.000000e+00, i32 2
  %a.vec = insertelement <4 x float> %a.v2, float 0.000000e+00, i32 3

  %b.elt = load float, float* %b
  %b.v0 = insertelement <4 x float> undef, float %b.elt, i32 0
  %b.v1 = insertelement <4 x float> %b.v0, float 0.000000e+00, i32 1
  %b.v2 = insertelement <4 x float> %b.v1, float 0.000000e+00, i32 2
  %b.vec = insertelement <4 x float> %b.v2, float 0.000000e+00, i32 3

  %res.vec = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a.vec, <4 x float> %b.vec, <4 x float> %a.vec)

  %res.elt = extractelement <4 x float> %res.vec, i32 0
  store float %res.elt, float* %a
  ret void
}

; vfnmsub.ss with operands (a, a, b); b is the expected memory operand.
define void @fnmsub_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmsub_aab_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load float, float* %a
  %a.v0 = insertelement <4 x float> undef, float %a.elt, i32 0
  %a.v1 = insertelement <4 x float> %a.v0, float 0.000000e+00, i32 1
  %a.v2 = insertelement <4 x float> %a.v1, float 0.000000e+00, i32 2
  %a.vec = insertelement <4 x float> %a.v2, float 0.000000e+00, i32 3

  %b.elt = load float, float* %b
  %b.v0 = insertelement <4 x float> undef, float %b.elt, i32 0
  %b.v1 = insertelement <4 x float> %b.v0, float 0.000000e+00, i32 1
  %b.v2 = insertelement <4 x float> %b.v1, float 0.000000e+00, i32 2
  %b.vec = insertelement <4 x float> %b.v2, float 0.000000e+00, i32 3

  %res.vec = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a.vec, <4 x float> %a.vec, <4 x float> %b.vec)

  %res.elt = extractelement <4 x float> %res.vec, i32 0
  store float %res.elt, float* %a
  ret void
}

; vfnmsub.ss with operands (a, b, a); b is the expected memory operand.
define void @fnmsub_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmsub_aba_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load float, float* %a
  %a.v0 = insertelement <4 x float> undef, float %a.elt, i32 0
  %a.v1 = insertelement <4 x float> %a.v0, float 0.000000e+00, i32 1
  %a.v2 = insertelement <4 x float> %a.v1, float 0.000000e+00, i32 2
  %a.vec = insertelement <4 x float> %a.v2, float 0.000000e+00, i32 3

  %b.elt = load float, float* %b
  %b.v0 = insertelement <4 x float> undef, float %b.elt, i32 0
  %b.v1 = insertelement <4 x float> %b.v0, float 0.000000e+00, i32 1
  %b.v2 = insertelement <4 x float> %b.v1, float 0.000000e+00, i32 2
  %b.vec = insertelement <4 x float> %b.v2, float 0.000000e+00, i32 3

  %res.vec = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a.vec, <4 x float> %b.vec, <4 x float> %a.vec)

  %res.elt = extractelement <4 x float> %res.vec, i32 0
  store float %res.elt, float* %a
  ret void
}

; ---------------------------------------------------------------------------
; Double precision: <2 x double> operands, lane 1 set to 0.0.
; ---------------------------------------------------------------------------

; vfmadd.sd with operands (a, a, b); b is the expected memory operand.
define void @fmadd_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmadd_aab_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load double, double* %a
  %a.v0 = insertelement <2 x double> undef, double %a.elt, i32 0
  %a.vec = insertelement <2 x double> %a.v0, double 0.000000e+00, i32 1

  %b.elt = load double, double* %b
  %b.v0 = insertelement <2 x double> undef, double %b.elt, i32 0
  %b.vec = insertelement <2 x double> %b.v0, double 0.000000e+00, i32 1

  %res.vec = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a.vec, <2 x double> %a.vec, <2 x double> %b.vec)

  %res.elt = extractelement <2 x double> %res.vec, i32 0
  store double %res.elt, double* %a
  ret void
}

; vfmadd.sd with operands (a, b, a); b is the expected memory operand.
define void @fmadd_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmadd_aba_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load double, double* %a
  %a.v0 = insertelement <2 x double> undef, double %a.elt, i32 0
  %a.vec = insertelement <2 x double> %a.v0, double 0.000000e+00, i32 1

  %b.elt = load double, double* %b
  %b.v0 = insertelement <2 x double> undef, double %b.elt, i32 0
  %b.vec = insertelement <2 x double> %b.v0, double 0.000000e+00, i32 1

  %res.vec = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a.vec, <2 x double> %b.vec, <2 x double> %a.vec)

  %res.elt = extractelement <2 x double> %res.vec, i32 0
  store double %res.elt, double* %a
  ret void
}

; vfmsub.sd with operands (a, a, b); b is the expected memory operand.
define void @fmsub_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmsub_aab_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load double, double* %a
  %a.v0 = insertelement <2 x double> undef, double %a.elt, i32 0
  %a.vec = insertelement <2 x double> %a.v0, double 0.000000e+00, i32 1

  %b.elt = load double, double* %b
  %b.v0 = insertelement <2 x double> undef, double %b.elt, i32 0
  %b.vec = insertelement <2 x double> %b.v0, double 0.000000e+00, i32 1

  %res.vec = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a.vec, <2 x double> %a.vec, <2 x double> %b.vec)

  %res.elt = extractelement <2 x double> %res.vec, i32 0
  store double %res.elt, double* %a
  ret void
}

; vfmsub.sd with operands (a, b, a); b is the expected memory operand.
define void @fmsub_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmsub_aba_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load double, double* %a
  %a.v0 = insertelement <2 x double> undef, double %a.elt, i32 0
  %a.vec = insertelement <2 x double> %a.v0, double 0.000000e+00, i32 1

  %b.elt = load double, double* %b
  %b.v0 = insertelement <2 x double> undef, double %b.elt, i32 0
  %b.vec = insertelement <2 x double> %b.v0, double 0.000000e+00, i32 1

  %res.vec = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a.vec, <2 x double> %b.vec, <2 x double> %a.vec)

  %res.elt = extractelement <2 x double> %res.vec, i32 0
  store double %res.elt, double* %a
  ret void
}

; vfnmadd.sd with operands (a, a, b); b is the expected memory operand.
define void @fnmadd_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmadd_aab_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load double, double* %a
  %a.v0 = insertelement <2 x double> undef, double %a.elt, i32 0
  %a.vec = insertelement <2 x double> %a.v0, double 0.000000e+00, i32 1

  %b.elt = load double, double* %b
  %b.v0 = insertelement <2 x double> undef, double %b.elt, i32 0
  %b.vec = insertelement <2 x double> %b.v0, double 0.000000e+00, i32 1

  %res.vec = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a.vec, <2 x double> %a.vec, <2 x double> %b.vec)

  %res.elt = extractelement <2 x double> %res.vec, i32 0
  store double %res.elt, double* %a
  ret void
}

; vfnmadd.sd with operands (a, b, a); b is the expected memory operand.
define void @fnmadd_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmadd_aba_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load double, double* %a
  %a.v0 = insertelement <2 x double> undef, double %a.elt, i32 0
  %a.vec = insertelement <2 x double> %a.v0, double 0.000000e+00, i32 1

  %b.elt = load double, double* %b
  %b.v0 = insertelement <2 x double> undef, double %b.elt, i32 0
  %b.vec = insertelement <2 x double> %b.v0, double 0.000000e+00, i32 1

  %res.vec = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a.vec, <2 x double> %b.vec, <2 x double> %a.vec)

  %res.elt = extractelement <2 x double> %res.vec, i32 0
  store double %res.elt, double* %a
  ret void
}

; vfnmsub.sd with operands (a, a, b); b is the expected memory operand.
define void @fnmsub_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmsub_aab_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load double, double* %a
  %a.v0 = insertelement <2 x double> undef, double %a.elt, i32 0
  %a.vec = insertelement <2 x double> %a.v0, double 0.000000e+00, i32 1

  %b.elt = load double, double* %b
  %b.v0 = insertelement <2 x double> undef, double %b.elt, i32 0
  %b.vec = insertelement <2 x double> %b.v0, double 0.000000e+00, i32 1

  %res.vec = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a.vec, <2 x double> %a.vec, <2 x double> %b.vec)

  %res.elt = extractelement <2 x double> %res.vec, i32 0
  store double %res.elt, double* %a
  ret void
}

; vfnmsub.sd with operands (a, b, a); b is the expected memory operand.
define void @fnmsub_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmsub_aba_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.elt = load double, double* %a
  %a.v0 = insertelement <2 x double> undef, double %a.elt, i32 0
  %a.vec = insertelement <2 x double> %a.v0, double 0.000000e+00, i32 1

  %b.elt = load double, double* %b
  %b.v0 = insertelement <2 x double> undef, double %b.elt, i32 0
  %b.vec = insertelement <2 x double> %b.v0, double 0.000000e+00, i32 1

  %res.vec = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a.vec, <2 x double> %b.vec, <2 x double> %a.vec)

  %res.elt = extractelement <2 x double> %res.vec, i32 0
  store double %res.elt, double* %a
  ret void
}