xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s

; VFMADD
; Scalar float FMA whose third intrinsic operand is built from memory: a float
; is loaded from %a2 and inserted into lane 0 of an otherwise-undef vector.
; The FileCheck pattern expects vfmaddss to print a memory operand first,
; i.e. the load is consumed directly by the instruction.
define <4 x float> @test_x86_fma_vfmadd_ss_load(<4 x float> %a0, <4 x float> %a1, float* %a2) {
  ; Anchor the pattern below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_ss_load:
  ; CHECK: vfmaddss (%{{.*}})
  %x = load float, float* %a2
  %y = insertelement <4 x float> undef, float %x, i32 0
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %y)
  ret <4 x float> %res
}
; Scalar float FMA whose second intrinsic operand is built from memory: a float
; is loaded from %a1 and inserted into lane 0 of an otherwise-undef vector.
; The FileCheck pattern expects vfmaddss to print a register operand followed
; by a memory operand.
define <4 x float> @test_x86_fma_vfmadd_ss_load2(<4 x float> %a0, float* %a1, <4 x float> %a2) {
  ; Anchor the pattern below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_ss_load2:
  ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
  %x = load float, float* %a1
  %y = insertelement <4 x float> undef, float %x, i32 0
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %y, <4 x float> %a2)
  ret <4 x float> %res
}
19*9880d681SAndroid Build Coastguard Worker
; Declaration of the x86 vfmadd.ss intrinsic called by the scalar-float tests
; above: three <4 x float> operands, <4 x float> result.
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
21*9880d681SAndroid Build Coastguard Worker
; Scalar double FMA whose third intrinsic operand is built from memory: a
; double is loaded from %a2 and inserted into lane 0 of an otherwise-undef
; vector. The FileCheck pattern expects vfmaddsd to print a memory operand
; first.
define <2 x double> @test_x86_fma_vfmadd_sd_load(<2 x double> %a0, <2 x double> %a1, double* %a2) {
  ; Anchor the pattern below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_sd_load:
  ; CHECK: vfmaddsd (%{{.*}})
  %x = load double, double* %a2
  %y = insertelement <2 x double> undef, double %x, i32 0
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %y)
  ret <2 x double> %res
}
; Scalar double FMA whose second intrinsic operand is built from memory: a
; double is loaded from %a1 and inserted into lane 0 of an otherwise-undef
; vector. The FileCheck pattern expects vfmaddsd to print a register operand
; followed by a memory operand.
define <2 x double> @test_x86_fma_vfmadd_sd_load2(<2 x double> %a0, double* %a1, <2 x double> %a2) {
  ; Anchor the pattern below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_sd_load2:
  ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
  %x = load double, double* %a1
  %y = insertelement <2 x double> undef, double %x, i32 0
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %y, <2 x double> %a2)
  ret <2 x double> %res
}
; Declaration of the x86 vfmadd.sd intrinsic called by the scalar-double tests
; above: three <2 x double> operands, <2 x double> result.
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; Packed float FMA whose third intrinsic operand is a full <4 x float> loaded
; from %a2. The FileCheck pattern expects vfmaddps to print a memory operand
; first.
define <4 x float> @test_x86_fma_vfmadd_ps_load(<4 x float> %a0, <4 x float> %a1, <4 x float>* %a2) {
  ; Anchor the pattern below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load:
  ; CHECK: vfmaddps (%{{.*}})
  %x = load <4 x float>, <4 x float>* %a2
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %x)
  ret <4 x float> %res
}
; Packed float FMA whose second intrinsic operand is a full <4 x float> loaded
; from %a1. The FileCheck pattern expects vfmaddps to print a register operand
; followed by a memory operand.
define <4 x float> @test_x86_fma_vfmadd_ps_load2(<4 x float> %a0, <4 x float>* %a1, <4 x float> %a2) {
  ; Anchor the pattern below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load2:
  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
  %x = load <4 x float>, <4 x float>* %a1
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %x, <4 x float> %a2)
  ret <4 x float> %res
}
; Declaration of the x86 vfmadd.ps intrinsic called by the packed-float tests
; in this file: three <4 x float> operands, <4 x float> result.
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
50*9880d681SAndroid Build Coastguard Worker
; To test execution dependency: the first two intrinsic operands are both
; loaded from memory (%a0 and %a1). The FileCheck patterns expect a separate
; vmovaps to appear before a vfmaddps that still takes a memory operand in its
; second printed position.
define <4 x float> @test_x86_fma_vfmadd_ps_load3(<4 x float>* %a0, <4 x float>* %a1, <4 x float> %a2) {
  ; Anchor the patterns below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load3:
  ; CHECK: vmovaps
  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
  %x = load <4 x float>, <4 x float>* %a0
  %y = load <4 x float>, <4 x float>* %a1
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %x, <4 x float> %y, <4 x float> %a2)
  ret <4 x float> %res
}
60*9880d681SAndroid Build Coastguard Worker
; Packed double FMA whose third intrinsic operand is a full <2 x double> loaded
; from %a2. The FileCheck pattern expects vfmaddpd to print a memory operand
; first.
define <2 x double> @test_x86_fma_vfmadd_pd_load(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
  ; Anchor the pattern below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load:
  ; CHECK: vfmaddpd (%{{.*}})
  %x = load <2 x double>, <2 x double>* %a2
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %x)
  ret <2 x double> %res
}
; Packed double FMA whose second intrinsic operand is a full <2 x double>
; loaded from %a1. The FileCheck pattern expects vfmaddpd to print a register
; operand followed by a memory operand.
define <2 x double> @test_x86_fma_vfmadd_pd_load2(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
  ; Anchor the pattern below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load2:
  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
  %x = load <2 x double>, <2 x double>* %a1
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %x, <2 x double> %a2)
  ret <2 x double> %res
}
; Declaration of the x86 vfmadd.pd intrinsic called by the packed-double tests
; in this file: three <2 x double> operands, <2 x double> result.
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
74*9880d681SAndroid Build Coastguard Worker
; To test execution dependency: the first two intrinsic operands are both
; loaded from memory (%a0 and %a1). The FileCheck patterns expect a separate
; vmovapd to appear before a vfmaddpd that still takes a memory operand in its
; second printed position.
define <2 x double> @test_x86_fma_vfmadd_pd_load3(<2 x double>* %a0, <2 x double>* %a1, <2 x double> %a2) {
  ; Anchor the patterns below to this function's label in the llc output.
  ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load3:
  ; CHECK: vmovapd
  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
  %x = load <2 x double>, <2 x double>* %a0
  %y = load <2 x double>, <2 x double>* %a1
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %x, <2 x double> %y, <2 x double> %a2)
  ret <2 x double> %res
}
84*9880d681SAndroid Build Coastguard Worker
85