; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; Two llc invocations share the FileCheck patterns in this file (default prefix).
; First invocation: corei7-avx with FMA4 explicitly enabled through -mattr=+fma4.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s
; Second invocation: bdver2 with AVX enabled and FMA disabled through
; -mattr=+avx,-fma (presumably so the FMA4 forms are selected -- TODO confirm).
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s

; VFMADD
; Scalar float case: a float is loaded from %a2, inserted into lane 0 of an
; undef <4 x float>, and passed as the third operand of the vfmadd.ss intrinsic.
; The pattern expects vfmaddss to be printed with a memory operand first,
; i.e. the load is folded into the instruction.
define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
  ; Anchor the match to this function's assembly label so the pattern below
  ; cannot be satisfied by an instruction emitted for another function.
  ; CHECK-LABEL: test_x86_fma_vfmadd_ss_load:
  ; CHECK: vfmaddss (%{{.*}})
  %x = load float , float *%a2
  %y = insertelement <4 x float> undef, float %x, i32 0
  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y)
  ret < 4 x float > %res
}
; Scalar float case with the loaded value as the second intrinsic operand:
; a float is loaded from %a1 and inserted into lane 0 of an undef vector.
; The pattern expects vfmaddss with a register operand followed by a memory
; operand.
define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
  ; Anchor the match to this function's assembly label (the trailing colon
  ; keeps it distinct from other function names sharing this prefix).
  ; CHECK-LABEL: test_x86_fma_vfmadd_ss_load2:
  ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
  %x = load float , float *%a1
  %y = insertelement <4 x float> undef, float %x, i32 0
  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2)
  ret < 4 x float > %res
}

; Intrinsic called by the scalar single-precision (ss) tests in this file.
declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone

; Scalar double case: a double is loaded from %a2, inserted into lane 0 of an
; undef <2 x double>, and passed as the third operand of the vfmadd.sd
; intrinsic. The pattern expects vfmaddsd to be printed with a memory operand
; first.
define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
  ; Anchor the match to this function's assembly label so the pattern below
  ; cannot be satisfied by an instruction emitted for another function.
  ; CHECK-LABEL: test_x86_fma_vfmadd_sd_load:
  ; CHECK: vfmaddsd (%{{.*}})
  %x = load double , double *%a2
  %y = insertelement <2 x double> undef, double %x, i32 0
  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y)
  ret < 2 x double > %res
}
; Scalar double case with the loaded value as the second intrinsic operand:
; a double is loaded from %a1 and inserted into lane 0 of an undef vector.
; The pattern expects vfmaddsd with a register operand followed by a memory
; operand.
define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
  ; Anchor the match to this function's assembly label (the trailing colon
  ; keeps it distinct from other function names sharing this prefix).
  ; CHECK-LABEL: test_x86_fma_vfmadd_sd_load2:
  ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
  %x = load double , double *%a1
  %y = insertelement <2 x double> undef, double %x, i32 0
  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2)
  ret < 2 x double > %res
}
; Intrinsic called by the scalar double-precision (sd) tests in this file.
declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
; Packed float case: a full <4 x float> is loaded from %a2 and passed as the
; third operand of the vfmadd.ps intrinsic. The pattern expects vfmaddps to be
; printed with a memory operand first.
define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
  ; Anchor the match to this function's assembly label so the pattern below
  ; cannot be satisfied by an instruction emitted for another function.
  ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load:
  ; CHECK: vfmaddps (%{{.*}})
  %x = load <4 x float>, <4 x float>* %a2
  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x)
  ret < 4 x float > %res
}
; Packed float case with the loaded vector as the second intrinsic operand.
; The pattern expects vfmaddps with a register operand followed by a memory
; operand.
define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
  ; Anchor the match to this function's assembly label (the trailing colon
  ; keeps it distinct from other function names sharing this prefix).
  ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load2:
  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
  %x = load <4 x float>, <4 x float>* %a1
  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2)
  ret < 4 x float > %res
}
; Intrinsic called by the packed single-precision (ps) tests in this file.
declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone

; To test execution dependency
; Both the first and second intrinsic operands come from memory here. The
; patterns expect a standalone vmovaps followed by a vfmaddps that takes a
; register operand and then a memory operand.
; NOTE(review): presumably the point is that the separate load uses the
; float-domain move (vmovaps) -- confirm against the execution-domain pass.
define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) {
  ; Anchor both patterns to this function's assembly label so neither can be
  ; satisfied by an instruction emitted for another function.
  ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load3:
  ; CHECK: vmovaps
  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
  %x = load <4 x float>, <4 x float>* %a0
  %y = load <4 x float>, <4 x float>* %a1
  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2)
  ret < 4 x float > %res
}

; Packed double case: a full <2 x double> is loaded from %a2 and passed as the
; third operand of the vfmadd.pd intrinsic. The pattern expects vfmaddpd to be
; printed with a memory operand first.
define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
  ; Anchor the match to this function's assembly label so the pattern below
  ; cannot be satisfied by an instruction emitted for another function.
  ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load:
  ; CHECK: vfmaddpd (%{{.*}})
  %x = load <2 x double>, <2 x double>* %a2
  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x)
  ret < 2 x double > %res
}
; Packed double case with the loaded vector as the second intrinsic operand.
; The pattern expects vfmaddpd with a register operand followed by a memory
; operand.
define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
  ; Anchor the match to this function's assembly label (the trailing colon
  ; keeps it distinct from other function names sharing this prefix).
  ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load2:
  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
  %x = load <2 x double>, <2 x double>* %a1
  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2)
  ret < 2 x double > %res
}
; Intrinsic called by the packed double-precision (pd) tests in this file.
declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone

; To test execution dependency
; Both the first and second intrinsic operands come from memory here. The
; patterns expect a standalone vmovapd followed by a vfmaddpd that takes a
; register operand and then a memory operand.
; NOTE(review): presumably the point is that the separate load uses the
; double-domain move (vmovapd) -- confirm against the execution-domain pass.
define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) {
  ; Anchor both patterns to this function's assembly label so neither can be
  ; satisfied by an instruction emitted for another function.
  ; CHECK-LABEL: test_x86_fma_vfmadd_pd_load3:
  ; CHECK: vmovapd
  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
  %x = load <2 x double>, <2 x double>* %a0
  %y = load <2 x double>, <2 x double>* %a1
  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2)
  ret < 2 x double > %res
}