; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx512-fma.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX

; 512-bit single-precision a0*a1 + a2, written as separate fmul and fadd.
; With -fp-contract=fast both llc configurations are expected to emit one
; vfmadd213ps on zmm registers, leaving the result in zmm0.
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmadd_ps_z:
; ALL:       ## BB#0:
; ALL-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fadd <16 x float> %x, %a2
  ret <16 x float> %res
}

; 512-bit single-precision a0*a1 - a2, written as separate fmul and fsub.
; Both llc configurations are expected to fuse the pair into one
; vfmsub213ps on zmm registers.
define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmsub_ps_z:
; ALL:       ## BB#0:
; ALL-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %x, %a2
  ret <16 x float> %res
}

; 512-bit single-precision a2 - a0*a1 (the product is the subtrahend).
; Both llc configurations are expected to select the negated-multiply form,
; vfnmadd213ps, on zmm registers.
define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmadd_ps_z:
; ALL:       ## BB#0:
; ALL-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %a2, %x
  ret <16 x float> %res
}

; 512-bit single-precision -(a0*a1) - a2.
; The negation is spelled as a subtraction from a splat of -0.0; the negate,
; multiply and subtract are expected to collapse into one vfnmsub213ps on
; zmm registers in both llc configurations.
define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fnmsub_ps_z:
; ALL:       ## BB#0:
; ALL-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                          float -0.000000e+00>, %x
  %res = fsub <16 x float> %y, %a2
  ret <16 x float> %res
}

; 512-bit double-precision a0*a1 + a2, written as separate fmul and fadd.
; Both llc configurations are expected to emit one vfmadd213pd on zmm
; registers.
define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmadd_pd_z:
; ALL:       ## BB#0:
; ALL-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fadd <8 x double> %x, %a2
  ret <8 x double> %res
}

; 512-bit double-precision a0*a1 - a2, written as separate fmul and fsub.
; Both llc configurations are expected to emit one vfmsub213pd on zmm
; registers.
define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; ALL-LABEL: test_x86_fmsub_pd_z:
; ALL:       ## BB#0:
; ALL-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %x, %a2
  ret <8 x double> %res
}

; Scalar double a0*a1 - a2 with every operand in a register.
; The expected output is a vfmsub213sd whose destination is xmm1, followed by
; a register move that places the result in the return register (register 0).
define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; ALL-LABEL: test_x86_fmsub_213:
; ALL:       ## BB#0:
; ALL-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
; ALL-NEXT:    vmovaps %zmm1, %zmm0
; ALL-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

; Scalar double a0*a1 - a2 where the subtrahend a2 is loaded from memory.
; The load is expected to be folded into vfmsub213sd as its (%rdi) memory
; operand instead of being issued as a separate instruction.
define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_213_m:
; ALL:       ## BB#0:
; ALL-NEXT:    vfmsub213sd (%rdi), %xmm0, %xmm1
; ALL-NEXT:    vmovaps %zmm1, %zmm0
; ALL-NEXT:    retq
  %a2 = load double , double *%a2_ptr
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

; Scalar double a0*a2 - a1 where the multiplicand a2 is loaded from memory.
; Because the memory value is a factor of the product rather than the
; subtrahend, the 231 operand order is expected, with the load folded as the
; (%rdi) operand of vfmsub231sd.
define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
; ALL-LABEL: test_x86_fmsub_231_m:
; ALL:       ## BB#0:
; ALL-NEXT:    vfmsub231sd (%rdi), %xmm0, %xmm1
; ALL-NEXT:    vmovaps %zmm1, %zmm0
; ALL-NEXT:    retq
  %a2 = load double , double *%a2_ptr
  %x = fmul double %a0, %a2
  %res = fsub double %x, %a1
  ret double %res
}

; a1 * splat(C) + a2 on <16 x float>, where every lane of the constant is the
; same value (0x3FB99999A0000000, approx. 0.1).
; The splat multiplicand is expected to become a RIP-relative {1to16}
; broadcast memory operand of vfmadd231ps, with the sum accumulated in zmm1
; and then moved to zmm0.
define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test231_br:
; ALL:       ## BB#0:
; ALL-NEXT:    vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
; ALL-NEXT:    vmovaps %zmm1, %zmm0
; ALL-NEXT:    retq
  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  %b2 = fadd <16 x float> %b1, %a2
  ret <16 x float> %b2
}

; a1 * a2 + splat(C) on <16 x float>, where every lane of the constant is the
; same value (0x3FB99999A0000000, approx. 0.1).
; Here the splat is the addend, so the 213 operand order is expected, with the
; constant supplied as a RIP-relative {1to16} broadcast memory operand and the
; result produced directly in zmm0.
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL-LABEL: test213_br:
; ALL:       ## BB#0:
; ALL-NEXT:    vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
; ALL-NEXT:    retq
  %b1 = fmul <16 x float> %a1, %a2
  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b2
}

; mask (a*c+b , a)
; Per-lane select between a0*a2 + a1 (mask bit set) and a0 (mask bit clear),
; with a2 loaded from memory using align 1.
; Because the pass-through value is a multiplicand, a merge-masked
; vfmadd132ps with the load folded as (%rdi) is expected, writing zmm0 in
; place. The two configurations differ only in how the <16 x i1> argument is
; turned into k1: sign-extend/shift/vptestmd with +avx512f, and
; vpsllw/vpmovb2m with -mcpu=skx.
define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd132_ps:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd132_ps:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a0, %a2
  %y = fadd <16 x float> %x, %a1
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
  ret <16 x float> %res
}

; mask (a*c+b , b)
; Per-lane select between a0*a2 + a1 (mask bit set) and a1 (mask bit clear),
; with a2 loaded from memory using align 1.
; Because the pass-through value is the addend, a merge-masked vfmadd231ps
; with the load folded as (%rdi) is expected, accumulating into zmm1 and then
; moving the result to zmm0. Mask materialization into k1 differs between the
; two configurations exactly as shown in the expected output below.
define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd231_ps:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd231_ps:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a0, %a2
  %y = fadd <16 x float> %x, %a1
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
  ret <16 x float> %res
}

; mask (b*a+c , b)
; Per-lane select between a1*a0 + a2 (mask bit set) and a1 (mask bit clear),
; with the addend a2 loaded from memory using align 1.
; The pass-through value is a multiplicand and the memory value is the addend,
; so a merge-masked vfmadd213ps with the load folded as (%rdi) is expected,
; writing zmm1 and then moving the result to zmm0. Mask materialization into
; k1 differs between the two configurations exactly as shown in the expected
; output below.
define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; KNL-LABEL: test_x86_fmadd213_ps:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_x86_fmadd213_ps:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a1, %a0
  %y = fadd <16 x float> %x, %a2
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
  ret <16 x float> %res
}
