; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
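; Check that the load of %b is folded into the scalar FMA instruction as a memory
; operand: the aab pattern (a*a op b) selects the 213 form and the aba pattern
; (a*b op a) selects the 132 form, with only %a loaded into a register first.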

attributes #0 = { nounwind }

declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)

define void @fmadd_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmadd_aab_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmadd_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmadd_aba_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmsub_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmsub_aab_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmsub_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmsub_aba_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fnmadd_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmadd_aab_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fnmadd_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmadd_aba_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fnmsub_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmsub_aab_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fnmsub_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmsub_aba_ss:
; CHECK:      vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

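; The same folding is expected for the double-precision (sd) intrinsics below;
; the scalar result is stored back with vmovlpd.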
define void @fmadd_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmadd_aab_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fmadd_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmadd_aba_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fmsub_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmsub_aab_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fmsub_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmsub_aba_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fnmadd_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmadd_aab_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fnmadd_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmadd_aba_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fnmsub_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmsub_aab_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fnmsub_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmsub_aba_sd:
; CHECK:      vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}