; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/peephole-multiple-folds.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s
;
; Test multiple peephole-time folds in a single basic block.
; <rdar://problem/16478629>

; Loop whose body holds two independent FMA accumulations, each fed by its own
; 256-bit load with alignment 1 (from %p1 and from %p2). Per the FileCheck
; directives inside %loopbody, both loads are expected to show up as the
; memory operand of a vfmadd231ps, i.e. two folds inside one basic block.
;
; Parameters:
;   %p1 - pointer to the <8 x float> reloaded on every iteration for %vsum1
;   %p2 - pointer to the <8 x float> reloaded on every iteration for %vsum2
; Returns:
;   %vsum1.next + %vsum2.next, added once the loop has exited.
define <8 x float> @test_peephole_multi_fold(<8 x float>* %p1, <8 x float>* %p2) {
entry:
  br label %loopbody

loopbody:
; The expected memory operand is the pointer-argument register itself. Two
; registers are accepted per argument: rdi/rsi are the first two integer
; argument registers under System V AMD64, rcx/rdx under Microsoft x64.
; NOTE(review): presumably both are allowed because the llc invocation fixes
; only the architecture, not the OS/ABI - confirm.
; CHECK: test_peephole_multi_fold:
; CHECK: vfmadd231ps ({{%rdi|%rcx}}),
; CHECK: vfmadd231ps ({{%rsi|%rdx}}),
  ; Accumulators: zero when arriving from %entry, otherwise the FMA result
  ; carried around the back edge.
  %vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [ zeroinitializer, %entry ]
  %vsum2 = phi <8 x float> [ %vsum2.next, %loopbody ], [ zeroinitializer, %entry ]
  ; Each load has exactly one user (the FMA call below it).
  %m1 = load <8 x float>, <8 x float>* %p1, align 1
  %m2 = load <8 x float>, <8 x float>* %p2, align 1
  ; Operands are (loaded vector, zeroinitializer, running accumulator).
  %vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m1, <8 x float> zeroinitializer, <8 x float> %vsum1)
  %vsum2.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m2, <8 x float> zeroinitializer, <8 x float> %vsum2)
  ; Stay in the loop while lane 0 of %vsum1.next compares ordered-equal to 0.0.
  %vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0
  %c = fcmp oeq float %vsum1.next.1, 0.0
  br i1 %c, label %loopbody, label %loopexit

loopexit:
  ; Both accumulators are used here, so both FMA results are live out of the loop.
  %r = fadd <8 x float> %vsum1.next, %vsum2.next
  ret <8 x float> %r
}

; FMA intrinsic on <8 x float> vectors; called twice per loop iteration by
; @test_peephole_multi_fold.
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
30