; RUN: llc -march=amdgcn -verify-machineinstrs -mtriple=amdgcn-- -o - %s | FileCheck %s
;
; Provenance: external/llvm/test/CodeGen/AMDGPU/coalescer_remat.ll
; (aosp_15_r20, revision 9880d6810fe72a1726cb53787c6711e909410d58)

declare float @llvm.fma.f32(float, float, float)

; This checks that rematerialization support of the coalescer does not
; unnecessarily widen the register class. Without those fixes > 20 VGPRs
; are used here.
; Also check that some rematerialization of the 0 constant happened.
; CHECK-LABEL: foobar
; CHECK:  v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK:  v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK:  v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK:  v_mov_b32_e32 v{{[0-9]+}}, 0
; It's probably OK if this is slightly higher:
; CHECK: ; NumVgprs: 9
;
; Shape of the function under test:
;   %out  - global (addrspace 1) <4 x float> destination, stored to inside the
;           loop and again at exit.
;   %in   - global (addrspace 1) <4 x float> source, reloaded every iteration.
;   %flag - the loop is entered only when this equals 1; otherwise control
;           goes straight to %exit with all four results equal to 0.0.
define void @foobar(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %flag) {
entry:
  ; Skip the loop entirely unless %flag == 1.
  %cmpflag = icmp eq i32 %flag, 1
  br i1 %cmpflag, label %loop, label %exit

loop:
  ; %c is the iteration counter; %v0..%v3 are four independent scalar
  ; accumulators, each seeded with the constant 0.0 on entry from %entry.
  %c = phi i32 [0, %entry], [%cnext, %loop]
  %v0 = phi float [0.0, %entry], [%fma.0, %loop]
  %v1 = phi float [0.0, %entry], [%fma.1, %loop]
  %v2 = phi float [0.0, %entry], [%fma.2, %loop]
  %v3 = phi float [0.0, %entry], [%fma.3, %loop]

  ; Try to get the 0 constant to get coalesced into a wide register
  ; (only lane 0 of the stored vector is defined; lanes 1-3 are undef).
  %blup = insertelement <4 x float> undef, float %v0, i32 0
  store <4 x float> %blup, <4 x float> addrspace(1)* %out

  ; Load one vector per iteration and split it into its four lanes.
  %load = load <4 x float>, <4 x float> addrspace(1)* %in
  %load.0 = extractelement <4 x float> %load, i32 0
  %load.1 = extractelement <4 x float> %load, i32 1
  %load.2 = extractelement <4 x float> %load, i32 2
  %load.3 = extractelement <4 x float> %load, i32 3

  ; Per-lane update: v_i = fma(v_i, load_i, v_i).
  %fma.0 = call float @llvm.fma.f32(float %v0, float %load.0, float %v0)
  %fma.1 = call float @llvm.fma.f32(float %v1, float %load.1, float %v1)
  %fma.2 = call float @llvm.fma.f32(float %v2, float %load.2, float %v2)
  %fma.3 = call float @llvm.fma.f32(float %v3, float %load.3, float %v3)

  ; Leave the loop once the incremented counter reaches 42.
  %cnext = add nsw i32 %c, 1
  %cmp = icmp eq i32 %cnext, 42
  br i1 %cmp, label %exit, label %loop

exit:
  ; Each result is 0.0 when arriving from %entry, or the last fma value when
  ; arriving from %loop.
  %ev0 = phi float [0.0, %entry], [%fma.0, %loop]
  %ev1 = phi float [0.0, %entry], [%fma.1, %loop]
  %ev2 = phi float [0.0, %entry], [%fma.2, %loop]
  %ev3 = phi float [0.0, %entry], [%fma.3, %loop]

  ; Reassemble the four scalars into one vector and write it to %out.
  %dst.0 = insertelement <4 x float> undef,  float %ev0, i32 0
  %dst.1 = insertelement <4 x float> %dst.0, float %ev1, i32 1
  %dst.2 = insertelement <4 x float> %dst.1, float %ev2, i32 2
  %dst.3 = insertelement <4 x float> %dst.2, float %ev3, i32 3
  store <4 x float> %dst.3, <4 x float> addrspace(1)* %out
  ret void
}