; xref: /aosp_15_r20/external/llvm/test/CodeGen/AMDGPU/mad-combine.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.

; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s

; Make sure we don't form mad with denormals
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
; Intrinsic declarations for this file. Attribute group #0 is defined outside
; the part of the file shown here.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0
; -----------------------------------------------------------------------------
; (fadd (fmul x, y), z) -> (fma x, y, z)
;
; Loads a, b and c from three consecutive floats, computes (a * b) + c with a
; separate fmul and fadd, and stores the sum. The SI-STD runs expect one
; v_mac_f32 accumulating into the register that holds c, the SI-DENORM run
; expects v_fma_f32, and the SI-DENORM-SLOWFMAF run expects the multiply and
; the add to stay separate. Every run also checks that the result is stored.
; FUNC-LABEL: {{^}}combine_to_mad_f32_0:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]

; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]

; SI-DENORM-SLOWFMAF-NOT: v_fma
; SI-DENORM-SLOWFMAF-NOT: v_mad

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
; SI-DENORM-SLOWFMAF: buffer_store_dword [[RESULT]]
define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Element pointers indexed by the workitem id: in[tid], in[tid+1],
  ; in[tid+2] and out[tid].
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  ; Volatile loads; presumably so each operand keeps its own dword load and
  ; matches the offset 0/4/8 checks above.
  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; The multiply has a single use, as the first operand of the add.
  %mul = fmul float %a, %b
  %fma = fadd float %mul, %c
  store float %fma, float addrspace(1)* %gep.out
  ret void
}
; -----------------------------------------------------------------------------
; (fadd (fmul x, y), z) -> (fma x, y, z)
;
; Same pattern as above, but the multiply feeds two adds (with c and with d).
; The SI-STD and SI-DENORM runs expect the multiply to be folded into both
; users; the SI-DENORM-SLOWFMAF run expects one v_mul_f32 shared by two
; v_add_f32. Both results are stored with volatile stores at out[tid] and
; out[tid+1], and every run checks both stores.
; FUNC-LABEL: {{^}}combine_to_mad_f32_0_2use:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]]
; SI-STD-DAG: v_mac_f32_e32 [[D]], [[B]], [[A]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]

; SI-DENORM-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DENORM-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-STD-DAG: buffer_store_dword [[C]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DENORM-SLOWFMAF-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DENORM-SLOWFMAF-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Input pointers in[tid] .. in[tid+3]; output pointers out[tid], out[tid+1].
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; One multiply, two users.
  %mul = fmul float %a, %b
  %fma0 = fadd float %mul, %c
  %fma1 = fadd float %mul, %d

  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}
; -----------------------------------------------------------------------------
; (fadd x, (fmul y, z)) -> (fma y, z, x)
;
; Commuted form of the basic pattern: the multiply is the second operand of
; the fadd. Expected instructions are the same as for combine_to_mad_f32_0,
; except that the SI-DENORM-SLOWFMAF v_add_f32 takes its operands in the
; opposite order. Every run also checks that the result is stored.
; FUNC-LABEL: {{^}}combine_to_mad_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]

; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
; SI-DENORM-SLOWFMAF: buffer_store_dword [[RESULT]]
define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Element pointers indexed by the workitem id: in[tid], in[tid+1],
  ; in[tid+2] and out[tid].
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; The multiply is the right-hand operand of the add here.
  %mul = fmul float %a, %b
  %fma = fadd float %c, %mul
  store float %fma, float addrspace(1)* %gep.out
  ret void
}
; -----------------------------------------------------------------------------
; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
;
; Computes (a * b) - c. The SI-STD runs expect v_mad_f32 and the SI-DENORM run
; expects v_fma_f32, each with a negated third operand. The SI-DENORM-SLOWFMAF
; run expects a v_mul_f32 followed by v_subrev_f32. All runs check that the
; result register is stored.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]

; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Element pointers indexed by the workitem id: in[tid], in[tid+1],
  ; in[tid+2] and out[tid].
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; The multiply is the minuend: (a * b) - c.
  %mul = fmul float %a, %b
  %fma = fsub float %mul, %c
  store float %fma, float addrspace(1)* %gep.out
  ret void
}
; -----------------------------------------------------------------------------
; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
;
; Two-use variant: the multiply is the minuend of two subtractions, (a * b) - c
; and (a * b) - d. The SI-STD and SI-DENORM runs expect the multiply folded
; into both users; the SI-DENORM-SLOWFMAF run expects one v_mul_f32 shared by
; two v_subrev_f32. Both results are stored with volatile stores.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32_2use:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Input pointers in[tid] .. in[tid+3]; output pointers out[tid], out[tid+1].
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; One multiply, used as the minuend of two subtractions.
  %mul = fmul float %a, %b
  %fma0 = fsub float %mul, %c
  %fma1 = fsub float %mul, %d
  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}
; -----------------------------------------------------------------------------
; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
;
; Computes c - (a * b). The SI-STD runs expect v_mad_f32 and the SI-DENORM run
; expects v_fma_f32, each with a negated first operand. The SI-DENORM-SLOWFMAF
; run expects a v_mul_f32 followed by v_subrev_f32. All runs check that the
; result register is stored.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]

; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Element pointers indexed by the workitem id: in[tid], in[tid+1],
  ; in[tid+2] and out[tid].
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; The multiply is the subtrahend: c - (a * b).
  %mul = fmul float %a, %b
  %fma = fsub float %c, %mul
  store float %fma, float addrspace(1)* %gep.out
  ret void
}
; -----------------------------------------------------------------------------
; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
;
; Two-use variant: the multiply is the subtrahend of two subtractions,
; c - (a * b) and d - (a * b). The SI-STD and SI-DENORM runs expect the
; multiply folded into both users; the SI-DENORM-SLOWFMAF run expects one
; v_mul_f32 shared by two v_subrev_f32. Both results are stored with volatile
; stores. D is captured from the fourth load so the [[D]] operands below refer
; to this function's own load.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32_2use:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Input pointers in[tid] .. in[tid+3]; output pointers out[tid], out[tid+1].
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; One multiply, used as the subtrahend of two subtractions.
  %mul = fmul float %a, %b
  %fma0 = fsub float %c, %mul
  %fma1 = fsub float %d, %mul
  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}
; -----------------------------------------------------------------------------
; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
;
; Computes (-(a * b)) - c, with the negation written as -0.0 - x. The SI-STD
; runs expect v_mad_f32 and the SI-DENORM run expects v_fma_f32, each with
; negated first and third operands. The SI-DENORM-SLOWFMAF run expects a
; v_mul_f32 followed by a v_sub_f32 that negates the product. All runs check
; that the result register is stored.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]

; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[TMP]], [[C]]

; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Element pointers indexed by the workitem id: in[tid], in[tid+1],
  ; in[tid+2] and out[tid].
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; Negate the product (-0.0 - mul), then subtract c from it.
  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma = fsub float %mul.neg, %c

  store float %fma, float addrspace(1)* %gep.out
  ret void
}
; -----------------------------------------------------------------------------
; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
;
; Two-use variant in which the negated product has two users:
; (-(a * b)) - c and (-(a * b)) - d. The SI-STD and SI-DENORM runs expect the
; multiply and negation folded into both users; the SI-DENORM-SLOWFMAF run
; expects one v_mul_f32 shared by two v_sub_f32. Both results are stored with
; volatile stores. D is captured from the fourth load so the [[D]] operands
; below refer to this function's own load.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_neg:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT1:v[0-9]+]], -[[TMP]], [[D]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Input pointers in[tid] .. in[tid+3]; output pointers out[tid], out[tid+1].
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; The negated product (-0.0 - mul) is the value with two uses; the multiply
  ; itself is used only by the negation.
  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma0 = fsub float %mul.neg, %c
  %fma1 = fsub float %mul.neg, %d

  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}
345*9880d681SAndroid Build Coastguard Worker
; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; In this variant the fmul result also has a second, non-negated use:
; (fsub (fmul x, y), d). All four inputs are bound from this kernel's own
; loads so that no pattern depends on a capture left over from an earlier test.
; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_mul:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]

; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Four consecutive input floats and two consecutive output floats, indexed
  ; by the workitem id.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2
  %d = load volatile float, float addrspace(1)* %gep.3

  ; %mul is used both negated (via %mul.neg, for %fma0) and directly (for %fma1).
  %mul = fmul float %a, %b
  %mul.neg = fsub float -0.0, %mul
  %fma0 = fsub float %mul.neg, %c
  %fma1 = fsub float %mul, %d

  store volatile float %fma0, float addrspace(1)* %gep.out.0
  store volatile float %fma1, float addrspace(1)* %gep.out.1
  ret void
}
388*9880d681SAndroid Build Coastguard Worker
; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))
;
; The product x*y goes through the llvm.fma intrinsic, and the checks expect a
; v_fma_f32 for it in every run. Only the denormal run expects the inner fmul
; and the outer fsub to be folded into a second fma; the other runs keep a
; separate multiply and subtract.

; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_0_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}

; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]

; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], [[D]], [[E]], -[[C]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP0]]

; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]

; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Five consecutive input floats and one output float, indexed by the
  ; workitem id.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %z = load volatile float, float addrspace(1)* %gep.2
  %u = load volatile float, float addrspace(1)* %gep.3
  %v = load volatile float, float addrspace(1)* %gep.4

  ; (x * y + u * v) - z, with x * y + (...) expressed as an explicit fma call.
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
  %tmp2 = fsub float %tmp1, %z

  store float %tmp2, float addrspace(1)* %gep.out
  ret void
}
432*9880d681SAndroid Build Coastguard Worker
433*9880d681SAndroid Build Coastguard Worker; fold (fsub x, (fma y, z, (fmul u, v)))
434*9880d681SAndroid Build Coastguard Worker;   -> (fma (fneg y), z, (fma (fneg u), v, x))
435*9880d681SAndroid Build Coastguard Worker
436*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_1_f32:
437*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
438*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
439*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
440*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
441*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
442*9880d681SAndroid Build Coastguard Worker
443*9880d681SAndroid Build Coastguard Worker; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
444*9880d681SAndroid Build Coastguard Worker; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
445*9880d681SAndroid Build Coastguard Worker; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
446*9880d681SAndroid Build Coastguard Worker
447*9880d681SAndroid Build Coastguard Worker; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], -[[D]], [[E]], [[A]]
448*9880d681SAndroid Build Coastguard Worker; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP0]]
449*9880d681SAndroid Build Coastguard Worker
450*9880d681SAndroid Build Coastguard Worker; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
451*9880d681SAndroid Build Coastguard Worker; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
452*9880d681SAndroid Build Coastguard Worker; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
453*9880d681SAndroid Build Coastguard Worker
454*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
455*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Kernel body: computes x - fma(y, z, u * v) from five consecutive input
; floats and stores the single result.
define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Input and output addresses are indexed by the workitem id.
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.0 = getelementptr float, float addrspace(1)* %in, i32 %lane
  %src.1 = getelementptr float, float addrspace(1)* %src.0, i32 1
  %src.2 = getelementptr float, float addrspace(1)* %src.0, i32 2
  %src.3 = getelementptr float, float addrspace(1)* %src.0, i32 3
  %src.4 = getelementptr float, float addrspace(1)* %src.0, i32 4
  %dst = getelementptr float, float addrspace(1)* %out, i32 %lane

  %minuend = load volatile float, float addrspace(1)* %src.0
  %fma.lhs = load volatile float, float addrspace(1)* %src.1
  %fma.rhs = load volatile float, float addrspace(1)* %src.2
  %mul.lhs = load volatile float, float addrspace(1)* %src.3
  %mul.rhs = load volatile float, float addrspace(1)* %src.4

  ; The fused value is the subtrahend here: minuend - fma(...).
  %inner = fmul float %mul.lhs, %mul.rhs
  %fused = call float @llvm.fma.f32(float %fma.lhs, float %fma.rhs, float %inner) #0
  %diff = fsub float %minuend, %fused

  store float %diff, float addrspace(1)* %dst
  ret void
}
478*9880d681SAndroid Build Coastguard Worker
; fold (fsub (fmuladd x, y, (fmul u, v)), z)
;   -> (fmuladd x, y, (fmuladd u, v, (fneg z)))
;
; Same shape as the fma variant of this fold, but the outer multiply-add is
; the llvm.fmuladd intrinsic. The checks expect v_mad_f32 + v_mac_f32 in the
; standard runs, two v_fma_f32 in the denormal run, and unfused
; mul/mul/add/sub in the slow-FMA denormal run.

; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_2_f32:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}

; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
; SI-STD: v_mac_f32_e32 [[TMP]], [[B]], [[A]]

; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]

; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]]
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]

; SI-DENORM: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DENORM-SLOWFMAF: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Five consecutive input floats and one output float, indexed by the
  ; workitem id.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %z = load volatile float, float addrspace(1)* %gep.2
  %u = load volatile float, float addrspace(1)* %gep.3
  %v = load volatile float, float addrspace(1)* %gep.4

  ; (x * y + u * v) - z, with the outer multiply-add expressed as fmuladd.
  %tmp0 = fmul float %u, %v
  %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
  %tmp2 = fsub float %tmp1, %z

  store float %tmp2, float addrspace(1)* %gep.out
  ret void
}
524*9880d681SAndroid Build Coastguard Worker
525*9880d681SAndroid Build Coastguard Worker; fold (fsub x, (fmuladd y, z, (fmul u, v)))
526*9880d681SAndroid Build Coastguard Worker;   -> (fmuladd (fneg y), z, (fmuladd (fneg u), v, x))
527*9880d681SAndroid Build Coastguard Worker
528*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_3_f32:
529*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
530*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
531*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
532*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
533*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
534*9880d681SAndroid Build Coastguard Worker
535*9880d681SAndroid Build Coastguard Worker; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
536*9880d681SAndroid Build Coastguard Worker; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
537*9880d681SAndroid Build Coastguard Worker
538*9880d681SAndroid Build Coastguard Worker; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
539*9880d681SAndroid Build Coastguard Worker; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
540*9880d681SAndroid Build Coastguard Worker
541*9880d681SAndroid Build Coastguard Worker; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
542*9880d681SAndroid Build Coastguard Worker; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]]
543*9880d681SAndroid Build Coastguard Worker; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
544*9880d681SAndroid Build Coastguard Worker; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[A]]
545*9880d681SAndroid Build Coastguard Worker
546*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
547*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
; Kernel body: computes x - fmuladd(y, z, u * v) from five consecutive input
; floats and stores the single result.
define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
  ; Input and output addresses are indexed by the workitem id.
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.0 = getelementptr float, float addrspace(1)* %in, i32 %lane
  %src.1 = getelementptr float, float addrspace(1)* %src.0, i32 1
  %src.2 = getelementptr float, float addrspace(1)* %src.0, i32 2
  %src.3 = getelementptr float, float addrspace(1)* %src.0, i32 3
  %src.4 = getelementptr float, float addrspace(1)* %src.0, i32 4
  %dst = getelementptr float, float addrspace(1)* %out, i32 %lane

  %minuend = load volatile float, float addrspace(1)* %src.0
  %mad.lhs = load volatile float, float addrspace(1)* %src.1
  %mad.rhs = load volatile float, float addrspace(1)* %src.2
  %mul.lhs = load volatile float, float addrspace(1)* %src.3
  %mul.rhs = load volatile float, float addrspace(1)* %src.4

  ; The multiply-add value is the subtrahend here: minuend - fmuladd(...).
  %inner = fmul float %mul.lhs, %mul.rhs
  %muladd = call float @llvm.fmuladd.f32(float %mad.lhs, float %mad.rhs, float %inner) #0
  %diff = fsub float %minuend, %muladd

  store float %diff, float addrspace(1)* %dst
  ret void
}
570*9880d681SAndroid Build Coastguard Worker
; Group #0 is referenced by the intrinsic declarations and by their call sites.
attributes #0 = { nounwind readnone }
; Group #1 is referenced by the test kernel definitions.
attributes #1 = { nounwind }
573