//===-- SIInsertWaits.cpp - Insert wait instructions ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Insert wait instructions for memory reads and writes.
///
/// Memory reads and writes are issued asynchronously, so we need to insert
/// S_WAITCNT instructions when we want to access any of their results or
/// overwrite any register that's used asynchronously.
//
//===----------------------------------------------------------------------===//
18*9880d681SAndroid Build Coastguard Worker
19*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
20*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
21*9880d681SAndroid Build Coastguard Worker #include "SIDefines.h"
22*9880d681SAndroid Build Coastguard Worker #include "SIInstrInfo.h"
23*9880d681SAndroid Build Coastguard Worker #include "SIMachineFunctionInfo.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunction.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
26*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
27*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
28*9880d681SAndroid Build Coastguard Worker
29*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "si-insert-waits"
30*9880d681SAndroid Build Coastguard Worker
31*9880d681SAndroid Build Coastguard Worker using namespace llvm;
32*9880d681SAndroid Build Coastguard Worker
33*9880d681SAndroid Build Coastguard Worker namespace {
34*9880d681SAndroid Build Coastguard Worker
/// \brief Holds one value for each of the hardware wait counters.
///
/// The three values can be addressed by name through \c Named or by index
/// through \c Array (VM is index 0, EXP is index 1, LGKM is index 2).
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;
  unsigned Array[3];
};
45*9880d681SAndroid Build Coastguard Worker
/// \brief Kind of instruction recorded as the last opcode: a scalar memory
/// access (SMEM), a vector memory access (VMEM), or anything else (OTHER).
enum InstType {
  OTHER,
  SMEM,
  VMEM
};
51*9880d681SAndroid Build Coastguard Worker
52*9880d681SAndroid Build Coastguard Worker typedef Counters RegCounters[512];
53*9880d681SAndroid Build Coastguard Worker typedef std::pair<unsigned, unsigned> RegInterval;
54*9880d681SAndroid Build Coastguard Worker
55*9880d681SAndroid Build Coastguard Worker class SIInsertWaits : public MachineFunctionPass {
56*9880d681SAndroid Build Coastguard Worker
57*9880d681SAndroid Build Coastguard Worker private:
58*9880d681SAndroid Build Coastguard Worker const SISubtarget *ST;
59*9880d681SAndroid Build Coastguard Worker const SIInstrInfo *TII;
60*9880d681SAndroid Build Coastguard Worker const SIRegisterInfo *TRI;
61*9880d681SAndroid Build Coastguard Worker const MachineRegisterInfo *MRI;
62*9880d681SAndroid Build Coastguard Worker
63*9880d681SAndroid Build Coastguard Worker /// \brief Constant hardware limits
64*9880d681SAndroid Build Coastguard Worker static const Counters WaitCounts;
65*9880d681SAndroid Build Coastguard Worker
66*9880d681SAndroid Build Coastguard Worker /// \brief Constant zero value
67*9880d681SAndroid Build Coastguard Worker static const Counters ZeroCounts;
68*9880d681SAndroid Build Coastguard Worker
69*9880d681SAndroid Build Coastguard Worker /// \brief Counter values we have already waited on.
70*9880d681SAndroid Build Coastguard Worker Counters WaitedOn;
71*9880d681SAndroid Build Coastguard Worker
72*9880d681SAndroid Build Coastguard Worker /// \brief Counter values that we must wait on before the next counter
73*9880d681SAndroid Build Coastguard Worker /// increase.
74*9880d681SAndroid Build Coastguard Worker Counters DelayedWaitOn;
75*9880d681SAndroid Build Coastguard Worker
76*9880d681SAndroid Build Coastguard Worker /// \brief Counter values for last instruction issued.
77*9880d681SAndroid Build Coastguard Worker Counters LastIssued;
78*9880d681SAndroid Build Coastguard Worker
79*9880d681SAndroid Build Coastguard Worker /// \brief Registers used by async instructions.
80*9880d681SAndroid Build Coastguard Worker RegCounters UsedRegs;
81*9880d681SAndroid Build Coastguard Worker
82*9880d681SAndroid Build Coastguard Worker /// \brief Registers defined by async instructions.
83*9880d681SAndroid Build Coastguard Worker RegCounters DefinedRegs;
84*9880d681SAndroid Build Coastguard Worker
85*9880d681SAndroid Build Coastguard Worker /// \brief Different export instruction types seen since last wait.
86*9880d681SAndroid Build Coastguard Worker unsigned ExpInstrTypesSeen;
87*9880d681SAndroid Build Coastguard Worker
88*9880d681SAndroid Build Coastguard Worker /// \brief Type of the last opcode.
89*9880d681SAndroid Build Coastguard Worker InstType LastOpcodeType;
90*9880d681SAndroid Build Coastguard Worker
91*9880d681SAndroid Build Coastguard Worker bool LastInstWritesM0;
92*9880d681SAndroid Build Coastguard Worker
93*9880d681SAndroid Build Coastguard Worker /// \brief Whether the machine function returns void
94*9880d681SAndroid Build Coastguard Worker bool ReturnsVoid;
95*9880d681SAndroid Build Coastguard Worker
96*9880d681SAndroid Build Coastguard Worker /// Whether the VCCZ bit is possibly corrupt
97*9880d681SAndroid Build Coastguard Worker bool VCCZCorrupt;
98*9880d681SAndroid Build Coastguard Worker
99*9880d681SAndroid Build Coastguard Worker /// \brief Get increment/decrement amount for this instruction.
100*9880d681SAndroid Build Coastguard Worker Counters getHwCounts(MachineInstr &MI);
101*9880d681SAndroid Build Coastguard Worker
102*9880d681SAndroid Build Coastguard Worker /// \brief Is operand relevant for async execution?
103*9880d681SAndroid Build Coastguard Worker bool isOpRelevant(MachineOperand &Op);
104*9880d681SAndroid Build Coastguard Worker
105*9880d681SAndroid Build Coastguard Worker /// \brief Get register interval an operand affects.
106*9880d681SAndroid Build Coastguard Worker RegInterval getRegInterval(const TargetRegisterClass *RC,
107*9880d681SAndroid Build Coastguard Worker const MachineOperand &Reg) const;
108*9880d681SAndroid Build Coastguard Worker
109*9880d681SAndroid Build Coastguard Worker /// \brief Handle instructions async components
110*9880d681SAndroid Build Coastguard Worker void pushInstruction(MachineBasicBlock &MBB,
111*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I,
112*9880d681SAndroid Build Coastguard Worker const Counters& Increment);
113*9880d681SAndroid Build Coastguard Worker
114*9880d681SAndroid Build Coastguard Worker /// \brief Insert the actual wait instruction
115*9880d681SAndroid Build Coastguard Worker bool insertWait(MachineBasicBlock &MBB,
116*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I,
117*9880d681SAndroid Build Coastguard Worker const Counters &Counts);
118*9880d681SAndroid Build Coastguard Worker
119*9880d681SAndroid Build Coastguard Worker /// \brief Handle existing wait instructions (from intrinsics)
120*9880d681SAndroid Build Coastguard Worker void handleExistingWait(MachineBasicBlock::iterator I);
121*9880d681SAndroid Build Coastguard Worker
122*9880d681SAndroid Build Coastguard Worker /// \brief Do we need def2def checks?
123*9880d681SAndroid Build Coastguard Worker bool unorderedDefines(MachineInstr &MI);
124*9880d681SAndroid Build Coastguard Worker
125*9880d681SAndroid Build Coastguard Worker /// \brief Resolve all operand dependencies to counter requirements
126*9880d681SAndroid Build Coastguard Worker Counters handleOperands(MachineInstr &MI);
127*9880d681SAndroid Build Coastguard Worker
128*9880d681SAndroid Build Coastguard Worker /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
129*9880d681SAndroid Build Coastguard Worker void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
130*9880d681SAndroid Build Coastguard Worker
131*9880d681SAndroid Build Coastguard Worker /// Return true if there are LGKM instrucitons that haven't been waited on
132*9880d681SAndroid Build Coastguard Worker /// yet.
133*9880d681SAndroid Build Coastguard Worker bool hasOutstandingLGKM() const;
134*9880d681SAndroid Build Coastguard Worker
135*9880d681SAndroid Build Coastguard Worker public:
136*9880d681SAndroid Build Coastguard Worker static char ID;
137*9880d681SAndroid Build Coastguard Worker
SIInsertWaits()138*9880d681SAndroid Build Coastguard Worker SIInsertWaits() :
139*9880d681SAndroid Build Coastguard Worker MachineFunctionPass(ID),
140*9880d681SAndroid Build Coastguard Worker ST(nullptr),
141*9880d681SAndroid Build Coastguard Worker TII(nullptr),
142*9880d681SAndroid Build Coastguard Worker TRI(nullptr),
143*9880d681SAndroid Build Coastguard Worker ExpInstrTypesSeen(0),
144*9880d681SAndroid Build Coastguard Worker VCCZCorrupt(false) { }
145*9880d681SAndroid Build Coastguard Worker
146*9880d681SAndroid Build Coastguard Worker bool runOnMachineFunction(MachineFunction &MF) override;
147*9880d681SAndroid Build Coastguard Worker
getPassName() const148*9880d681SAndroid Build Coastguard Worker const char *getPassName() const override {
149*9880d681SAndroid Build Coastguard Worker return "SI insert wait instructions";
150*9880d681SAndroid Build Coastguard Worker }
151*9880d681SAndroid Build Coastguard Worker
getAnalysisUsage(AnalysisUsage & AU) const152*9880d681SAndroid Build Coastguard Worker void getAnalysisUsage(AnalysisUsage &AU) const override {
153*9880d681SAndroid Build Coastguard Worker AU.setPreservesCFG();
154*9880d681SAndroid Build Coastguard Worker MachineFunctionPass::getAnalysisUsage(AU);
155*9880d681SAndroid Build Coastguard Worker }
156*9880d681SAndroid Build Coastguard Worker };
157*9880d681SAndroid Build Coastguard Worker
158*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace
159*9880d681SAndroid Build Coastguard Worker
160*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
161*9880d681SAndroid Build Coastguard Worker "SI Insert Waits", false, false)
162*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_END(SIInsertWaits, DEBUG_TYPE,
163*9880d681SAndroid Build Coastguard Worker "SI Insert Waits", false, false)
164*9880d681SAndroid Build Coastguard Worker
165*9880d681SAndroid Build Coastguard Worker char SIInsertWaits::ID = 0;
166*9880d681SAndroid Build Coastguard Worker
167*9880d681SAndroid Build Coastguard Worker char &llvm::SIInsertWaitsID = SIInsertWaits::ID;
168*9880d681SAndroid Build Coastguard Worker
createSIInsertWaitsPass()169*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createSIInsertWaitsPass() {
170*9880d681SAndroid Build Coastguard Worker return new SIInsertWaits();
171*9880d681SAndroid Build Coastguard Worker }
172*9880d681SAndroid Build Coastguard Worker
173*9880d681SAndroid Build Coastguard Worker const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
174*9880d681SAndroid Build Coastguard Worker const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
175*9880d681SAndroid Build Coastguard Worker
readsVCCZ(unsigned Opcode)176*9880d681SAndroid Build Coastguard Worker static bool readsVCCZ(unsigned Opcode) {
177*9880d681SAndroid Build Coastguard Worker return Opcode == AMDGPU::S_CBRANCH_VCCNZ || Opcode == AMDGPU::S_CBRANCH_VCCZ;
178*9880d681SAndroid Build Coastguard Worker }
179*9880d681SAndroid Build Coastguard Worker
hasOutstandingLGKM() const180*9880d681SAndroid Build Coastguard Worker bool SIInsertWaits::hasOutstandingLGKM() const {
181*9880d681SAndroid Build Coastguard Worker return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
182*9880d681SAndroid Build Coastguard Worker }
183*9880d681SAndroid Build Coastguard Worker
getHwCounts(MachineInstr & MI)184*9880d681SAndroid Build Coastguard Worker Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
185*9880d681SAndroid Build Coastguard Worker uint64_t TSFlags = MI.getDesc().TSFlags;
186*9880d681SAndroid Build Coastguard Worker Counters Result = { { 0, 0, 0 } };
187*9880d681SAndroid Build Coastguard Worker
188*9880d681SAndroid Build Coastguard Worker Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
189*9880d681SAndroid Build Coastguard Worker
190*9880d681SAndroid Build Coastguard Worker // Only consider stores or EXP for EXP_CNT
191*9880d681SAndroid Build Coastguard Worker Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
192*9880d681SAndroid Build Coastguard Worker (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
193*9880d681SAndroid Build Coastguard Worker
194*9880d681SAndroid Build Coastguard Worker // LGKM may uses larger values
195*9880d681SAndroid Build Coastguard Worker if (TSFlags & SIInstrFlags::LGKM_CNT) {
196*9880d681SAndroid Build Coastguard Worker
197*9880d681SAndroid Build Coastguard Worker if (TII->isSMRD(MI)) {
198*9880d681SAndroid Build Coastguard Worker
199*9880d681SAndroid Build Coastguard Worker if (MI.getNumOperands() != 0) {
200*9880d681SAndroid Build Coastguard Worker assert(MI.getOperand(0).isReg() &&
201*9880d681SAndroid Build Coastguard Worker "First LGKM operand must be a register!");
202*9880d681SAndroid Build Coastguard Worker
203*9880d681SAndroid Build Coastguard Worker // XXX - What if this is a write into a super register?
204*9880d681SAndroid Build Coastguard Worker const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
205*9880d681SAndroid Build Coastguard Worker unsigned Size = RC->getSize();
206*9880d681SAndroid Build Coastguard Worker Result.Named.LGKM = Size > 4 ? 2 : 1;
207*9880d681SAndroid Build Coastguard Worker } else {
208*9880d681SAndroid Build Coastguard Worker // s_dcache_inv etc. do not have a a destination register. Assume we
209*9880d681SAndroid Build Coastguard Worker // want a wait on these.
210*9880d681SAndroid Build Coastguard Worker // XXX - What is the right value?
211*9880d681SAndroid Build Coastguard Worker Result.Named.LGKM = 1;
212*9880d681SAndroid Build Coastguard Worker }
213*9880d681SAndroid Build Coastguard Worker } else {
214*9880d681SAndroid Build Coastguard Worker // DS
215*9880d681SAndroid Build Coastguard Worker Result.Named.LGKM = 1;
216*9880d681SAndroid Build Coastguard Worker }
217*9880d681SAndroid Build Coastguard Worker
218*9880d681SAndroid Build Coastguard Worker } else {
219*9880d681SAndroid Build Coastguard Worker Result.Named.LGKM = 0;
220*9880d681SAndroid Build Coastguard Worker }
221*9880d681SAndroid Build Coastguard Worker
222*9880d681SAndroid Build Coastguard Worker return Result;
223*9880d681SAndroid Build Coastguard Worker }
224*9880d681SAndroid Build Coastguard Worker
isOpRelevant(MachineOperand & Op)225*9880d681SAndroid Build Coastguard Worker bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
226*9880d681SAndroid Build Coastguard Worker // Constants are always irrelevant
227*9880d681SAndroid Build Coastguard Worker if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
228*9880d681SAndroid Build Coastguard Worker return false;
229*9880d681SAndroid Build Coastguard Worker
230*9880d681SAndroid Build Coastguard Worker // Defines are always relevant
231*9880d681SAndroid Build Coastguard Worker if (Op.isDef())
232*9880d681SAndroid Build Coastguard Worker return true;
233*9880d681SAndroid Build Coastguard Worker
234*9880d681SAndroid Build Coastguard Worker // For exports all registers are relevant
235*9880d681SAndroid Build Coastguard Worker MachineInstr &MI = *Op.getParent();
236*9880d681SAndroid Build Coastguard Worker if (MI.getOpcode() == AMDGPU::EXP)
237*9880d681SAndroid Build Coastguard Worker return true;
238*9880d681SAndroid Build Coastguard Worker
239*9880d681SAndroid Build Coastguard Worker // For stores the stored value is also relevant
240*9880d681SAndroid Build Coastguard Worker if (!MI.getDesc().mayStore())
241*9880d681SAndroid Build Coastguard Worker return false;
242*9880d681SAndroid Build Coastguard Worker
243*9880d681SAndroid Build Coastguard Worker // Check if this operand is the value being stored.
244*9880d681SAndroid Build Coastguard Worker // Special case for DS/FLAT instructions, since the address
245*9880d681SAndroid Build Coastguard Worker // operand comes before the value operand and it may have
246*9880d681SAndroid Build Coastguard Worker // multiple data operands.
247*9880d681SAndroid Build Coastguard Worker
248*9880d681SAndroid Build Coastguard Worker if (TII->isDS(MI) || TII->isFLAT(MI)) {
249*9880d681SAndroid Build Coastguard Worker MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
250*9880d681SAndroid Build Coastguard Worker if (Data && Op.isIdenticalTo(*Data))
251*9880d681SAndroid Build Coastguard Worker return true;
252*9880d681SAndroid Build Coastguard Worker }
253*9880d681SAndroid Build Coastguard Worker
254*9880d681SAndroid Build Coastguard Worker if (TII->isDS(MI)) {
255*9880d681SAndroid Build Coastguard Worker MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
256*9880d681SAndroid Build Coastguard Worker if (Data0 && Op.isIdenticalTo(*Data0))
257*9880d681SAndroid Build Coastguard Worker return true;
258*9880d681SAndroid Build Coastguard Worker
259*9880d681SAndroid Build Coastguard Worker MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
260*9880d681SAndroid Build Coastguard Worker return Data1 && Op.isIdenticalTo(*Data1);
261*9880d681SAndroid Build Coastguard Worker }
262*9880d681SAndroid Build Coastguard Worker
263*9880d681SAndroid Build Coastguard Worker // NOTE: This assumes that the value operand is before the
264*9880d681SAndroid Build Coastguard Worker // address operand, and that there is only one value operand.
265*9880d681SAndroid Build Coastguard Worker for (MachineInstr::mop_iterator I = MI.operands_begin(),
266*9880d681SAndroid Build Coastguard Worker E = MI.operands_end(); I != E; ++I) {
267*9880d681SAndroid Build Coastguard Worker
268*9880d681SAndroid Build Coastguard Worker if (I->isReg() && I->isUse())
269*9880d681SAndroid Build Coastguard Worker return Op.isIdenticalTo(*I);
270*9880d681SAndroid Build Coastguard Worker }
271*9880d681SAndroid Build Coastguard Worker
272*9880d681SAndroid Build Coastguard Worker return false;
273*9880d681SAndroid Build Coastguard Worker }
274*9880d681SAndroid Build Coastguard Worker
getRegInterval(const TargetRegisterClass * RC,const MachineOperand & Reg) const275*9880d681SAndroid Build Coastguard Worker RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
276*9880d681SAndroid Build Coastguard Worker const MachineOperand &Reg) const {
277*9880d681SAndroid Build Coastguard Worker unsigned Size = RC->getSize();
278*9880d681SAndroid Build Coastguard Worker assert(Size >= 4);
279*9880d681SAndroid Build Coastguard Worker
280*9880d681SAndroid Build Coastguard Worker RegInterval Result;
281*9880d681SAndroid Build Coastguard Worker Result.first = TRI->getEncodingValue(Reg.getReg());
282*9880d681SAndroid Build Coastguard Worker Result.second = Result.first + Size / 4;
283*9880d681SAndroid Build Coastguard Worker
284*9880d681SAndroid Build Coastguard Worker return Result;
285*9880d681SAndroid Build Coastguard Worker }
286*9880d681SAndroid Build Coastguard Worker
pushInstruction(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const Counters & Increment)287*9880d681SAndroid Build Coastguard Worker void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
288*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I,
289*9880d681SAndroid Build Coastguard Worker const Counters &Increment) {
290*9880d681SAndroid Build Coastguard Worker
291*9880d681SAndroid Build Coastguard Worker // Get the hardware counter increments and sum them up
292*9880d681SAndroid Build Coastguard Worker Counters Limit = ZeroCounts;
293*9880d681SAndroid Build Coastguard Worker unsigned Sum = 0;
294*9880d681SAndroid Build Coastguard Worker
295*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0; i < 3; ++i) {
296*9880d681SAndroid Build Coastguard Worker LastIssued.Array[i] += Increment.Array[i];
297*9880d681SAndroid Build Coastguard Worker if (Increment.Array[i])
298*9880d681SAndroid Build Coastguard Worker Limit.Array[i] = LastIssued.Array[i];
299*9880d681SAndroid Build Coastguard Worker Sum += Increment.Array[i];
300*9880d681SAndroid Build Coastguard Worker }
301*9880d681SAndroid Build Coastguard Worker
302*9880d681SAndroid Build Coastguard Worker // If we don't increase anything then that's it
303*9880d681SAndroid Build Coastguard Worker if (Sum == 0) {
304*9880d681SAndroid Build Coastguard Worker LastOpcodeType = OTHER;
305*9880d681SAndroid Build Coastguard Worker return;
306*9880d681SAndroid Build Coastguard Worker }
307*9880d681SAndroid Build Coastguard Worker
308*9880d681SAndroid Build Coastguard Worker if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
309*9880d681SAndroid Build Coastguard Worker // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
310*9880d681SAndroid Build Coastguard Worker // or SMEM clause, respectively.
311*9880d681SAndroid Build Coastguard Worker //
312*9880d681SAndroid Build Coastguard Worker // The temporary workaround is to break the clauses with S_NOP.
313*9880d681SAndroid Build Coastguard Worker //
314*9880d681SAndroid Build Coastguard Worker // The proper solution would be to allocate registers such that all source
315*9880d681SAndroid Build Coastguard Worker // and destination registers don't overlap, e.g. this is illegal:
316*9880d681SAndroid Build Coastguard Worker // r0 = load r2
317*9880d681SAndroid Build Coastguard Worker // r2 = load r0
318*9880d681SAndroid Build Coastguard Worker if (LastOpcodeType == VMEM && Increment.Named.VM) {
319*9880d681SAndroid Build Coastguard Worker // Insert a NOP to break the clause.
320*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
321*9880d681SAndroid Build Coastguard Worker .addImm(0);
322*9880d681SAndroid Build Coastguard Worker LastInstWritesM0 = false;
323*9880d681SAndroid Build Coastguard Worker }
324*9880d681SAndroid Build Coastguard Worker
325*9880d681SAndroid Build Coastguard Worker if (TII->isSMRD(*I))
326*9880d681SAndroid Build Coastguard Worker LastOpcodeType = SMEM;
327*9880d681SAndroid Build Coastguard Worker else if (Increment.Named.VM)
328*9880d681SAndroid Build Coastguard Worker LastOpcodeType = VMEM;
329*9880d681SAndroid Build Coastguard Worker }
330*9880d681SAndroid Build Coastguard Worker
331*9880d681SAndroid Build Coastguard Worker // Remember which export instructions we have seen
332*9880d681SAndroid Build Coastguard Worker if (Increment.Named.EXP) {
333*9880d681SAndroid Build Coastguard Worker ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
334*9880d681SAndroid Build Coastguard Worker }
335*9880d681SAndroid Build Coastguard Worker
336*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
337*9880d681SAndroid Build Coastguard Worker MachineOperand &Op = I->getOperand(i);
338*9880d681SAndroid Build Coastguard Worker if (!isOpRelevant(Op))
339*9880d681SAndroid Build Coastguard Worker continue;
340*9880d681SAndroid Build Coastguard Worker
341*9880d681SAndroid Build Coastguard Worker const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
342*9880d681SAndroid Build Coastguard Worker RegInterval Interval = getRegInterval(RC, Op);
343*9880d681SAndroid Build Coastguard Worker for (unsigned j = Interval.first; j < Interval.second; ++j) {
344*9880d681SAndroid Build Coastguard Worker
345*9880d681SAndroid Build Coastguard Worker // Remember which registers we define
346*9880d681SAndroid Build Coastguard Worker if (Op.isDef())
347*9880d681SAndroid Build Coastguard Worker DefinedRegs[j] = Limit;
348*9880d681SAndroid Build Coastguard Worker
349*9880d681SAndroid Build Coastguard Worker // and which one we are using
350*9880d681SAndroid Build Coastguard Worker if (Op.isUse())
351*9880d681SAndroid Build Coastguard Worker UsedRegs[j] = Limit;
352*9880d681SAndroid Build Coastguard Worker }
353*9880d681SAndroid Build Coastguard Worker }
354*9880d681SAndroid Build Coastguard Worker }
355*9880d681SAndroid Build Coastguard Worker
insertWait(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const Counters & Required)356*9880d681SAndroid Build Coastguard Worker bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
357*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I,
358*9880d681SAndroid Build Coastguard Worker const Counters &Required) {
359*9880d681SAndroid Build Coastguard Worker
360*9880d681SAndroid Build Coastguard Worker // End of program? No need to wait on anything
361*9880d681SAndroid Build Coastguard Worker // A function not returning void needs to wait, because other bytecode will
362*9880d681SAndroid Build Coastguard Worker // be appended after it and we don't know what it will be.
363*9880d681SAndroid Build Coastguard Worker if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
364*9880d681SAndroid Build Coastguard Worker return false;
365*9880d681SAndroid Build Coastguard Worker
366*9880d681SAndroid Build Coastguard Worker // Figure out if the async instructions execute in order
367*9880d681SAndroid Build Coastguard Worker bool Ordered[3];
368*9880d681SAndroid Build Coastguard Worker
369*9880d681SAndroid Build Coastguard Worker // VM_CNT is always ordered
370*9880d681SAndroid Build Coastguard Worker Ordered[0] = true;
371*9880d681SAndroid Build Coastguard Worker
372*9880d681SAndroid Build Coastguard Worker // EXP_CNT is unordered if we have both EXP & VM-writes
373*9880d681SAndroid Build Coastguard Worker Ordered[1] = ExpInstrTypesSeen == 3;
374*9880d681SAndroid Build Coastguard Worker
375*9880d681SAndroid Build Coastguard Worker // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
376*9880d681SAndroid Build Coastguard Worker Ordered[2] = false;
377*9880d681SAndroid Build Coastguard Worker
378*9880d681SAndroid Build Coastguard Worker // The values we are going to put into the S_WAITCNT instruction
379*9880d681SAndroid Build Coastguard Worker Counters Counts = WaitCounts;
380*9880d681SAndroid Build Coastguard Worker
381*9880d681SAndroid Build Coastguard Worker // Do we really need to wait?
382*9880d681SAndroid Build Coastguard Worker bool NeedWait = false;
383*9880d681SAndroid Build Coastguard Worker
384*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0; i < 3; ++i) {
385*9880d681SAndroid Build Coastguard Worker
386*9880d681SAndroid Build Coastguard Worker if (Required.Array[i] <= WaitedOn.Array[i])
387*9880d681SAndroid Build Coastguard Worker continue;
388*9880d681SAndroid Build Coastguard Worker
389*9880d681SAndroid Build Coastguard Worker NeedWait = true;
390*9880d681SAndroid Build Coastguard Worker
391*9880d681SAndroid Build Coastguard Worker if (Ordered[i]) {
392*9880d681SAndroid Build Coastguard Worker unsigned Value = LastIssued.Array[i] - Required.Array[i];
393*9880d681SAndroid Build Coastguard Worker
394*9880d681SAndroid Build Coastguard Worker // Adjust the value to the real hardware possibilities.
395*9880d681SAndroid Build Coastguard Worker Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
396*9880d681SAndroid Build Coastguard Worker
397*9880d681SAndroid Build Coastguard Worker } else
398*9880d681SAndroid Build Coastguard Worker Counts.Array[i] = 0;
399*9880d681SAndroid Build Coastguard Worker
400*9880d681SAndroid Build Coastguard Worker // Remember on what we have waited on.
401*9880d681SAndroid Build Coastguard Worker WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
402*9880d681SAndroid Build Coastguard Worker }
403*9880d681SAndroid Build Coastguard Worker
404*9880d681SAndroid Build Coastguard Worker if (!NeedWait)
405*9880d681SAndroid Build Coastguard Worker return false;
406*9880d681SAndroid Build Coastguard Worker
407*9880d681SAndroid Build Coastguard Worker // Reset EXP_CNT instruction types
408*9880d681SAndroid Build Coastguard Worker if (Counts.Named.EXP == 0)
409*9880d681SAndroid Build Coastguard Worker ExpInstrTypesSeen = 0;
410*9880d681SAndroid Build Coastguard Worker
411*9880d681SAndroid Build Coastguard Worker // Build the wait instruction
412*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
413*9880d681SAndroid Build Coastguard Worker .addImm((Counts.Named.VM & 0xF) |
414*9880d681SAndroid Build Coastguard Worker ((Counts.Named.EXP & 0x7) << 4) |
415*9880d681SAndroid Build Coastguard Worker ((Counts.Named.LGKM & 0xF) << 8));
416*9880d681SAndroid Build Coastguard Worker
417*9880d681SAndroid Build Coastguard Worker LastOpcodeType = OTHER;
418*9880d681SAndroid Build Coastguard Worker LastInstWritesM0 = false;
419*9880d681SAndroid Build Coastguard Worker return true;
420*9880d681SAndroid Build Coastguard Worker }
421*9880d681SAndroid Build Coastguard Worker
422*9880d681SAndroid Build Coastguard Worker /// \brief helper function for handleOperands
increaseCounters(Counters & Dst,const Counters & Src)423*9880d681SAndroid Build Coastguard Worker static void increaseCounters(Counters &Dst, const Counters &Src) {
424*9880d681SAndroid Build Coastguard Worker
425*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0; i < 3; ++i)
426*9880d681SAndroid Build Coastguard Worker Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
427*9880d681SAndroid Build Coastguard Worker }
428*9880d681SAndroid Build Coastguard Worker
429*9880d681SAndroid Build Coastguard Worker /// \brief check whether any of the counters is non-zero
countersNonZero(const Counters & Counter)430*9880d681SAndroid Build Coastguard Worker static bool countersNonZero(const Counters &Counter) {
431*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0; i < 3; ++i)
432*9880d681SAndroid Build Coastguard Worker if (Counter.Array[i])
433*9880d681SAndroid Build Coastguard Worker return true;
434*9880d681SAndroid Build Coastguard Worker return false;
435*9880d681SAndroid Build Coastguard Worker }
436*9880d681SAndroid Build Coastguard Worker
handleExistingWait(MachineBasicBlock::iterator I)437*9880d681SAndroid Build Coastguard Worker void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
438*9880d681SAndroid Build Coastguard Worker assert(I->getOpcode() == AMDGPU::S_WAITCNT);
439*9880d681SAndroid Build Coastguard Worker
440*9880d681SAndroid Build Coastguard Worker unsigned Imm = I->getOperand(0).getImm();
441*9880d681SAndroid Build Coastguard Worker Counters Counts, WaitOn;
442*9880d681SAndroid Build Coastguard Worker
443*9880d681SAndroid Build Coastguard Worker Counts.Named.VM = Imm & 0xF;
444*9880d681SAndroid Build Coastguard Worker Counts.Named.EXP = (Imm >> 4) & 0x7;
445*9880d681SAndroid Build Coastguard Worker Counts.Named.LGKM = (Imm >> 8) & 0xF;
446*9880d681SAndroid Build Coastguard Worker
447*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0; i < 3; ++i) {
448*9880d681SAndroid Build Coastguard Worker if (Counts.Array[i] <= LastIssued.Array[i])
449*9880d681SAndroid Build Coastguard Worker WaitOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
450*9880d681SAndroid Build Coastguard Worker else
451*9880d681SAndroid Build Coastguard Worker WaitOn.Array[i] = 0;
452*9880d681SAndroid Build Coastguard Worker }
453*9880d681SAndroid Build Coastguard Worker
454*9880d681SAndroid Build Coastguard Worker increaseCounters(DelayedWaitOn, WaitOn);
455*9880d681SAndroid Build Coastguard Worker }
456*9880d681SAndroid Build Coastguard Worker
handleOperands(MachineInstr & MI)457*9880d681SAndroid Build Coastguard Worker Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
458*9880d681SAndroid Build Coastguard Worker
459*9880d681SAndroid Build Coastguard Worker Counters Result = ZeroCounts;
460*9880d681SAndroid Build Coastguard Worker
461*9880d681SAndroid Build Coastguard Worker // For each register affected by this instruction increase the result
462*9880d681SAndroid Build Coastguard Worker // sequence.
463*9880d681SAndroid Build Coastguard Worker //
464*9880d681SAndroid Build Coastguard Worker // TODO: We could probably just look at explicit operands if we removed VCC /
465*9880d681SAndroid Build Coastguard Worker // EXEC from SMRD dest reg classes.
466*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
467*9880d681SAndroid Build Coastguard Worker MachineOperand &Op = MI.getOperand(i);
468*9880d681SAndroid Build Coastguard Worker if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
469*9880d681SAndroid Build Coastguard Worker continue;
470*9880d681SAndroid Build Coastguard Worker
471*9880d681SAndroid Build Coastguard Worker const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
472*9880d681SAndroid Build Coastguard Worker RegInterval Interval = getRegInterval(RC, Op);
473*9880d681SAndroid Build Coastguard Worker for (unsigned j = Interval.first; j < Interval.second; ++j) {
474*9880d681SAndroid Build Coastguard Worker
475*9880d681SAndroid Build Coastguard Worker if (Op.isDef()) {
476*9880d681SAndroid Build Coastguard Worker increaseCounters(Result, UsedRegs[j]);
477*9880d681SAndroid Build Coastguard Worker increaseCounters(Result, DefinedRegs[j]);
478*9880d681SAndroid Build Coastguard Worker }
479*9880d681SAndroid Build Coastguard Worker
480*9880d681SAndroid Build Coastguard Worker if (Op.isUse())
481*9880d681SAndroid Build Coastguard Worker increaseCounters(Result, DefinedRegs[j]);
482*9880d681SAndroid Build Coastguard Worker }
483*9880d681SAndroid Build Coastguard Worker }
484*9880d681SAndroid Build Coastguard Worker
485*9880d681SAndroid Build Coastguard Worker return Result;
486*9880d681SAndroid Build Coastguard Worker }
487*9880d681SAndroid Build Coastguard Worker
handleSendMsg(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)488*9880d681SAndroid Build Coastguard Worker void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
489*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I) {
490*9880d681SAndroid Build Coastguard Worker if (ST->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
491*9880d681SAndroid Build Coastguard Worker return;
492*9880d681SAndroid Build Coastguard Worker
493*9880d681SAndroid Build Coastguard Worker // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
494*9880d681SAndroid Build Coastguard Worker if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
495*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
496*9880d681SAndroid Build Coastguard Worker LastInstWritesM0 = false;
497*9880d681SAndroid Build Coastguard Worker return;
498*9880d681SAndroid Build Coastguard Worker }
499*9880d681SAndroid Build Coastguard Worker
500*9880d681SAndroid Build Coastguard Worker // Set whether this instruction sets M0
501*9880d681SAndroid Build Coastguard Worker LastInstWritesM0 = false;
502*9880d681SAndroid Build Coastguard Worker
503*9880d681SAndroid Build Coastguard Worker unsigned NumOperands = I->getNumOperands();
504*9880d681SAndroid Build Coastguard Worker for (unsigned i = 0; i < NumOperands; i++) {
505*9880d681SAndroid Build Coastguard Worker const MachineOperand &Op = I->getOperand(i);
506*9880d681SAndroid Build Coastguard Worker
507*9880d681SAndroid Build Coastguard Worker if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
508*9880d681SAndroid Build Coastguard Worker LastInstWritesM0 = true;
509*9880d681SAndroid Build Coastguard Worker }
510*9880d681SAndroid Build Coastguard Worker }
511*9880d681SAndroid Build Coastguard Worker
512*9880d681SAndroid Build Coastguard Worker // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
513*9880d681SAndroid Build Coastguard Worker // around other non-memory instructions.
runOnMachineFunction(MachineFunction & MF)514*9880d681SAndroid Build Coastguard Worker bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
515*9880d681SAndroid Build Coastguard Worker bool Changes = false;
516*9880d681SAndroid Build Coastguard Worker
517*9880d681SAndroid Build Coastguard Worker ST = &MF.getSubtarget<SISubtarget>();
518*9880d681SAndroid Build Coastguard Worker TII = ST->getInstrInfo();
519*9880d681SAndroid Build Coastguard Worker TRI = &TII->getRegisterInfo();
520*9880d681SAndroid Build Coastguard Worker MRI = &MF.getRegInfo();
521*9880d681SAndroid Build Coastguard Worker
522*9880d681SAndroid Build Coastguard Worker WaitedOn = ZeroCounts;
523*9880d681SAndroid Build Coastguard Worker DelayedWaitOn = ZeroCounts;
524*9880d681SAndroid Build Coastguard Worker LastIssued = ZeroCounts;
525*9880d681SAndroid Build Coastguard Worker LastOpcodeType = OTHER;
526*9880d681SAndroid Build Coastguard Worker LastInstWritesM0 = false;
527*9880d681SAndroid Build Coastguard Worker ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
528*9880d681SAndroid Build Coastguard Worker
529*9880d681SAndroid Build Coastguard Worker memset(&UsedRegs, 0, sizeof(UsedRegs));
530*9880d681SAndroid Build Coastguard Worker memset(&DefinedRegs, 0, sizeof(DefinedRegs));
531*9880d681SAndroid Build Coastguard Worker
532*9880d681SAndroid Build Coastguard Worker SmallVector<MachineInstr *, 4> RemoveMI;
533*9880d681SAndroid Build Coastguard Worker
534*9880d681SAndroid Build Coastguard Worker for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
535*9880d681SAndroid Build Coastguard Worker BI != BE; ++BI) {
536*9880d681SAndroid Build Coastguard Worker
537*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *BI;
538*9880d681SAndroid Build Coastguard Worker for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
539*9880d681SAndroid Build Coastguard Worker I != E; ++I) {
540*9880d681SAndroid Build Coastguard Worker
541*9880d681SAndroid Build Coastguard Worker if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
542*9880d681SAndroid Build Coastguard Worker // There is a hardware bug on CI/SI where SMRD instruction may corrupt
543*9880d681SAndroid Build Coastguard Worker // vccz bit, so when we detect that an instruction may read from a
544*9880d681SAndroid Build Coastguard Worker // corrupt vccz bit, we need to:
545*9880d681SAndroid Build Coastguard Worker // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
546*9880d681SAndroid Build Coastguard Worker // complete.
547*9880d681SAndroid Build Coastguard Worker // 2. Restore the correct value of vccz by writing the current value
548*9880d681SAndroid Build Coastguard Worker // of vcc back to vcc.
549*9880d681SAndroid Build Coastguard Worker
550*9880d681SAndroid Build Coastguard Worker if (TII->isSMRD(I->getOpcode())) {
551*9880d681SAndroid Build Coastguard Worker VCCZCorrupt = true;
552*9880d681SAndroid Build Coastguard Worker } else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
553*9880d681SAndroid Build Coastguard Worker // FIXME: We only care about SMRD instructions here, not LDS or GDS.
554*9880d681SAndroid Build Coastguard Worker // Whenever we store a value in vcc, the correct value of vccz is
555*9880d681SAndroid Build Coastguard Worker // restored.
556*9880d681SAndroid Build Coastguard Worker VCCZCorrupt = false;
557*9880d681SAndroid Build Coastguard Worker }
558*9880d681SAndroid Build Coastguard Worker
559*9880d681SAndroid Build Coastguard Worker // Check if we need to apply the bug work-around
560*9880d681SAndroid Build Coastguard Worker if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
561*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
562*9880d681SAndroid Build Coastguard Worker
563*9880d681SAndroid Build Coastguard Worker // Wait on everything, not just LGKM. vccz reads usually come from
564*9880d681SAndroid Build Coastguard Worker // terminators, and we always wait on everything at the end of the
565*9880d681SAndroid Build Coastguard Worker // block, so if we only wait on LGKM here, we might end up with
566*9880d681SAndroid Build Coastguard Worker // another s_waitcnt inserted right after this if there are non-LGKM
567*9880d681SAndroid Build Coastguard Worker // instructions still outstanding.
568*9880d681SAndroid Build Coastguard Worker insertWait(MBB, I, LastIssued);
569*9880d681SAndroid Build Coastguard Worker
570*9880d681SAndroid Build Coastguard Worker // Restore the vccz bit. Any time a value is written to vcc, the vcc
571*9880d681SAndroid Build Coastguard Worker // bit is updated, so we can restore the bit by reading the value of
572*9880d681SAndroid Build Coastguard Worker // vcc and then writing it back to the register.
573*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
574*9880d681SAndroid Build Coastguard Worker AMDGPU::VCC)
575*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::VCC);
576*9880d681SAndroid Build Coastguard Worker }
577*9880d681SAndroid Build Coastguard Worker }
578*9880d681SAndroid Build Coastguard Worker
579*9880d681SAndroid Build Coastguard Worker // Record pre-existing, explicitly requested waits
580*9880d681SAndroid Build Coastguard Worker if (I->getOpcode() == AMDGPU::S_WAITCNT) {
581*9880d681SAndroid Build Coastguard Worker handleExistingWait(*I);
582*9880d681SAndroid Build Coastguard Worker RemoveMI.push_back(&*I);
583*9880d681SAndroid Build Coastguard Worker continue;
584*9880d681SAndroid Build Coastguard Worker }
585*9880d681SAndroid Build Coastguard Worker
586*9880d681SAndroid Build Coastguard Worker Counters Required;
587*9880d681SAndroid Build Coastguard Worker
588*9880d681SAndroid Build Coastguard Worker // Wait for everything before a barrier.
589*9880d681SAndroid Build Coastguard Worker //
590*9880d681SAndroid Build Coastguard Worker // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
591*9880d681SAndroid Build Coastguard Worker // but we also want to wait for any other outstanding transfers before
592*9880d681SAndroid Build Coastguard Worker // signalling other hardware blocks
593*9880d681SAndroid Build Coastguard Worker if (I->getOpcode() == AMDGPU::S_BARRIER ||
594*9880d681SAndroid Build Coastguard Worker I->getOpcode() == AMDGPU::S_SENDMSG)
595*9880d681SAndroid Build Coastguard Worker Required = LastIssued;
596*9880d681SAndroid Build Coastguard Worker else
597*9880d681SAndroid Build Coastguard Worker Required = handleOperands(*I);
598*9880d681SAndroid Build Coastguard Worker
599*9880d681SAndroid Build Coastguard Worker Counters Increment = getHwCounts(*I);
600*9880d681SAndroid Build Coastguard Worker
601*9880d681SAndroid Build Coastguard Worker if (countersNonZero(Required) || countersNonZero(Increment))
602*9880d681SAndroid Build Coastguard Worker increaseCounters(Required, DelayedWaitOn);
603*9880d681SAndroid Build Coastguard Worker
604*9880d681SAndroid Build Coastguard Worker Changes |= insertWait(MBB, I, Required);
605*9880d681SAndroid Build Coastguard Worker
606*9880d681SAndroid Build Coastguard Worker pushInstruction(MBB, I, Increment);
607*9880d681SAndroid Build Coastguard Worker handleSendMsg(MBB, I);
608*9880d681SAndroid Build Coastguard Worker }
609*9880d681SAndroid Build Coastguard Worker
610*9880d681SAndroid Build Coastguard Worker // Wait for everything at the end of the MBB
611*9880d681SAndroid Build Coastguard Worker Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
612*9880d681SAndroid Build Coastguard Worker }
613*9880d681SAndroid Build Coastguard Worker
614*9880d681SAndroid Build Coastguard Worker for (MachineInstr *I : RemoveMI)
615*9880d681SAndroid Build Coastguard Worker I->eraseFromParent();
616*9880d681SAndroid Build Coastguard Worker
617*9880d681SAndroid Build Coastguard Worker return Changes;
618*9880d681SAndroid Build Coastguard Worker }
619