xref: /aosp_15_r20/external/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker /// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets the STACK_SIZE info.
13*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
14*9880d681SAndroid Build Coastguard Worker 
15*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
16*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
17*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
18*9880d681SAndroid Build Coastguard Worker #include "R600Defines.h"
19*9880d681SAndroid Build Coastguard Worker #include "R600InstrInfo.h"
20*9880d681SAndroid Build Coastguard Worker #include "R600MachineFunctionInfo.h"
21*9880d681SAndroid Build Coastguard Worker #include "R600RegisterInfo.h"
22*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
26*9880d681SAndroid Build Coastguard Worker 
27*9880d681SAndroid Build Coastguard Worker using namespace llvm;
28*9880d681SAndroid Build Coastguard Worker 
29*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "r600cf"
30*9880d681SAndroid Build Coastguard Worker 
31*9880d681SAndroid Build Coastguard Worker namespace {
32*9880d681SAndroid Build Coastguard Worker 
33*9880d681SAndroid Build Coastguard Worker struct CFStack {
34*9880d681SAndroid Build Coastguard Worker 
35*9880d681SAndroid Build Coastguard Worker   enum StackItem {
36*9880d681SAndroid Build Coastguard Worker     ENTRY = 0,
37*9880d681SAndroid Build Coastguard Worker     SUB_ENTRY = 1,
38*9880d681SAndroid Build Coastguard Worker     FIRST_NON_WQM_PUSH = 2,
39*9880d681SAndroid Build Coastguard Worker     FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
40*9880d681SAndroid Build Coastguard Worker   };
41*9880d681SAndroid Build Coastguard Worker 
42*9880d681SAndroid Build Coastguard Worker   const R600Subtarget *ST;
43*9880d681SAndroid Build Coastguard Worker   std::vector<StackItem> BranchStack;
44*9880d681SAndroid Build Coastguard Worker   std::vector<StackItem> LoopStack;
45*9880d681SAndroid Build Coastguard Worker   unsigned MaxStackSize;
46*9880d681SAndroid Build Coastguard Worker   unsigned CurrentEntries;
47*9880d681SAndroid Build Coastguard Worker   unsigned CurrentSubEntries;
48*9880d681SAndroid Build Coastguard Worker 
CFStack__anonbbe3dcb00111::CFStack49*9880d681SAndroid Build Coastguard Worker   CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
50*9880d681SAndroid Build Coastguard Worker       // We need to reserve a stack entry for CALL_FS in vertex shaders.
51*9880d681SAndroid Build Coastguard Worker       MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
52*9880d681SAndroid Build Coastguard Worker       CurrentEntries(0), CurrentSubEntries(0) { }
53*9880d681SAndroid Build Coastguard Worker 
54*9880d681SAndroid Build Coastguard Worker   unsigned getLoopDepth();
55*9880d681SAndroid Build Coastguard Worker   bool branchStackContains(CFStack::StackItem);
56*9880d681SAndroid Build Coastguard Worker   bool requiresWorkAroundForInst(unsigned Opcode);
57*9880d681SAndroid Build Coastguard Worker   unsigned getSubEntrySize(CFStack::StackItem Item);
58*9880d681SAndroid Build Coastguard Worker   void updateMaxStackSize();
59*9880d681SAndroid Build Coastguard Worker   void pushBranch(unsigned Opcode, bool isWQM = false);
60*9880d681SAndroid Build Coastguard Worker   void pushLoop();
61*9880d681SAndroid Build Coastguard Worker   void popBranch();
62*9880d681SAndroid Build Coastguard Worker   void popLoop();
63*9880d681SAndroid Build Coastguard Worker };
64*9880d681SAndroid Build Coastguard Worker 
getLoopDepth()65*9880d681SAndroid Build Coastguard Worker unsigned CFStack::getLoopDepth() {
66*9880d681SAndroid Build Coastguard Worker   return LoopStack.size();
67*9880d681SAndroid Build Coastguard Worker }
68*9880d681SAndroid Build Coastguard Worker 
branchStackContains(CFStack::StackItem Item)69*9880d681SAndroid Build Coastguard Worker bool CFStack::branchStackContains(CFStack::StackItem Item) {
70*9880d681SAndroid Build Coastguard Worker   for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
71*9880d681SAndroid Build Coastguard Worker        E = BranchStack.end(); I != E; ++I) {
72*9880d681SAndroid Build Coastguard Worker     if (*I == Item)
73*9880d681SAndroid Build Coastguard Worker       return true;
74*9880d681SAndroid Build Coastguard Worker   }
75*9880d681SAndroid Build Coastguard Worker   return false;
76*9880d681SAndroid Build Coastguard Worker }
77*9880d681SAndroid Build Coastguard Worker 
requiresWorkAroundForInst(unsigned Opcode)78*9880d681SAndroid Build Coastguard Worker bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
79*9880d681SAndroid Build Coastguard Worker   if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
80*9880d681SAndroid Build Coastguard Worker       getLoopDepth() > 1)
81*9880d681SAndroid Build Coastguard Worker     return true;
82*9880d681SAndroid Build Coastguard Worker 
83*9880d681SAndroid Build Coastguard Worker   if (!ST->hasCFAluBug())
84*9880d681SAndroid Build Coastguard Worker     return false;
85*9880d681SAndroid Build Coastguard Worker 
86*9880d681SAndroid Build Coastguard Worker   switch(Opcode) {
87*9880d681SAndroid Build Coastguard Worker   default: return false;
88*9880d681SAndroid Build Coastguard Worker   case AMDGPU::CF_ALU_PUSH_BEFORE:
89*9880d681SAndroid Build Coastguard Worker   case AMDGPU::CF_ALU_ELSE_AFTER:
90*9880d681SAndroid Build Coastguard Worker   case AMDGPU::CF_ALU_BREAK:
91*9880d681SAndroid Build Coastguard Worker   case AMDGPU::CF_ALU_CONTINUE:
92*9880d681SAndroid Build Coastguard Worker     if (CurrentSubEntries == 0)
93*9880d681SAndroid Build Coastguard Worker       return false;
94*9880d681SAndroid Build Coastguard Worker     if (ST->getWavefrontSize() == 64) {
95*9880d681SAndroid Build Coastguard Worker       // We are being conservative here.  We only require this work-around if
96*9880d681SAndroid Build Coastguard Worker       // CurrentSubEntries > 3 &&
97*9880d681SAndroid Build Coastguard Worker       // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
98*9880d681SAndroid Build Coastguard Worker       //
99*9880d681SAndroid Build Coastguard Worker       // We have to be conservative, because we don't know for certain that
100*9880d681SAndroid Build Coastguard Worker       // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
101*9880d681SAndroid Build Coastguard Worker       // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
102*9880d681SAndroid Build Coastguard Worker       // resources without any problems.
103*9880d681SAndroid Build Coastguard Worker       return CurrentSubEntries > 3;
104*9880d681SAndroid Build Coastguard Worker     } else {
105*9880d681SAndroid Build Coastguard Worker       assert(ST->getWavefrontSize() == 32);
106*9880d681SAndroid Build Coastguard Worker       // We are being conservative here.  We only require the work-around if
107*9880d681SAndroid Build Coastguard Worker       // CurrentSubEntries > 7 &&
108*9880d681SAndroid Build Coastguard Worker       // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
109*9880d681SAndroid Build Coastguard Worker       // See the comment on the wavefront size == 64 case for why we are
110*9880d681SAndroid Build Coastguard Worker       // being conservative.
111*9880d681SAndroid Build Coastguard Worker       return CurrentSubEntries > 7;
112*9880d681SAndroid Build Coastguard Worker     }
113*9880d681SAndroid Build Coastguard Worker   }
114*9880d681SAndroid Build Coastguard Worker }
115*9880d681SAndroid Build Coastguard Worker 
getSubEntrySize(CFStack::StackItem Item)116*9880d681SAndroid Build Coastguard Worker unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
117*9880d681SAndroid Build Coastguard Worker   switch(Item) {
118*9880d681SAndroid Build Coastguard Worker   default:
119*9880d681SAndroid Build Coastguard Worker     return 0;
120*9880d681SAndroid Build Coastguard Worker   case CFStack::FIRST_NON_WQM_PUSH:
121*9880d681SAndroid Build Coastguard Worker   assert(!ST->hasCaymanISA());
122*9880d681SAndroid Build Coastguard Worker   if (ST->getGeneration() <= R600Subtarget::R700) {
123*9880d681SAndroid Build Coastguard Worker     // +1 For the push operation.
124*9880d681SAndroid Build Coastguard Worker     // +2 Extra space required.
125*9880d681SAndroid Build Coastguard Worker     return 3;
126*9880d681SAndroid Build Coastguard Worker   } else {
127*9880d681SAndroid Build Coastguard Worker     // Some documentation says that this is not necessary on Evergreen,
128*9880d681SAndroid Build Coastguard Worker     // but experimentation has show that we need to allocate 1 extra
129*9880d681SAndroid Build Coastguard Worker     // sub-entry for the first non-WQM push.
130*9880d681SAndroid Build Coastguard Worker     // +1 For the push operation.
131*9880d681SAndroid Build Coastguard Worker     // +1 Extra space required.
132*9880d681SAndroid Build Coastguard Worker     return 2;
133*9880d681SAndroid Build Coastguard Worker   }
134*9880d681SAndroid Build Coastguard Worker   case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
135*9880d681SAndroid Build Coastguard Worker     assert(ST->getGeneration() >= R600Subtarget::EVERGREEN);
136*9880d681SAndroid Build Coastguard Worker     // +1 For the push operation.
137*9880d681SAndroid Build Coastguard Worker     // +1 Extra space required.
138*9880d681SAndroid Build Coastguard Worker     return 2;
139*9880d681SAndroid Build Coastguard Worker   case CFStack::SUB_ENTRY:
140*9880d681SAndroid Build Coastguard Worker     return 1;
141*9880d681SAndroid Build Coastguard Worker   }
142*9880d681SAndroid Build Coastguard Worker }
143*9880d681SAndroid Build Coastguard Worker 
updateMaxStackSize()144*9880d681SAndroid Build Coastguard Worker void CFStack::updateMaxStackSize() {
145*9880d681SAndroid Build Coastguard Worker   unsigned CurrentStackSize =
146*9880d681SAndroid Build Coastguard Worker       CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
147*9880d681SAndroid Build Coastguard Worker   MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
148*9880d681SAndroid Build Coastguard Worker }
149*9880d681SAndroid Build Coastguard Worker 
pushBranch(unsigned Opcode,bool isWQM)150*9880d681SAndroid Build Coastguard Worker void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
151*9880d681SAndroid Build Coastguard Worker   CFStack::StackItem Item = CFStack::ENTRY;
152*9880d681SAndroid Build Coastguard Worker   switch(Opcode) {
153*9880d681SAndroid Build Coastguard Worker   case AMDGPU::CF_PUSH_EG:
154*9880d681SAndroid Build Coastguard Worker   case AMDGPU::CF_ALU_PUSH_BEFORE:
155*9880d681SAndroid Build Coastguard Worker     if (!isWQM) {
156*9880d681SAndroid Build Coastguard Worker       if (!ST->hasCaymanISA() &&
157*9880d681SAndroid Build Coastguard Worker           !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
158*9880d681SAndroid Build Coastguard Worker         Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
159*9880d681SAndroid Build Coastguard Worker                                              // See comment in
160*9880d681SAndroid Build Coastguard Worker                                              // CFStack::getSubEntrySize()
161*9880d681SAndroid Build Coastguard Worker       else if (CurrentEntries > 0 &&
162*9880d681SAndroid Build Coastguard Worker                ST->getGeneration() > R600Subtarget::EVERGREEN &&
163*9880d681SAndroid Build Coastguard Worker                !ST->hasCaymanISA() &&
164*9880d681SAndroid Build Coastguard Worker                !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
165*9880d681SAndroid Build Coastguard Worker         Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
166*9880d681SAndroid Build Coastguard Worker       else
167*9880d681SAndroid Build Coastguard Worker         Item = CFStack::SUB_ENTRY;
168*9880d681SAndroid Build Coastguard Worker     } else
169*9880d681SAndroid Build Coastguard Worker       Item = CFStack::ENTRY;
170*9880d681SAndroid Build Coastguard Worker     break;
171*9880d681SAndroid Build Coastguard Worker   }
172*9880d681SAndroid Build Coastguard Worker   BranchStack.push_back(Item);
173*9880d681SAndroid Build Coastguard Worker   if (Item == CFStack::ENTRY)
174*9880d681SAndroid Build Coastguard Worker     CurrentEntries++;
175*9880d681SAndroid Build Coastguard Worker   else
176*9880d681SAndroid Build Coastguard Worker     CurrentSubEntries += getSubEntrySize(Item);
177*9880d681SAndroid Build Coastguard Worker   updateMaxStackSize();
178*9880d681SAndroid Build Coastguard Worker }
179*9880d681SAndroid Build Coastguard Worker 
pushLoop()180*9880d681SAndroid Build Coastguard Worker void CFStack::pushLoop() {
181*9880d681SAndroid Build Coastguard Worker   LoopStack.push_back(CFStack::ENTRY);
182*9880d681SAndroid Build Coastguard Worker   CurrentEntries++;
183*9880d681SAndroid Build Coastguard Worker   updateMaxStackSize();
184*9880d681SAndroid Build Coastguard Worker }
185*9880d681SAndroid Build Coastguard Worker 
popBranch()186*9880d681SAndroid Build Coastguard Worker void CFStack::popBranch() {
187*9880d681SAndroid Build Coastguard Worker   CFStack::StackItem Top = BranchStack.back();
188*9880d681SAndroid Build Coastguard Worker   if (Top == CFStack::ENTRY)
189*9880d681SAndroid Build Coastguard Worker     CurrentEntries--;
190*9880d681SAndroid Build Coastguard Worker   else
191*9880d681SAndroid Build Coastguard Worker     CurrentSubEntries-= getSubEntrySize(Top);
192*9880d681SAndroid Build Coastguard Worker   BranchStack.pop_back();
193*9880d681SAndroid Build Coastguard Worker }
194*9880d681SAndroid Build Coastguard Worker 
popLoop()195*9880d681SAndroid Build Coastguard Worker void CFStack::popLoop() {
196*9880d681SAndroid Build Coastguard Worker   CurrentEntries--;
197*9880d681SAndroid Build Coastguard Worker   LoopStack.pop_back();
198*9880d681SAndroid Build Coastguard Worker }
199*9880d681SAndroid Build Coastguard Worker 
200*9880d681SAndroid Build Coastguard Worker class R600ControlFlowFinalizer : public MachineFunctionPass {
201*9880d681SAndroid Build Coastguard Worker 
202*9880d681SAndroid Build Coastguard Worker private:
203*9880d681SAndroid Build Coastguard Worker   typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
204*9880d681SAndroid Build Coastguard Worker 
205*9880d681SAndroid Build Coastguard Worker   enum ControlFlowInstruction {
206*9880d681SAndroid Build Coastguard Worker     CF_TC,
207*9880d681SAndroid Build Coastguard Worker     CF_VC,
208*9880d681SAndroid Build Coastguard Worker     CF_CALL_FS,
209*9880d681SAndroid Build Coastguard Worker     CF_WHILE_LOOP,
210*9880d681SAndroid Build Coastguard Worker     CF_END_LOOP,
211*9880d681SAndroid Build Coastguard Worker     CF_LOOP_BREAK,
212*9880d681SAndroid Build Coastguard Worker     CF_LOOP_CONTINUE,
213*9880d681SAndroid Build Coastguard Worker     CF_JUMP,
214*9880d681SAndroid Build Coastguard Worker     CF_ELSE,
215*9880d681SAndroid Build Coastguard Worker     CF_POP,
216*9880d681SAndroid Build Coastguard Worker     CF_END
217*9880d681SAndroid Build Coastguard Worker   };
218*9880d681SAndroid Build Coastguard Worker 
219*9880d681SAndroid Build Coastguard Worker   static char ID;
220*9880d681SAndroid Build Coastguard Worker   const R600InstrInfo *TII;
221*9880d681SAndroid Build Coastguard Worker   const R600RegisterInfo *TRI;
222*9880d681SAndroid Build Coastguard Worker   unsigned MaxFetchInst;
223*9880d681SAndroid Build Coastguard Worker   const R600Subtarget *ST;
224*9880d681SAndroid Build Coastguard Worker 
IsTrivialInst(MachineInstr & MI) const225*9880d681SAndroid Build Coastguard Worker   bool IsTrivialInst(MachineInstr &MI) const {
226*9880d681SAndroid Build Coastguard Worker     switch (MI.getOpcode()) {
227*9880d681SAndroid Build Coastguard Worker     case AMDGPU::KILL:
228*9880d681SAndroid Build Coastguard Worker     case AMDGPU::RETURN:
229*9880d681SAndroid Build Coastguard Worker       return true;
230*9880d681SAndroid Build Coastguard Worker     default:
231*9880d681SAndroid Build Coastguard Worker       return false;
232*9880d681SAndroid Build Coastguard Worker     }
233*9880d681SAndroid Build Coastguard Worker   }
234*9880d681SAndroid Build Coastguard Worker 
getHWInstrDesc(ControlFlowInstruction CFI) const235*9880d681SAndroid Build Coastguard Worker   const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
236*9880d681SAndroid Build Coastguard Worker     unsigned Opcode = 0;
237*9880d681SAndroid Build Coastguard Worker     bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
238*9880d681SAndroid Build Coastguard Worker     switch (CFI) {
239*9880d681SAndroid Build Coastguard Worker     case CF_TC:
240*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
241*9880d681SAndroid Build Coastguard Worker       break;
242*9880d681SAndroid Build Coastguard Worker     case CF_VC:
243*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
244*9880d681SAndroid Build Coastguard Worker       break;
245*9880d681SAndroid Build Coastguard Worker     case CF_CALL_FS:
246*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
247*9880d681SAndroid Build Coastguard Worker       break;
248*9880d681SAndroid Build Coastguard Worker     case CF_WHILE_LOOP:
249*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
250*9880d681SAndroid Build Coastguard Worker       break;
251*9880d681SAndroid Build Coastguard Worker     case CF_END_LOOP:
252*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
253*9880d681SAndroid Build Coastguard Worker       break;
254*9880d681SAndroid Build Coastguard Worker     case CF_LOOP_BREAK:
255*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
256*9880d681SAndroid Build Coastguard Worker       break;
257*9880d681SAndroid Build Coastguard Worker     case CF_LOOP_CONTINUE:
258*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
259*9880d681SAndroid Build Coastguard Worker       break;
260*9880d681SAndroid Build Coastguard Worker     case CF_JUMP:
261*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
262*9880d681SAndroid Build Coastguard Worker       break;
263*9880d681SAndroid Build Coastguard Worker     case CF_ELSE:
264*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
265*9880d681SAndroid Build Coastguard Worker       break;
266*9880d681SAndroid Build Coastguard Worker     case CF_POP:
267*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
268*9880d681SAndroid Build Coastguard Worker       break;
269*9880d681SAndroid Build Coastguard Worker     case CF_END:
270*9880d681SAndroid Build Coastguard Worker       if (ST->hasCaymanISA()) {
271*9880d681SAndroid Build Coastguard Worker         Opcode = AMDGPU::CF_END_CM;
272*9880d681SAndroid Build Coastguard Worker         break;
273*9880d681SAndroid Build Coastguard Worker       }
274*9880d681SAndroid Build Coastguard Worker       Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
275*9880d681SAndroid Build Coastguard Worker       break;
276*9880d681SAndroid Build Coastguard Worker     }
277*9880d681SAndroid Build Coastguard Worker     assert (Opcode && "No opcode selected");
278*9880d681SAndroid Build Coastguard Worker     return TII->get(Opcode);
279*9880d681SAndroid Build Coastguard Worker   }
280*9880d681SAndroid Build Coastguard Worker 
isCompatibleWithClause(const MachineInstr & MI,std::set<unsigned> & DstRegs) const281*9880d681SAndroid Build Coastguard Worker   bool isCompatibleWithClause(const MachineInstr &MI,
282*9880d681SAndroid Build Coastguard Worker                               std::set<unsigned> &DstRegs) const {
283*9880d681SAndroid Build Coastguard Worker     unsigned DstMI, SrcMI;
284*9880d681SAndroid Build Coastguard Worker     for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
285*9880d681SAndroid Build Coastguard Worker                                           E = MI.operands_end();
286*9880d681SAndroid Build Coastguard Worker          I != E; ++I) {
287*9880d681SAndroid Build Coastguard Worker       const MachineOperand &MO = *I;
288*9880d681SAndroid Build Coastguard Worker       if (!MO.isReg())
289*9880d681SAndroid Build Coastguard Worker         continue;
290*9880d681SAndroid Build Coastguard Worker       if (MO.isDef()) {
291*9880d681SAndroid Build Coastguard Worker         unsigned Reg = MO.getReg();
292*9880d681SAndroid Build Coastguard Worker         if (AMDGPU::R600_Reg128RegClass.contains(Reg))
293*9880d681SAndroid Build Coastguard Worker           DstMI = Reg;
294*9880d681SAndroid Build Coastguard Worker         else
295*9880d681SAndroid Build Coastguard Worker           DstMI = TRI->getMatchingSuperReg(Reg,
296*9880d681SAndroid Build Coastguard Worker               TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
297*9880d681SAndroid Build Coastguard Worker               &AMDGPU::R600_Reg128RegClass);
298*9880d681SAndroid Build Coastguard Worker       }
299*9880d681SAndroid Build Coastguard Worker       if (MO.isUse()) {
300*9880d681SAndroid Build Coastguard Worker         unsigned Reg = MO.getReg();
301*9880d681SAndroid Build Coastguard Worker         if (AMDGPU::R600_Reg128RegClass.contains(Reg))
302*9880d681SAndroid Build Coastguard Worker           SrcMI = Reg;
303*9880d681SAndroid Build Coastguard Worker         else
304*9880d681SAndroid Build Coastguard Worker           SrcMI = TRI->getMatchingSuperReg(Reg,
305*9880d681SAndroid Build Coastguard Worker               TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
306*9880d681SAndroid Build Coastguard Worker               &AMDGPU::R600_Reg128RegClass);
307*9880d681SAndroid Build Coastguard Worker       }
308*9880d681SAndroid Build Coastguard Worker     }
309*9880d681SAndroid Build Coastguard Worker     if ((DstRegs.find(SrcMI) == DstRegs.end())) {
310*9880d681SAndroid Build Coastguard Worker       DstRegs.insert(DstMI);
311*9880d681SAndroid Build Coastguard Worker       return true;
312*9880d681SAndroid Build Coastguard Worker     } else
313*9880d681SAndroid Build Coastguard Worker       return false;
314*9880d681SAndroid Build Coastguard Worker   }
315*9880d681SAndroid Build Coastguard Worker 
316*9880d681SAndroid Build Coastguard Worker   ClauseFile
MakeFetchClause(MachineBasicBlock & MBB,MachineBasicBlock::iterator & I) const317*9880d681SAndroid Build Coastguard Worker   MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
318*9880d681SAndroid Build Coastguard Worker       const {
319*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock::iterator ClauseHead = I;
320*9880d681SAndroid Build Coastguard Worker     std::vector<MachineInstr *> ClauseContent;
321*9880d681SAndroid Build Coastguard Worker     unsigned AluInstCount = 0;
322*9880d681SAndroid Build Coastguard Worker     bool IsTex = TII->usesTextureCache(*ClauseHead);
323*9880d681SAndroid Build Coastguard Worker     std::set<unsigned> DstRegs;
324*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
325*9880d681SAndroid Build Coastguard Worker       if (IsTrivialInst(*I))
326*9880d681SAndroid Build Coastguard Worker         continue;
327*9880d681SAndroid Build Coastguard Worker       if (AluInstCount >= MaxFetchInst)
328*9880d681SAndroid Build Coastguard Worker         break;
329*9880d681SAndroid Build Coastguard Worker       if ((IsTex && !TII->usesTextureCache(*I)) ||
330*9880d681SAndroid Build Coastguard Worker           (!IsTex && !TII->usesVertexCache(*I)))
331*9880d681SAndroid Build Coastguard Worker         break;
332*9880d681SAndroid Build Coastguard Worker       if (!isCompatibleWithClause(*I, DstRegs))
333*9880d681SAndroid Build Coastguard Worker         break;
334*9880d681SAndroid Build Coastguard Worker       AluInstCount ++;
335*9880d681SAndroid Build Coastguard Worker       ClauseContent.push_back(&*I);
336*9880d681SAndroid Build Coastguard Worker     }
337*9880d681SAndroid Build Coastguard Worker     MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
338*9880d681SAndroid Build Coastguard Worker         getHWInstrDesc(IsTex?CF_TC:CF_VC))
339*9880d681SAndroid Build Coastguard Worker         .addImm(0) // ADDR
340*9880d681SAndroid Build Coastguard Worker         .addImm(AluInstCount - 1); // COUNT
341*9880d681SAndroid Build Coastguard Worker     return ClauseFile(MIb, std::move(ClauseContent));
342*9880d681SAndroid Build Coastguard Worker   }
343*9880d681SAndroid Build Coastguard Worker 
getLiteral(MachineInstr & MI,std::vector<MachineOperand * > & Lits) const344*9880d681SAndroid Build Coastguard Worker   void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
345*9880d681SAndroid Build Coastguard Worker     static const unsigned LiteralRegs[] = {
346*9880d681SAndroid Build Coastguard Worker       AMDGPU::ALU_LITERAL_X,
347*9880d681SAndroid Build Coastguard Worker       AMDGPU::ALU_LITERAL_Y,
348*9880d681SAndroid Build Coastguard Worker       AMDGPU::ALU_LITERAL_Z,
349*9880d681SAndroid Build Coastguard Worker       AMDGPU::ALU_LITERAL_W
350*9880d681SAndroid Build Coastguard Worker     };
351*9880d681SAndroid Build Coastguard Worker     const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
352*9880d681SAndroid Build Coastguard Worker         TII->getSrcs(MI);
353*9880d681SAndroid Build Coastguard Worker     for (const auto &Src:Srcs) {
354*9880d681SAndroid Build Coastguard Worker       if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X)
355*9880d681SAndroid Build Coastguard Worker         continue;
356*9880d681SAndroid Build Coastguard Worker       int64_t Imm = Src.second;
357*9880d681SAndroid Build Coastguard Worker       std::vector<MachineOperand*>::iterator It =
358*9880d681SAndroid Build Coastguard Worker           std::find_if(Lits.begin(), Lits.end(),
359*9880d681SAndroid Build Coastguard Worker                     [&](MachineOperand* val)
360*9880d681SAndroid Build Coastguard Worker                         { return val->isImm() && (val->getImm() == Imm);});
361*9880d681SAndroid Build Coastguard Worker 
362*9880d681SAndroid Build Coastguard Worker       // Get corresponding Operand
363*9880d681SAndroid Build Coastguard Worker       MachineOperand &Operand = MI.getOperand(
364*9880d681SAndroid Build Coastguard Worker           TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
365*9880d681SAndroid Build Coastguard Worker 
366*9880d681SAndroid Build Coastguard Worker       if (It != Lits.end()) {
367*9880d681SAndroid Build Coastguard Worker         // Reuse existing literal reg
368*9880d681SAndroid Build Coastguard Worker         unsigned Index = It - Lits.begin();
369*9880d681SAndroid Build Coastguard Worker         Src.first->setReg(LiteralRegs[Index]);
370*9880d681SAndroid Build Coastguard Worker       } else {
371*9880d681SAndroid Build Coastguard Worker         // Allocate new literal reg
372*9880d681SAndroid Build Coastguard Worker         assert(Lits.size() < 4 && "Too many literals in Instruction Group");
373*9880d681SAndroid Build Coastguard Worker         Src.first->setReg(LiteralRegs[Lits.size()]);
374*9880d681SAndroid Build Coastguard Worker         Lits.push_back(&Operand);
375*9880d681SAndroid Build Coastguard Worker       }
376*9880d681SAndroid Build Coastguard Worker     }
377*9880d681SAndroid Build Coastguard Worker   }
378*9880d681SAndroid Build Coastguard Worker 
insertLiterals(MachineBasicBlock::iterator InsertPos,const std::vector<unsigned> & Literals) const379*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock::iterator insertLiterals(
380*9880d681SAndroid Build Coastguard Worker       MachineBasicBlock::iterator InsertPos,
381*9880d681SAndroid Build Coastguard Worker       const std::vector<unsigned> &Literals) const {
382*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *MBB = InsertPos->getParent();
383*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
384*9880d681SAndroid Build Coastguard Worker       unsigned LiteralPair0 = Literals[i];
385*9880d681SAndroid Build Coastguard Worker       unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
386*9880d681SAndroid Build Coastguard Worker       InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
387*9880d681SAndroid Build Coastguard Worker           TII->get(AMDGPU::LITERALS))
388*9880d681SAndroid Build Coastguard Worker           .addImm(LiteralPair0)
389*9880d681SAndroid Build Coastguard Worker           .addImm(LiteralPair1);
390*9880d681SAndroid Build Coastguard Worker     }
391*9880d681SAndroid Build Coastguard Worker     return InsertPos;
392*9880d681SAndroid Build Coastguard Worker   }
393*9880d681SAndroid Build Coastguard Worker 
394*9880d681SAndroid Build Coastguard Worker   ClauseFile
MakeALUClause(MachineBasicBlock & MBB,MachineBasicBlock::iterator & I) const395*9880d681SAndroid Build Coastguard Worker   MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
396*9880d681SAndroid Build Coastguard Worker       const {
397*9880d681SAndroid Build Coastguard Worker     MachineInstr &ClauseHead = *I;
398*9880d681SAndroid Build Coastguard Worker     std::vector<MachineInstr *> ClauseContent;
399*9880d681SAndroid Build Coastguard Worker     I++;
400*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
401*9880d681SAndroid Build Coastguard Worker       if (IsTrivialInst(*I)) {
402*9880d681SAndroid Build Coastguard Worker         ++I;
403*9880d681SAndroid Build Coastguard Worker         continue;
404*9880d681SAndroid Build Coastguard Worker       }
405*9880d681SAndroid Build Coastguard Worker       if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
406*9880d681SAndroid Build Coastguard Worker         break;
407*9880d681SAndroid Build Coastguard Worker       std::vector<MachineOperand *>Literals;
408*9880d681SAndroid Build Coastguard Worker       if (I->isBundle()) {
409*9880d681SAndroid Build Coastguard Worker         MachineInstr &DeleteMI = *I;
410*9880d681SAndroid Build Coastguard Worker         MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
411*9880d681SAndroid Build Coastguard Worker         while (++BI != E && BI->isBundledWithPred()) {
412*9880d681SAndroid Build Coastguard Worker           BI->unbundleFromPred();
413*9880d681SAndroid Build Coastguard Worker           for (MachineOperand &MO : BI->operands()) {
414*9880d681SAndroid Build Coastguard Worker             if (MO.isReg() && MO.isInternalRead())
415*9880d681SAndroid Build Coastguard Worker               MO.setIsInternalRead(false);
416*9880d681SAndroid Build Coastguard Worker           }
417*9880d681SAndroid Build Coastguard Worker           getLiteral(*BI, Literals);
418*9880d681SAndroid Build Coastguard Worker           ClauseContent.push_back(&*BI);
419*9880d681SAndroid Build Coastguard Worker         }
420*9880d681SAndroid Build Coastguard Worker         I = BI;
421*9880d681SAndroid Build Coastguard Worker         DeleteMI.eraseFromParent();
422*9880d681SAndroid Build Coastguard Worker       } else {
423*9880d681SAndroid Build Coastguard Worker         getLiteral(*I, Literals);
424*9880d681SAndroid Build Coastguard Worker         ClauseContent.push_back(&*I);
425*9880d681SAndroid Build Coastguard Worker         I++;
426*9880d681SAndroid Build Coastguard Worker       }
427*9880d681SAndroid Build Coastguard Worker       for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
428*9880d681SAndroid Build Coastguard Worker         MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
429*9880d681SAndroid Build Coastguard Worker             TII->get(AMDGPU::LITERALS));
430*9880d681SAndroid Build Coastguard Worker         if (Literals[i]->isImm()) {
431*9880d681SAndroid Build Coastguard Worker             MILit.addImm(Literals[i]->getImm());
432*9880d681SAndroid Build Coastguard Worker         } else {
433*9880d681SAndroid Build Coastguard Worker             MILit.addGlobalAddress(Literals[i]->getGlobal(),
434*9880d681SAndroid Build Coastguard Worker                                    Literals[i]->getOffset());
435*9880d681SAndroid Build Coastguard Worker         }
436*9880d681SAndroid Build Coastguard Worker         if (i + 1 < e) {
437*9880d681SAndroid Build Coastguard Worker           if (Literals[i + 1]->isImm()) {
438*9880d681SAndroid Build Coastguard Worker             MILit.addImm(Literals[i + 1]->getImm());
439*9880d681SAndroid Build Coastguard Worker           } else {
440*9880d681SAndroid Build Coastguard Worker             MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
441*9880d681SAndroid Build Coastguard Worker                                    Literals[i + 1]->getOffset());
442*9880d681SAndroid Build Coastguard Worker           }
443*9880d681SAndroid Build Coastguard Worker         } else
444*9880d681SAndroid Build Coastguard Worker           MILit.addImm(0);
445*9880d681SAndroid Build Coastguard Worker         ClauseContent.push_back(MILit);
446*9880d681SAndroid Build Coastguard Worker       }
447*9880d681SAndroid Build Coastguard Worker     }
448*9880d681SAndroid Build Coastguard Worker     assert(ClauseContent.size() < 128 && "ALU clause is too big");
449*9880d681SAndroid Build Coastguard Worker     ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
450*9880d681SAndroid Build Coastguard Worker     return ClauseFile(&ClauseHead, std::move(ClauseContent));
451*9880d681SAndroid Build Coastguard Worker   }
452*9880d681SAndroid Build Coastguard Worker 
453*9880d681SAndroid Build Coastguard Worker   void
EmitFetchClause(MachineBasicBlock::iterator InsertPos,ClauseFile & Clause,unsigned & CfCount)454*9880d681SAndroid Build Coastguard Worker   EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
455*9880d681SAndroid Build Coastguard Worker       unsigned &CfCount) {
456*9880d681SAndroid Build Coastguard Worker     CounterPropagateAddr(*Clause.first, CfCount);
457*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *BB = Clause.first->getParent();
458*9880d681SAndroid Build Coastguard Worker     BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
459*9880d681SAndroid Build Coastguard Worker         .addImm(CfCount);
460*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
461*9880d681SAndroid Build Coastguard Worker       BB->splice(InsertPos, BB, Clause.second[i]);
462*9880d681SAndroid Build Coastguard Worker     }
463*9880d681SAndroid Build Coastguard Worker     CfCount += 2 * Clause.second.size();
464*9880d681SAndroid Build Coastguard Worker   }
465*9880d681SAndroid Build Coastguard Worker 
466*9880d681SAndroid Build Coastguard Worker   void
EmitALUClause(MachineBasicBlock::iterator InsertPos,ClauseFile & Clause,unsigned & CfCount)467*9880d681SAndroid Build Coastguard Worker   EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
468*9880d681SAndroid Build Coastguard Worker       unsigned &CfCount) {
469*9880d681SAndroid Build Coastguard Worker     Clause.first->getOperand(0).setImm(0);
470*9880d681SAndroid Build Coastguard Worker     CounterPropagateAddr(*Clause.first, CfCount);
471*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *BB = Clause.first->getParent();
472*9880d681SAndroid Build Coastguard Worker     BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
473*9880d681SAndroid Build Coastguard Worker         .addImm(CfCount);
474*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
475*9880d681SAndroid Build Coastguard Worker       BB->splice(InsertPos, BB, Clause.second[i]);
476*9880d681SAndroid Build Coastguard Worker     }
477*9880d681SAndroid Build Coastguard Worker     CfCount += Clause.second.size();
478*9880d681SAndroid Build Coastguard Worker   }
479*9880d681SAndroid Build Coastguard Worker 
CounterPropagateAddr(MachineInstr & MI,unsigned Addr) const480*9880d681SAndroid Build Coastguard Worker   void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
481*9880d681SAndroid Build Coastguard Worker     MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
482*9880d681SAndroid Build Coastguard Worker   }
CounterPropagateAddr(const std::set<MachineInstr * > & MIs,unsigned Addr) const483*9880d681SAndroid Build Coastguard Worker   void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
484*9880d681SAndroid Build Coastguard Worker                             unsigned Addr) const {
485*9880d681SAndroid Build Coastguard Worker     for (MachineInstr *MI : MIs) {
486*9880d681SAndroid Build Coastguard Worker       CounterPropagateAddr(*MI, Addr);
487*9880d681SAndroid Build Coastguard Worker     }
488*9880d681SAndroid Build Coastguard Worker   }
489*9880d681SAndroid Build Coastguard Worker 
490*9880d681SAndroid Build Coastguard Worker public:
R600ControlFlowFinalizer(TargetMachine & tm)491*9880d681SAndroid Build Coastguard Worker   R600ControlFlowFinalizer(TargetMachine &tm)
492*9880d681SAndroid Build Coastguard Worker       : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
493*9880d681SAndroid Build Coastguard Worker 
runOnMachineFunction(MachineFunction & MF)494*9880d681SAndroid Build Coastguard Worker   bool runOnMachineFunction(MachineFunction &MF) override {
495*9880d681SAndroid Build Coastguard Worker     ST = &MF.getSubtarget<R600Subtarget>();
496*9880d681SAndroid Build Coastguard Worker     MaxFetchInst = ST->getTexVTXClauseSize();
497*9880d681SAndroid Build Coastguard Worker     TII = ST->getInstrInfo();
498*9880d681SAndroid Build Coastguard Worker     TRI = ST->getRegisterInfo();
499*9880d681SAndroid Build Coastguard Worker 
500*9880d681SAndroid Build Coastguard Worker     R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
501*9880d681SAndroid Build Coastguard Worker 
502*9880d681SAndroid Build Coastguard Worker     CFStack CFStack(ST, MF.getFunction()->getCallingConv());
503*9880d681SAndroid Build Coastguard Worker     for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
504*9880d681SAndroid Build Coastguard Worker         ++MB) {
505*9880d681SAndroid Build Coastguard Worker       MachineBasicBlock &MBB = *MB;
506*9880d681SAndroid Build Coastguard Worker       unsigned CfCount = 0;
507*9880d681SAndroid Build Coastguard Worker       std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
508*9880d681SAndroid Build Coastguard Worker       std::vector<MachineInstr * > IfThenElseStack;
509*9880d681SAndroid Build Coastguard Worker       if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_VS) {
510*9880d681SAndroid Build Coastguard Worker         BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
511*9880d681SAndroid Build Coastguard Worker             getHWInstrDesc(CF_CALL_FS));
512*9880d681SAndroid Build Coastguard Worker         CfCount++;
513*9880d681SAndroid Build Coastguard Worker       }
514*9880d681SAndroid Build Coastguard Worker       std::vector<ClauseFile> FetchClauses, AluClauses;
515*9880d681SAndroid Build Coastguard Worker       std::vector<MachineInstr *> LastAlu(1);
516*9880d681SAndroid Build Coastguard Worker       std::vector<MachineInstr *> ToPopAfter;
517*9880d681SAndroid Build Coastguard Worker 
518*9880d681SAndroid Build Coastguard Worker       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
519*9880d681SAndroid Build Coastguard Worker           I != E;) {
520*9880d681SAndroid Build Coastguard Worker         if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
521*9880d681SAndroid Build Coastguard Worker           DEBUG(dbgs() << CfCount << ":"; I->dump(););
522*9880d681SAndroid Build Coastguard Worker           FetchClauses.push_back(MakeFetchClause(MBB, I));
523*9880d681SAndroid Build Coastguard Worker           CfCount++;
524*9880d681SAndroid Build Coastguard Worker           LastAlu.back() = nullptr;
525*9880d681SAndroid Build Coastguard Worker           continue;
526*9880d681SAndroid Build Coastguard Worker         }
527*9880d681SAndroid Build Coastguard Worker 
528*9880d681SAndroid Build Coastguard Worker         MachineBasicBlock::iterator MI = I;
529*9880d681SAndroid Build Coastguard Worker         if (MI->getOpcode() != AMDGPU::ENDIF)
530*9880d681SAndroid Build Coastguard Worker           LastAlu.back() = nullptr;
531*9880d681SAndroid Build Coastguard Worker         if (MI->getOpcode() == AMDGPU::CF_ALU)
532*9880d681SAndroid Build Coastguard Worker           LastAlu.back() = &*MI;
533*9880d681SAndroid Build Coastguard Worker         I++;
534*9880d681SAndroid Build Coastguard Worker         bool RequiresWorkAround =
535*9880d681SAndroid Build Coastguard Worker             CFStack.requiresWorkAroundForInst(MI->getOpcode());
536*9880d681SAndroid Build Coastguard Worker         switch (MI->getOpcode()) {
537*9880d681SAndroid Build Coastguard Worker         case AMDGPU::CF_ALU_PUSH_BEFORE:
538*9880d681SAndroid Build Coastguard Worker           if (RequiresWorkAround) {
539*9880d681SAndroid Build Coastguard Worker             DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
540*9880d681SAndroid Build Coastguard Worker             BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
541*9880d681SAndroid Build Coastguard Worker                 .addImm(CfCount + 1)
542*9880d681SAndroid Build Coastguard Worker                 .addImm(1);
543*9880d681SAndroid Build Coastguard Worker             MI->setDesc(TII->get(AMDGPU::CF_ALU));
544*9880d681SAndroid Build Coastguard Worker             CfCount++;
545*9880d681SAndroid Build Coastguard Worker             CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
546*9880d681SAndroid Build Coastguard Worker           } else
547*9880d681SAndroid Build Coastguard Worker             CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
548*9880d681SAndroid Build Coastguard Worker 
549*9880d681SAndroid Build Coastguard Worker         case AMDGPU::CF_ALU:
550*9880d681SAndroid Build Coastguard Worker           I = MI;
551*9880d681SAndroid Build Coastguard Worker           AluClauses.push_back(MakeALUClause(MBB, I));
552*9880d681SAndroid Build Coastguard Worker           DEBUG(dbgs() << CfCount << ":"; MI->dump(););
553*9880d681SAndroid Build Coastguard Worker           CfCount++;
554*9880d681SAndroid Build Coastguard Worker           break;
555*9880d681SAndroid Build Coastguard Worker         case AMDGPU::WHILELOOP: {
556*9880d681SAndroid Build Coastguard Worker           CFStack.pushLoop();
557*9880d681SAndroid Build Coastguard Worker           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
558*9880d681SAndroid Build Coastguard Worker               getHWInstrDesc(CF_WHILE_LOOP))
559*9880d681SAndroid Build Coastguard Worker               .addImm(1);
560*9880d681SAndroid Build Coastguard Worker           std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
561*9880d681SAndroid Build Coastguard Worker               std::set<MachineInstr *>());
562*9880d681SAndroid Build Coastguard Worker           Pair.second.insert(MIb);
563*9880d681SAndroid Build Coastguard Worker           LoopStack.push_back(std::move(Pair));
564*9880d681SAndroid Build Coastguard Worker           MI->eraseFromParent();
565*9880d681SAndroid Build Coastguard Worker           CfCount++;
566*9880d681SAndroid Build Coastguard Worker           break;
567*9880d681SAndroid Build Coastguard Worker         }
568*9880d681SAndroid Build Coastguard Worker         case AMDGPU::ENDLOOP: {
569*9880d681SAndroid Build Coastguard Worker           CFStack.popLoop();
570*9880d681SAndroid Build Coastguard Worker           std::pair<unsigned, std::set<MachineInstr *> > Pair =
571*9880d681SAndroid Build Coastguard Worker               std::move(LoopStack.back());
572*9880d681SAndroid Build Coastguard Worker           LoopStack.pop_back();
573*9880d681SAndroid Build Coastguard Worker           CounterPropagateAddr(Pair.second, CfCount);
574*9880d681SAndroid Build Coastguard Worker           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
575*9880d681SAndroid Build Coastguard Worker               .addImm(Pair.first + 1);
576*9880d681SAndroid Build Coastguard Worker           MI->eraseFromParent();
577*9880d681SAndroid Build Coastguard Worker           CfCount++;
578*9880d681SAndroid Build Coastguard Worker           break;
579*9880d681SAndroid Build Coastguard Worker         }
580*9880d681SAndroid Build Coastguard Worker         case AMDGPU::IF_PREDICATE_SET: {
581*9880d681SAndroid Build Coastguard Worker           LastAlu.push_back(nullptr);
582*9880d681SAndroid Build Coastguard Worker           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
583*9880d681SAndroid Build Coastguard Worker               getHWInstrDesc(CF_JUMP))
584*9880d681SAndroid Build Coastguard Worker               .addImm(0)
585*9880d681SAndroid Build Coastguard Worker               .addImm(0);
586*9880d681SAndroid Build Coastguard Worker           IfThenElseStack.push_back(MIb);
587*9880d681SAndroid Build Coastguard Worker           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
588*9880d681SAndroid Build Coastguard Worker           MI->eraseFromParent();
589*9880d681SAndroid Build Coastguard Worker           CfCount++;
590*9880d681SAndroid Build Coastguard Worker           break;
591*9880d681SAndroid Build Coastguard Worker         }
592*9880d681SAndroid Build Coastguard Worker         case AMDGPU::ELSE: {
593*9880d681SAndroid Build Coastguard Worker           MachineInstr * JumpInst = IfThenElseStack.back();
594*9880d681SAndroid Build Coastguard Worker           IfThenElseStack.pop_back();
595*9880d681SAndroid Build Coastguard Worker           CounterPropagateAddr(*JumpInst, CfCount);
596*9880d681SAndroid Build Coastguard Worker           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
597*9880d681SAndroid Build Coastguard Worker               getHWInstrDesc(CF_ELSE))
598*9880d681SAndroid Build Coastguard Worker               .addImm(0)
599*9880d681SAndroid Build Coastguard Worker               .addImm(0);
600*9880d681SAndroid Build Coastguard Worker           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
601*9880d681SAndroid Build Coastguard Worker           IfThenElseStack.push_back(MIb);
602*9880d681SAndroid Build Coastguard Worker           MI->eraseFromParent();
603*9880d681SAndroid Build Coastguard Worker           CfCount++;
604*9880d681SAndroid Build Coastguard Worker           break;
605*9880d681SAndroid Build Coastguard Worker         }
606*9880d681SAndroid Build Coastguard Worker         case AMDGPU::ENDIF: {
607*9880d681SAndroid Build Coastguard Worker           CFStack.popBranch();
608*9880d681SAndroid Build Coastguard Worker           if (LastAlu.back()) {
609*9880d681SAndroid Build Coastguard Worker             ToPopAfter.push_back(LastAlu.back());
610*9880d681SAndroid Build Coastguard Worker           } else {
611*9880d681SAndroid Build Coastguard Worker             MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
612*9880d681SAndroid Build Coastguard Worker                 getHWInstrDesc(CF_POP))
613*9880d681SAndroid Build Coastguard Worker                 .addImm(CfCount + 1)
614*9880d681SAndroid Build Coastguard Worker                 .addImm(1);
615*9880d681SAndroid Build Coastguard Worker             (void)MIb;
616*9880d681SAndroid Build Coastguard Worker             DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
617*9880d681SAndroid Build Coastguard Worker             CfCount++;
618*9880d681SAndroid Build Coastguard Worker           }
619*9880d681SAndroid Build Coastguard Worker 
620*9880d681SAndroid Build Coastguard Worker           MachineInstr *IfOrElseInst = IfThenElseStack.back();
621*9880d681SAndroid Build Coastguard Worker           IfThenElseStack.pop_back();
622*9880d681SAndroid Build Coastguard Worker           CounterPropagateAddr(*IfOrElseInst, CfCount);
623*9880d681SAndroid Build Coastguard Worker           IfOrElseInst->getOperand(1).setImm(1);
624*9880d681SAndroid Build Coastguard Worker           LastAlu.pop_back();
625*9880d681SAndroid Build Coastguard Worker           MI->eraseFromParent();
626*9880d681SAndroid Build Coastguard Worker           break;
627*9880d681SAndroid Build Coastguard Worker         }
628*9880d681SAndroid Build Coastguard Worker         case AMDGPU::BREAK: {
629*9880d681SAndroid Build Coastguard Worker           CfCount ++;
630*9880d681SAndroid Build Coastguard Worker           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
631*9880d681SAndroid Build Coastguard Worker               getHWInstrDesc(CF_LOOP_BREAK))
632*9880d681SAndroid Build Coastguard Worker               .addImm(0);
633*9880d681SAndroid Build Coastguard Worker           LoopStack.back().second.insert(MIb);
634*9880d681SAndroid Build Coastguard Worker           MI->eraseFromParent();
635*9880d681SAndroid Build Coastguard Worker           break;
636*9880d681SAndroid Build Coastguard Worker         }
637*9880d681SAndroid Build Coastguard Worker         case AMDGPU::CONTINUE: {
638*9880d681SAndroid Build Coastguard Worker           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
639*9880d681SAndroid Build Coastguard Worker               getHWInstrDesc(CF_LOOP_CONTINUE))
640*9880d681SAndroid Build Coastguard Worker               .addImm(0);
641*9880d681SAndroid Build Coastguard Worker           LoopStack.back().second.insert(MIb);
642*9880d681SAndroid Build Coastguard Worker           MI->eraseFromParent();
643*9880d681SAndroid Build Coastguard Worker           CfCount++;
644*9880d681SAndroid Build Coastguard Worker           break;
645*9880d681SAndroid Build Coastguard Worker         }
646*9880d681SAndroid Build Coastguard Worker         case AMDGPU::RETURN: {
647*9880d681SAndroid Build Coastguard Worker           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
648*9880d681SAndroid Build Coastguard Worker           CfCount++;
649*9880d681SAndroid Build Coastguard Worker           if (CfCount % 2) {
650*9880d681SAndroid Build Coastguard Worker             BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
651*9880d681SAndroid Build Coastguard Worker             CfCount++;
652*9880d681SAndroid Build Coastguard Worker           }
653*9880d681SAndroid Build Coastguard Worker           MI->eraseFromParent();
654*9880d681SAndroid Build Coastguard Worker           for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
655*9880d681SAndroid Build Coastguard Worker             EmitFetchClause(I, FetchClauses[i], CfCount);
656*9880d681SAndroid Build Coastguard Worker           for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
657*9880d681SAndroid Build Coastguard Worker             EmitALUClause(I, AluClauses[i], CfCount);
658*9880d681SAndroid Build Coastguard Worker           break;
659*9880d681SAndroid Build Coastguard Worker         }
660*9880d681SAndroid Build Coastguard Worker         default:
661*9880d681SAndroid Build Coastguard Worker           if (TII->isExport(MI->getOpcode())) {
662*9880d681SAndroid Build Coastguard Worker             DEBUG(dbgs() << CfCount << ":"; MI->dump(););
663*9880d681SAndroid Build Coastguard Worker             CfCount++;
664*9880d681SAndroid Build Coastguard Worker           }
665*9880d681SAndroid Build Coastguard Worker           break;
666*9880d681SAndroid Build Coastguard Worker         }
667*9880d681SAndroid Build Coastguard Worker       }
668*9880d681SAndroid Build Coastguard Worker       for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
669*9880d681SAndroid Build Coastguard Worker         MachineInstr *Alu = ToPopAfter[i];
670*9880d681SAndroid Build Coastguard Worker         BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
671*9880d681SAndroid Build Coastguard Worker             TII->get(AMDGPU::CF_ALU_POP_AFTER))
672*9880d681SAndroid Build Coastguard Worker             .addImm(Alu->getOperand(0).getImm())
673*9880d681SAndroid Build Coastguard Worker             .addImm(Alu->getOperand(1).getImm())
674*9880d681SAndroid Build Coastguard Worker             .addImm(Alu->getOperand(2).getImm())
675*9880d681SAndroid Build Coastguard Worker             .addImm(Alu->getOperand(3).getImm())
676*9880d681SAndroid Build Coastguard Worker             .addImm(Alu->getOperand(4).getImm())
677*9880d681SAndroid Build Coastguard Worker             .addImm(Alu->getOperand(5).getImm())
678*9880d681SAndroid Build Coastguard Worker             .addImm(Alu->getOperand(6).getImm())
679*9880d681SAndroid Build Coastguard Worker             .addImm(Alu->getOperand(7).getImm())
680*9880d681SAndroid Build Coastguard Worker             .addImm(Alu->getOperand(8).getImm());
681*9880d681SAndroid Build Coastguard Worker         Alu->eraseFromParent();
682*9880d681SAndroid Build Coastguard Worker       }
683*9880d681SAndroid Build Coastguard Worker       MFI->StackSize = CFStack.MaxStackSize;
684*9880d681SAndroid Build Coastguard Worker     }
685*9880d681SAndroid Build Coastguard Worker 
686*9880d681SAndroid Build Coastguard Worker     return false;
687*9880d681SAndroid Build Coastguard Worker   }
688*9880d681SAndroid Build Coastguard Worker 
getPassName() const689*9880d681SAndroid Build Coastguard Worker   const char *getPassName() const override {
690*9880d681SAndroid Build Coastguard Worker     return "R600 Control Flow Finalizer Pass";
691*9880d681SAndroid Build Coastguard Worker   }
692*9880d681SAndroid Build Coastguard Worker };
693*9880d681SAndroid Build Coastguard Worker 
// Static pass identifier; the constructor hands it to the
// MachineFunctionPass base class.
694*9880d681SAndroid Build Coastguard Worker char R600ControlFlowFinalizer::ID = 0;
695*9880d681SAndroid Build Coastguard Worker 
696*9880d681SAndroid Build Coastguard Worker } // end anonymous namespace
697*9880d681SAndroid Build Coastguard Worker 
698*9880d681SAndroid Build Coastguard Worker 
createR600ControlFlowFinalizer(TargetMachine & TM)699*9880d681SAndroid Build Coastguard Worker llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
700*9880d681SAndroid Build Coastguard Worker   return new R600ControlFlowFinalizer(TM);
701*9880d681SAndroid Build Coastguard Worker }
702