xref: /aosp_15_r20/external/llvm/lib/Target/X86/X86VZeroUpper.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//
16*9880d681SAndroid Build Coastguard Worker 
17*9880d681SAndroid Build Coastguard Worker #include "X86.h"
18*9880d681SAndroid Build Coastguard Worker #include "X86InstrInfo.h"
19*9880d681SAndroid Build Coastguard Worker #include "X86Subtarget.h"
20*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/Statistic.h"
21*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
22*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/Passes.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
26*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
27*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetInstrInfo.h"
28*9880d681SAndroid Build Coastguard Worker using namespace llvm;
29*9880d681SAndroid Build Coastguard Worker 
// Tag for this file's DEBUG() output and statistics.
#define DEBUG_TYPE "x86-vzeroupper"

// Counter bumped once per VZEROUPPER emitted by insertVZeroUpper().
STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
33*9880d681SAndroid Build Coastguard Worker 
34*9880d681SAndroid Build Coastguard Worker namespace {
35*9880d681SAndroid Build Coastguard Worker 
36*9880d681SAndroid Build Coastguard Worker   class VZeroUpperInserter : public MachineFunctionPass {
37*9880d681SAndroid Build Coastguard Worker   public:
38*9880d681SAndroid Build Coastguard Worker 
VZeroUpperInserter()39*9880d681SAndroid Build Coastguard Worker     VZeroUpperInserter() : MachineFunctionPass(ID) {}
40*9880d681SAndroid Build Coastguard Worker     bool runOnMachineFunction(MachineFunction &MF) override;
getRequiredProperties() const41*9880d681SAndroid Build Coastguard Worker     MachineFunctionProperties getRequiredProperties() const override {
42*9880d681SAndroid Build Coastguard Worker       return MachineFunctionProperties().set(
43*9880d681SAndroid Build Coastguard Worker           MachineFunctionProperties::Property::AllVRegsAllocated);
44*9880d681SAndroid Build Coastguard Worker     }
getPassName() const45*9880d681SAndroid Build Coastguard Worker     const char *getPassName() const override {return "X86 vzeroupper inserter";}
46*9880d681SAndroid Build Coastguard Worker 
47*9880d681SAndroid Build Coastguard Worker   private:
48*9880d681SAndroid Build Coastguard Worker 
49*9880d681SAndroid Build Coastguard Worker     void processBasicBlock(MachineBasicBlock &MBB);
50*9880d681SAndroid Build Coastguard Worker     void insertVZeroUpper(MachineBasicBlock::iterator I,
51*9880d681SAndroid Build Coastguard Worker                           MachineBasicBlock &MBB);
52*9880d681SAndroid Build Coastguard Worker     void addDirtySuccessor(MachineBasicBlock &MBB);
53*9880d681SAndroid Build Coastguard Worker 
54*9880d681SAndroid Build Coastguard Worker     typedef enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY } BlockExitState;
55*9880d681SAndroid Build Coastguard Worker     static const char* getBlockExitStateName(BlockExitState ST);
56*9880d681SAndroid Build Coastguard Worker 
57*9880d681SAndroid Build Coastguard Worker     // Core algorithm state:
58*9880d681SAndroid Build Coastguard Worker     // BlockState - Each block is either:
59*9880d681SAndroid Build Coastguard Worker     //   - PASS_THROUGH: There are neither YMM dirtying instructions nor
60*9880d681SAndroid Build Coastguard Worker     //                   vzeroupper instructions in this block.
61*9880d681SAndroid Build Coastguard Worker     //   - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
62*9880d681SAndroid Build Coastguard Worker     //                  block that will ensure that YMM is clean on exit.
63*9880d681SAndroid Build Coastguard Worker     //   - EXITS_DIRTY: An instruction in the block dirties YMM and no
64*9880d681SAndroid Build Coastguard Worker     //                  subsequent vzeroupper in the block clears it.
65*9880d681SAndroid Build Coastguard Worker     //
66*9880d681SAndroid Build Coastguard Worker     // AddedToDirtySuccessors - This flag is raised when a block is added to the
67*9880d681SAndroid Build Coastguard Worker     //                          DirtySuccessors list to ensure that it's not
68*9880d681SAndroid Build Coastguard Worker     //                          added multiple times.
69*9880d681SAndroid Build Coastguard Worker     //
70*9880d681SAndroid Build Coastguard Worker     // FirstUnguardedCall - Records the location of the first unguarded call in
71*9880d681SAndroid Build Coastguard Worker     //                      each basic block that may need to be guarded by a
72*9880d681SAndroid Build Coastguard Worker     //                      vzeroupper. We won't know whether it actually needs
73*9880d681SAndroid Build Coastguard Worker     //                      to be guarded until we discover a predecessor that
74*9880d681SAndroid Build Coastguard Worker     //                      is DIRTY_OUT.
75*9880d681SAndroid Build Coastguard Worker     struct BlockState {
BlockState__anon08a8f69b0111::VZeroUpperInserter::BlockState76*9880d681SAndroid Build Coastguard Worker       BlockState() : ExitState(PASS_THROUGH), AddedToDirtySuccessors(false) {}
77*9880d681SAndroid Build Coastguard Worker       BlockExitState ExitState;
78*9880d681SAndroid Build Coastguard Worker       bool AddedToDirtySuccessors;
79*9880d681SAndroid Build Coastguard Worker       MachineBasicBlock::iterator FirstUnguardedCall;
80*9880d681SAndroid Build Coastguard Worker     };
81*9880d681SAndroid Build Coastguard Worker     typedef SmallVector<BlockState, 8> BlockStateMap;
82*9880d681SAndroid Build Coastguard Worker     typedef SmallVector<MachineBasicBlock*, 8> DirtySuccessorsWorkList;
83*9880d681SAndroid Build Coastguard Worker 
84*9880d681SAndroid Build Coastguard Worker     BlockStateMap BlockStates;
85*9880d681SAndroid Build Coastguard Worker     DirtySuccessorsWorkList DirtySuccessors;
86*9880d681SAndroid Build Coastguard Worker     bool EverMadeChange;
87*9880d681SAndroid Build Coastguard Worker     bool IsX86INTR;
88*9880d681SAndroid Build Coastguard Worker     const TargetInstrInfo *TII;
89*9880d681SAndroid Build Coastguard Worker 
90*9880d681SAndroid Build Coastguard Worker     static char ID;
91*9880d681SAndroid Build Coastguard Worker   };
92*9880d681SAndroid Build Coastguard Worker 
93*9880d681SAndroid Build Coastguard Worker   char VZeroUpperInserter::ID = 0;
94*9880d681SAndroid Build Coastguard Worker }
95*9880d681SAndroid Build Coastguard Worker 
createX86IssueVZeroUpperPass()96*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createX86IssueVZeroUpperPass() {
97*9880d681SAndroid Build Coastguard Worker   return new VZeroUpperInserter();
98*9880d681SAndroid Build Coastguard Worker }
99*9880d681SAndroid Build Coastguard Worker 
getBlockExitStateName(BlockExitState ST)100*9880d681SAndroid Build Coastguard Worker const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
101*9880d681SAndroid Build Coastguard Worker   switch (ST) {
102*9880d681SAndroid Build Coastguard Worker     case PASS_THROUGH: return "Pass-through";
103*9880d681SAndroid Build Coastguard Worker     case EXITS_DIRTY: return "Exits-dirty";
104*9880d681SAndroid Build Coastguard Worker     case EXITS_CLEAN: return "Exits-clean";
105*9880d681SAndroid Build Coastguard Worker   }
106*9880d681SAndroid Build Coastguard Worker   llvm_unreachable("Invalid block exit state.");
107*9880d681SAndroid Build Coastguard Worker }
108*9880d681SAndroid Build Coastguard Worker 
isYmmReg(unsigned Reg)109*9880d681SAndroid Build Coastguard Worker static bool isYmmReg(unsigned Reg) {
110*9880d681SAndroid Build Coastguard Worker   return (Reg >= X86::YMM0 && Reg <= X86::YMM15);
111*9880d681SAndroid Build Coastguard Worker }
112*9880d681SAndroid Build Coastguard Worker 
checkFnHasLiveInYmm(MachineRegisterInfo & MRI)113*9880d681SAndroid Build Coastguard Worker static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
114*9880d681SAndroid Build Coastguard Worker   for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
115*9880d681SAndroid Build Coastguard Worker        E = MRI.livein_end(); I != E; ++I)
116*9880d681SAndroid Build Coastguard Worker     if (isYmmReg(I->first))
117*9880d681SAndroid Build Coastguard Worker       return true;
118*9880d681SAndroid Build Coastguard Worker 
119*9880d681SAndroid Build Coastguard Worker   return false;
120*9880d681SAndroid Build Coastguard Worker }
121*9880d681SAndroid Build Coastguard Worker 
clobbersAllYmmRegs(const MachineOperand & MO)122*9880d681SAndroid Build Coastguard Worker static bool clobbersAllYmmRegs(const MachineOperand &MO) {
123*9880d681SAndroid Build Coastguard Worker   for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
124*9880d681SAndroid Build Coastguard Worker     if (!MO.clobbersPhysReg(reg))
125*9880d681SAndroid Build Coastguard Worker       return false;
126*9880d681SAndroid Build Coastguard Worker   }
127*9880d681SAndroid Build Coastguard Worker   return true;
128*9880d681SAndroid Build Coastguard Worker }
129*9880d681SAndroid Build Coastguard Worker 
hasYmmReg(MachineInstr & MI)130*9880d681SAndroid Build Coastguard Worker static bool hasYmmReg(MachineInstr &MI) {
131*9880d681SAndroid Build Coastguard Worker   for (const MachineOperand &MO : MI.operands()) {
132*9880d681SAndroid Build Coastguard Worker     if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
133*9880d681SAndroid Build Coastguard Worker       return true;
134*9880d681SAndroid Build Coastguard Worker     if (!MO.isReg())
135*9880d681SAndroid Build Coastguard Worker       continue;
136*9880d681SAndroid Build Coastguard Worker     if (MO.isDebug())
137*9880d681SAndroid Build Coastguard Worker       continue;
138*9880d681SAndroid Build Coastguard Worker     if (isYmmReg(MO.getReg()))
139*9880d681SAndroid Build Coastguard Worker       return true;
140*9880d681SAndroid Build Coastguard Worker   }
141*9880d681SAndroid Build Coastguard Worker   return false;
142*9880d681SAndroid Build Coastguard Worker }
143*9880d681SAndroid Build Coastguard Worker 
144*9880d681SAndroid Build Coastguard Worker /// Check if any YMM register will be clobbered by this instruction.
callClobbersAnyYmmReg(MachineInstr & MI)145*9880d681SAndroid Build Coastguard Worker static bool callClobbersAnyYmmReg(MachineInstr &MI) {
146*9880d681SAndroid Build Coastguard Worker   assert(MI.isCall() && "Can only be called on call instructions.");
147*9880d681SAndroid Build Coastguard Worker   for (const MachineOperand &MO : MI.operands()) {
148*9880d681SAndroid Build Coastguard Worker     if (!MO.isRegMask())
149*9880d681SAndroid Build Coastguard Worker       continue;
150*9880d681SAndroid Build Coastguard Worker     for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
151*9880d681SAndroid Build Coastguard Worker       if (MO.clobbersPhysReg(reg))
152*9880d681SAndroid Build Coastguard Worker         return true;
153*9880d681SAndroid Build Coastguard Worker     }
154*9880d681SAndroid Build Coastguard Worker   }
155*9880d681SAndroid Build Coastguard Worker   return false;
156*9880d681SAndroid Build Coastguard Worker }
157*9880d681SAndroid Build Coastguard Worker 
158*9880d681SAndroid Build Coastguard Worker /// Insert a vzeroupper instruction before I.
insertVZeroUpper(MachineBasicBlock::iterator I,MachineBasicBlock & MBB)159*9880d681SAndroid Build Coastguard Worker void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
160*9880d681SAndroid Build Coastguard Worker                                           MachineBasicBlock &MBB) {
161*9880d681SAndroid Build Coastguard Worker   DebugLoc dl = I->getDebugLoc();
162*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER));
163*9880d681SAndroid Build Coastguard Worker   ++NumVZU;
164*9880d681SAndroid Build Coastguard Worker   EverMadeChange = true;
165*9880d681SAndroid Build Coastguard Worker }
166*9880d681SAndroid Build Coastguard Worker 
167*9880d681SAndroid Build Coastguard Worker /// Add MBB to the DirtySuccessors list if it hasn't already been added.
addDirtySuccessor(MachineBasicBlock & MBB)168*9880d681SAndroid Build Coastguard Worker void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
169*9880d681SAndroid Build Coastguard Worker   if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
170*9880d681SAndroid Build Coastguard Worker     DirtySuccessors.push_back(&MBB);
171*9880d681SAndroid Build Coastguard Worker     BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
172*9880d681SAndroid Build Coastguard Worker   }
173*9880d681SAndroid Build Coastguard Worker }
174*9880d681SAndroid Build Coastguard Worker 
175*9880d681SAndroid Build Coastguard Worker /// Loop over all of the instructions in the basic block, inserting vzeroupper
176*9880d681SAndroid Build Coastguard Worker /// instructions before function calls.
processBasicBlock(MachineBasicBlock & MBB)177*9880d681SAndroid Build Coastguard Worker void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
178*9880d681SAndroid Build Coastguard Worker 
179*9880d681SAndroid Build Coastguard Worker   // Start by assuming that the block is PASS_THROUGH which implies no unguarded
180*9880d681SAndroid Build Coastguard Worker   // calls.
181*9880d681SAndroid Build Coastguard Worker   BlockExitState CurState = PASS_THROUGH;
182*9880d681SAndroid Build Coastguard Worker   BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();
183*9880d681SAndroid Build Coastguard Worker 
184*9880d681SAndroid Build Coastguard Worker   for (MachineInstr &MI : MBB) {
185*9880d681SAndroid Build Coastguard Worker     // No need for vzeroupper before iret in interrupt handler function,
186*9880d681SAndroid Build Coastguard Worker     // epilogue will restore YMM registers if needed.
187*9880d681SAndroid Build Coastguard Worker     bool IsReturnFromX86INTR = IsX86INTR && MI.isReturn();
188*9880d681SAndroid Build Coastguard Worker     bool IsControlFlow = MI.isCall() || MI.isReturn();
189*9880d681SAndroid Build Coastguard Worker 
190*9880d681SAndroid Build Coastguard Worker     // An existing VZERO* instruction resets the state.
191*9880d681SAndroid Build Coastguard Worker     if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
192*9880d681SAndroid Build Coastguard Worker       CurState = EXITS_CLEAN;
193*9880d681SAndroid Build Coastguard Worker       continue;
194*9880d681SAndroid Build Coastguard Worker     }
195*9880d681SAndroid Build Coastguard Worker 
196*9880d681SAndroid Build Coastguard Worker     // Shortcut: don't need to check regular instructions in dirty state.
197*9880d681SAndroid Build Coastguard Worker     if ((!IsControlFlow || IsReturnFromX86INTR) && CurState == EXITS_DIRTY)
198*9880d681SAndroid Build Coastguard Worker       continue;
199*9880d681SAndroid Build Coastguard Worker 
200*9880d681SAndroid Build Coastguard Worker     if (hasYmmReg(MI)) {
201*9880d681SAndroid Build Coastguard Worker       // We found a ymm-using instruction; this could be an AVX instruction,
202*9880d681SAndroid Build Coastguard Worker       // or it could be control flow.
203*9880d681SAndroid Build Coastguard Worker       CurState = EXITS_DIRTY;
204*9880d681SAndroid Build Coastguard Worker       continue;
205*9880d681SAndroid Build Coastguard Worker     }
206*9880d681SAndroid Build Coastguard Worker 
207*9880d681SAndroid Build Coastguard Worker     // Check for control-flow out of the current function (which might
208*9880d681SAndroid Build Coastguard Worker     // indirectly execute SSE instructions).
209*9880d681SAndroid Build Coastguard Worker     if (!IsControlFlow || IsReturnFromX86INTR)
210*9880d681SAndroid Build Coastguard Worker       continue;
211*9880d681SAndroid Build Coastguard Worker 
212*9880d681SAndroid Build Coastguard Worker     // If the call won't clobber any YMM register, skip it as well. It usually
213*9880d681SAndroid Build Coastguard Worker     // happens on helper function calls (such as '_chkstk', '_ftol2') where
214*9880d681SAndroid Build Coastguard Worker     // standard calling convention is not used (RegMask is not used to mark
215*9880d681SAndroid Build Coastguard Worker     // register clobbered and register usage (def/imp-def/use) is well-defined
216*9880d681SAndroid Build Coastguard Worker     // and explicitly specified.
217*9880d681SAndroid Build Coastguard Worker     if (MI.isCall() && !callClobbersAnyYmmReg(MI))
218*9880d681SAndroid Build Coastguard Worker       continue;
219*9880d681SAndroid Build Coastguard Worker 
220*9880d681SAndroid Build Coastguard Worker     // The VZEROUPPER instruction resets the upper 128 bits of all AVX
221*9880d681SAndroid Build Coastguard Worker     // registers. In addition, the processor changes back to Clean state, after
222*9880d681SAndroid Build Coastguard Worker     // which execution of SSE instructions or AVX instructions has no transition
223*9880d681SAndroid Build Coastguard Worker     // penalty. Add the VZEROUPPER instruction before any function call/return
224*9880d681SAndroid Build Coastguard Worker     // that might execute SSE code.
225*9880d681SAndroid Build Coastguard Worker     // FIXME: In some cases, we may want to move the VZEROUPPER into a
226*9880d681SAndroid Build Coastguard Worker     // predecessor block.
227*9880d681SAndroid Build Coastguard Worker     if (CurState == EXITS_DIRTY) {
228*9880d681SAndroid Build Coastguard Worker       // After the inserted VZEROUPPER the state becomes clean again, but
229*9880d681SAndroid Build Coastguard Worker       // other YMM may appear before other subsequent calls or even before
230*9880d681SAndroid Build Coastguard Worker       // the end of the BB.
231*9880d681SAndroid Build Coastguard Worker       insertVZeroUpper(MI, MBB);
232*9880d681SAndroid Build Coastguard Worker       CurState = EXITS_CLEAN;
233*9880d681SAndroid Build Coastguard Worker     } else if (CurState == PASS_THROUGH) {
234*9880d681SAndroid Build Coastguard Worker       // If this block is currently in pass-through state and we encounter a
235*9880d681SAndroid Build Coastguard Worker       // call then whether we need a vzeroupper or not depends on whether this
236*9880d681SAndroid Build Coastguard Worker       // block has successors that exit dirty. Record the location of the call,
237*9880d681SAndroid Build Coastguard Worker       // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet.
238*9880d681SAndroid Build Coastguard Worker       // It will be inserted later if necessary.
239*9880d681SAndroid Build Coastguard Worker       BlockStates[MBB.getNumber()].FirstUnguardedCall = MI;
240*9880d681SAndroid Build Coastguard Worker       CurState = EXITS_CLEAN;
241*9880d681SAndroid Build Coastguard Worker     }
242*9880d681SAndroid Build Coastguard Worker   }
243*9880d681SAndroid Build Coastguard Worker 
244*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: "
245*9880d681SAndroid Build Coastguard Worker                << getBlockExitStateName(CurState) << '\n');
246*9880d681SAndroid Build Coastguard Worker 
247*9880d681SAndroid Build Coastguard Worker   if (CurState == EXITS_DIRTY)
248*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
249*9880d681SAndroid Build Coastguard Worker                                           SE = MBB.succ_end();
250*9880d681SAndroid Build Coastguard Worker          SI != SE; ++SI)
251*9880d681SAndroid Build Coastguard Worker       addDirtySuccessor(**SI);
252*9880d681SAndroid Build Coastguard Worker 
253*9880d681SAndroid Build Coastguard Worker   BlockStates[MBB.getNumber()].ExitState = CurState;
254*9880d681SAndroid Build Coastguard Worker }
255*9880d681SAndroid Build Coastguard Worker 
256*9880d681SAndroid Build Coastguard Worker /// Loop over all of the basic blocks, inserting vzeroupper instructions before
257*9880d681SAndroid Build Coastguard Worker /// function calls.
runOnMachineFunction(MachineFunction & MF)258*9880d681SAndroid Build Coastguard Worker bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
259*9880d681SAndroid Build Coastguard Worker   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
260*9880d681SAndroid Build Coastguard Worker   if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
261*9880d681SAndroid Build Coastguard Worker     return false;
262*9880d681SAndroid Build Coastguard Worker   TII = ST.getInstrInfo();
263*9880d681SAndroid Build Coastguard Worker   MachineRegisterInfo &MRI = MF.getRegInfo();
264*9880d681SAndroid Build Coastguard Worker   EverMadeChange = false;
265*9880d681SAndroid Build Coastguard Worker   IsX86INTR = MF.getFunction()->getCallingConv() == CallingConv::X86_INTR;
266*9880d681SAndroid Build Coastguard Worker 
267*9880d681SAndroid Build Coastguard Worker   bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
268*9880d681SAndroid Build Coastguard Worker 
269*9880d681SAndroid Build Coastguard Worker   // Fast check: if the function doesn't use any ymm registers, we don't need
270*9880d681SAndroid Build Coastguard Worker   // to insert any VZEROUPPER instructions.  This is constant-time, so it is
271*9880d681SAndroid Build Coastguard Worker   // cheap in the common case of no ymm use.
272*9880d681SAndroid Build Coastguard Worker   bool YMMUsed = FnHasLiveInYmm;
273*9880d681SAndroid Build Coastguard Worker   if (!YMMUsed) {
274*9880d681SAndroid Build Coastguard Worker     const TargetRegisterClass *RC = &X86::VR256RegClass;
275*9880d681SAndroid Build Coastguard Worker     for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
276*9880d681SAndroid Build Coastguard Worker          i++) {
277*9880d681SAndroid Build Coastguard Worker       if (!MRI.reg_nodbg_empty(*i)) {
278*9880d681SAndroid Build Coastguard Worker         YMMUsed = true;
279*9880d681SAndroid Build Coastguard Worker         break;
280*9880d681SAndroid Build Coastguard Worker       }
281*9880d681SAndroid Build Coastguard Worker     }
282*9880d681SAndroid Build Coastguard Worker   }
283*9880d681SAndroid Build Coastguard Worker   if (!YMMUsed) {
284*9880d681SAndroid Build Coastguard Worker     return false;
285*9880d681SAndroid Build Coastguard Worker   }
286*9880d681SAndroid Build Coastguard Worker 
287*9880d681SAndroid Build Coastguard Worker   assert(BlockStates.empty() && DirtySuccessors.empty() &&
288*9880d681SAndroid Build Coastguard Worker          "X86VZeroUpper state should be clear");
289*9880d681SAndroid Build Coastguard Worker   BlockStates.resize(MF.getNumBlockIDs());
290*9880d681SAndroid Build Coastguard Worker 
291*9880d681SAndroid Build Coastguard Worker   // Process all blocks. This will compute block exit states, record the first
292*9880d681SAndroid Build Coastguard Worker   // unguarded call in each block, and add successors of dirty blocks to the
293*9880d681SAndroid Build Coastguard Worker   // DirtySuccessors list.
294*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock &MBB : MF)
295*9880d681SAndroid Build Coastguard Worker     processBasicBlock(MBB);
296*9880d681SAndroid Build Coastguard Worker 
297*9880d681SAndroid Build Coastguard Worker   // If any YMM regs are live-in to this function, add the entry block to the
298*9880d681SAndroid Build Coastguard Worker   // DirtySuccessors list
299*9880d681SAndroid Build Coastguard Worker   if (FnHasLiveInYmm)
300*9880d681SAndroid Build Coastguard Worker     addDirtySuccessor(MF.front());
301*9880d681SAndroid Build Coastguard Worker 
302*9880d681SAndroid Build Coastguard Worker   // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
303*9880d681SAndroid Build Coastguard Worker   // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
304*9880d681SAndroid Build Coastguard Worker   // through PASS_THROUGH blocks.
305*9880d681SAndroid Build Coastguard Worker   while (!DirtySuccessors.empty()) {
306*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock &MBB = *DirtySuccessors.back();
307*9880d681SAndroid Build Coastguard Worker     DirtySuccessors.pop_back();
308*9880d681SAndroid Build Coastguard Worker     BlockState &BBState = BlockStates[MBB.getNumber()];
309*9880d681SAndroid Build Coastguard Worker 
310*9880d681SAndroid Build Coastguard Worker     // MBB is a successor of a dirty block, so its first call needs to be
311*9880d681SAndroid Build Coastguard Worker     // guarded.
312*9880d681SAndroid Build Coastguard Worker     if (BBState.FirstUnguardedCall != MBB.end())
313*9880d681SAndroid Build Coastguard Worker       insertVZeroUpper(BBState.FirstUnguardedCall, MBB);
314*9880d681SAndroid Build Coastguard Worker 
315*9880d681SAndroid Build Coastguard Worker     // If this successor was a pass-through block, then it is now dirty. Its
316*9880d681SAndroid Build Coastguard Worker     // successors need to be added to the worklist (if they haven't been
317*9880d681SAndroid Build Coastguard Worker     // already).
318*9880d681SAndroid Build Coastguard Worker     if (BBState.ExitState == PASS_THROUGH) {
319*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "MBB #" << MBB.getNumber()
320*9880d681SAndroid Build Coastguard Worker                    << " was Pass-through, is now Dirty-out.\n");
321*9880d681SAndroid Build Coastguard Worker       for (MachineBasicBlock *Succ : MBB.successors())
322*9880d681SAndroid Build Coastguard Worker         addDirtySuccessor(*Succ);
323*9880d681SAndroid Build Coastguard Worker     }
324*9880d681SAndroid Build Coastguard Worker   }
325*9880d681SAndroid Build Coastguard Worker 
326*9880d681SAndroid Build Coastguard Worker   BlockStates.clear();
327*9880d681SAndroid Build Coastguard Worker   return EverMadeChange;
328*9880d681SAndroid Build Coastguard Worker }
329