//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//
16*9880d681SAndroid Build Coastguard Worker
17*9880d681SAndroid Build Coastguard Worker #include "X86.h"
18*9880d681SAndroid Build Coastguard Worker #include "X86InstrInfo.h"
19*9880d681SAndroid Build Coastguard Worker #include "X86Subtarget.h"
20*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/Statistic.h"
21*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
22*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/Passes.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
26*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
27*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetInstrInfo.h"
28*9880d681SAndroid Build Coastguard Worker using namespace llvm;
29*9880d681SAndroid Build Coastguard Worker
30*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "x86-vzeroupper"
31*9880d681SAndroid Build Coastguard Worker
32*9880d681SAndroid Build Coastguard Worker STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
33*9880d681SAndroid Build Coastguard Worker
34*9880d681SAndroid Build Coastguard Worker namespace {
35*9880d681SAndroid Build Coastguard Worker
36*9880d681SAndroid Build Coastguard Worker class VZeroUpperInserter : public MachineFunctionPass {
37*9880d681SAndroid Build Coastguard Worker public:
38*9880d681SAndroid Build Coastguard Worker
VZeroUpperInserter()39*9880d681SAndroid Build Coastguard Worker VZeroUpperInserter() : MachineFunctionPass(ID) {}
40*9880d681SAndroid Build Coastguard Worker bool runOnMachineFunction(MachineFunction &MF) override;
getRequiredProperties() const41*9880d681SAndroid Build Coastguard Worker MachineFunctionProperties getRequiredProperties() const override {
42*9880d681SAndroid Build Coastguard Worker return MachineFunctionProperties().set(
43*9880d681SAndroid Build Coastguard Worker MachineFunctionProperties::Property::AllVRegsAllocated);
44*9880d681SAndroid Build Coastguard Worker }
getPassName() const45*9880d681SAndroid Build Coastguard Worker const char *getPassName() const override {return "X86 vzeroupper inserter";}
46*9880d681SAndroid Build Coastguard Worker
47*9880d681SAndroid Build Coastguard Worker private:
48*9880d681SAndroid Build Coastguard Worker
49*9880d681SAndroid Build Coastguard Worker void processBasicBlock(MachineBasicBlock &MBB);
50*9880d681SAndroid Build Coastguard Worker void insertVZeroUpper(MachineBasicBlock::iterator I,
51*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB);
52*9880d681SAndroid Build Coastguard Worker void addDirtySuccessor(MachineBasicBlock &MBB);
53*9880d681SAndroid Build Coastguard Worker
54*9880d681SAndroid Build Coastguard Worker typedef enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY } BlockExitState;
55*9880d681SAndroid Build Coastguard Worker static const char* getBlockExitStateName(BlockExitState ST);
56*9880d681SAndroid Build Coastguard Worker
57*9880d681SAndroid Build Coastguard Worker // Core algorithm state:
58*9880d681SAndroid Build Coastguard Worker // BlockState - Each block is either:
59*9880d681SAndroid Build Coastguard Worker // - PASS_THROUGH: There are neither YMM dirtying instructions nor
60*9880d681SAndroid Build Coastguard Worker // vzeroupper instructions in this block.
61*9880d681SAndroid Build Coastguard Worker // - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
62*9880d681SAndroid Build Coastguard Worker // block that will ensure that YMM is clean on exit.
63*9880d681SAndroid Build Coastguard Worker // - EXITS_DIRTY: An instruction in the block dirties YMM and no
64*9880d681SAndroid Build Coastguard Worker // subsequent vzeroupper in the block clears it.
65*9880d681SAndroid Build Coastguard Worker //
66*9880d681SAndroid Build Coastguard Worker // AddedToDirtySuccessors - This flag is raised when a block is added to the
67*9880d681SAndroid Build Coastguard Worker // DirtySuccessors list to ensure that it's not
68*9880d681SAndroid Build Coastguard Worker // added multiple times.
69*9880d681SAndroid Build Coastguard Worker //
70*9880d681SAndroid Build Coastguard Worker // FirstUnguardedCall - Records the location of the first unguarded call in
71*9880d681SAndroid Build Coastguard Worker // each basic block that may need to be guarded by a
72*9880d681SAndroid Build Coastguard Worker // vzeroupper. We won't know whether it actually needs
73*9880d681SAndroid Build Coastguard Worker // to be guarded until we discover a predecessor that
74*9880d681SAndroid Build Coastguard Worker // is DIRTY_OUT.
75*9880d681SAndroid Build Coastguard Worker struct BlockState {
BlockState__anon08a8f69b0111::VZeroUpperInserter::BlockState76*9880d681SAndroid Build Coastguard Worker BlockState() : ExitState(PASS_THROUGH), AddedToDirtySuccessors(false) {}
77*9880d681SAndroid Build Coastguard Worker BlockExitState ExitState;
78*9880d681SAndroid Build Coastguard Worker bool AddedToDirtySuccessors;
79*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator FirstUnguardedCall;
80*9880d681SAndroid Build Coastguard Worker };
81*9880d681SAndroid Build Coastguard Worker typedef SmallVector<BlockState, 8> BlockStateMap;
82*9880d681SAndroid Build Coastguard Worker typedef SmallVector<MachineBasicBlock*, 8> DirtySuccessorsWorkList;
83*9880d681SAndroid Build Coastguard Worker
84*9880d681SAndroid Build Coastguard Worker BlockStateMap BlockStates;
85*9880d681SAndroid Build Coastguard Worker DirtySuccessorsWorkList DirtySuccessors;
86*9880d681SAndroid Build Coastguard Worker bool EverMadeChange;
87*9880d681SAndroid Build Coastguard Worker bool IsX86INTR;
88*9880d681SAndroid Build Coastguard Worker const TargetInstrInfo *TII;
89*9880d681SAndroid Build Coastguard Worker
90*9880d681SAndroid Build Coastguard Worker static char ID;
91*9880d681SAndroid Build Coastguard Worker };
92*9880d681SAndroid Build Coastguard Worker
93*9880d681SAndroid Build Coastguard Worker char VZeroUpperInserter::ID = 0;
94*9880d681SAndroid Build Coastguard Worker }
95*9880d681SAndroid Build Coastguard Worker
createX86IssueVZeroUpperPass()96*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createX86IssueVZeroUpperPass() {
97*9880d681SAndroid Build Coastguard Worker return new VZeroUpperInserter();
98*9880d681SAndroid Build Coastguard Worker }
99*9880d681SAndroid Build Coastguard Worker
getBlockExitStateName(BlockExitState ST)100*9880d681SAndroid Build Coastguard Worker const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
101*9880d681SAndroid Build Coastguard Worker switch (ST) {
102*9880d681SAndroid Build Coastguard Worker case PASS_THROUGH: return "Pass-through";
103*9880d681SAndroid Build Coastguard Worker case EXITS_DIRTY: return "Exits-dirty";
104*9880d681SAndroid Build Coastguard Worker case EXITS_CLEAN: return "Exits-clean";
105*9880d681SAndroid Build Coastguard Worker }
106*9880d681SAndroid Build Coastguard Worker llvm_unreachable("Invalid block exit state.");
107*9880d681SAndroid Build Coastguard Worker }
108*9880d681SAndroid Build Coastguard Worker
isYmmReg(unsigned Reg)109*9880d681SAndroid Build Coastguard Worker static bool isYmmReg(unsigned Reg) {
110*9880d681SAndroid Build Coastguard Worker return (Reg >= X86::YMM0 && Reg <= X86::YMM15);
111*9880d681SAndroid Build Coastguard Worker }
112*9880d681SAndroid Build Coastguard Worker
checkFnHasLiveInYmm(MachineRegisterInfo & MRI)113*9880d681SAndroid Build Coastguard Worker static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
114*9880d681SAndroid Build Coastguard Worker for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
115*9880d681SAndroid Build Coastguard Worker E = MRI.livein_end(); I != E; ++I)
116*9880d681SAndroid Build Coastguard Worker if (isYmmReg(I->first))
117*9880d681SAndroid Build Coastguard Worker return true;
118*9880d681SAndroid Build Coastguard Worker
119*9880d681SAndroid Build Coastguard Worker return false;
120*9880d681SAndroid Build Coastguard Worker }
121*9880d681SAndroid Build Coastguard Worker
clobbersAllYmmRegs(const MachineOperand & MO)122*9880d681SAndroid Build Coastguard Worker static bool clobbersAllYmmRegs(const MachineOperand &MO) {
123*9880d681SAndroid Build Coastguard Worker for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
124*9880d681SAndroid Build Coastguard Worker if (!MO.clobbersPhysReg(reg))
125*9880d681SAndroid Build Coastguard Worker return false;
126*9880d681SAndroid Build Coastguard Worker }
127*9880d681SAndroid Build Coastguard Worker return true;
128*9880d681SAndroid Build Coastguard Worker }
129*9880d681SAndroid Build Coastguard Worker
hasYmmReg(MachineInstr & MI)130*9880d681SAndroid Build Coastguard Worker static bool hasYmmReg(MachineInstr &MI) {
131*9880d681SAndroid Build Coastguard Worker for (const MachineOperand &MO : MI.operands()) {
132*9880d681SAndroid Build Coastguard Worker if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
133*9880d681SAndroid Build Coastguard Worker return true;
134*9880d681SAndroid Build Coastguard Worker if (!MO.isReg())
135*9880d681SAndroid Build Coastguard Worker continue;
136*9880d681SAndroid Build Coastguard Worker if (MO.isDebug())
137*9880d681SAndroid Build Coastguard Worker continue;
138*9880d681SAndroid Build Coastguard Worker if (isYmmReg(MO.getReg()))
139*9880d681SAndroid Build Coastguard Worker return true;
140*9880d681SAndroid Build Coastguard Worker }
141*9880d681SAndroid Build Coastguard Worker return false;
142*9880d681SAndroid Build Coastguard Worker }
143*9880d681SAndroid Build Coastguard Worker
144*9880d681SAndroid Build Coastguard Worker /// Check if any YMM register will be clobbered by this instruction.
callClobbersAnyYmmReg(MachineInstr & MI)145*9880d681SAndroid Build Coastguard Worker static bool callClobbersAnyYmmReg(MachineInstr &MI) {
146*9880d681SAndroid Build Coastguard Worker assert(MI.isCall() && "Can only be called on call instructions.");
147*9880d681SAndroid Build Coastguard Worker for (const MachineOperand &MO : MI.operands()) {
148*9880d681SAndroid Build Coastguard Worker if (!MO.isRegMask())
149*9880d681SAndroid Build Coastguard Worker continue;
150*9880d681SAndroid Build Coastguard Worker for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
151*9880d681SAndroid Build Coastguard Worker if (MO.clobbersPhysReg(reg))
152*9880d681SAndroid Build Coastguard Worker return true;
153*9880d681SAndroid Build Coastguard Worker }
154*9880d681SAndroid Build Coastguard Worker }
155*9880d681SAndroid Build Coastguard Worker return false;
156*9880d681SAndroid Build Coastguard Worker }
157*9880d681SAndroid Build Coastguard Worker
158*9880d681SAndroid Build Coastguard Worker /// Insert a vzeroupper instruction before I.
insertVZeroUpper(MachineBasicBlock::iterator I,MachineBasicBlock & MBB)159*9880d681SAndroid Build Coastguard Worker void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
160*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB) {
161*9880d681SAndroid Build Coastguard Worker DebugLoc dl = I->getDebugLoc();
162*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER));
163*9880d681SAndroid Build Coastguard Worker ++NumVZU;
164*9880d681SAndroid Build Coastguard Worker EverMadeChange = true;
165*9880d681SAndroid Build Coastguard Worker }
166*9880d681SAndroid Build Coastguard Worker
167*9880d681SAndroid Build Coastguard Worker /// Add MBB to the DirtySuccessors list if it hasn't already been added.
addDirtySuccessor(MachineBasicBlock & MBB)168*9880d681SAndroid Build Coastguard Worker void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
169*9880d681SAndroid Build Coastguard Worker if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
170*9880d681SAndroid Build Coastguard Worker DirtySuccessors.push_back(&MBB);
171*9880d681SAndroid Build Coastguard Worker BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
172*9880d681SAndroid Build Coastguard Worker }
173*9880d681SAndroid Build Coastguard Worker }
174*9880d681SAndroid Build Coastguard Worker
175*9880d681SAndroid Build Coastguard Worker /// Loop over all of the instructions in the basic block, inserting vzeroupper
176*9880d681SAndroid Build Coastguard Worker /// instructions before function calls.
processBasicBlock(MachineBasicBlock & MBB)177*9880d681SAndroid Build Coastguard Worker void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
178*9880d681SAndroid Build Coastguard Worker
179*9880d681SAndroid Build Coastguard Worker // Start by assuming that the block is PASS_THROUGH which implies no unguarded
180*9880d681SAndroid Build Coastguard Worker // calls.
181*9880d681SAndroid Build Coastguard Worker BlockExitState CurState = PASS_THROUGH;
182*9880d681SAndroid Build Coastguard Worker BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();
183*9880d681SAndroid Build Coastguard Worker
184*9880d681SAndroid Build Coastguard Worker for (MachineInstr &MI : MBB) {
185*9880d681SAndroid Build Coastguard Worker // No need for vzeroupper before iret in interrupt handler function,
186*9880d681SAndroid Build Coastguard Worker // epilogue will restore YMM registers if needed.
187*9880d681SAndroid Build Coastguard Worker bool IsReturnFromX86INTR = IsX86INTR && MI.isReturn();
188*9880d681SAndroid Build Coastguard Worker bool IsControlFlow = MI.isCall() || MI.isReturn();
189*9880d681SAndroid Build Coastguard Worker
190*9880d681SAndroid Build Coastguard Worker // An existing VZERO* instruction resets the state.
191*9880d681SAndroid Build Coastguard Worker if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
192*9880d681SAndroid Build Coastguard Worker CurState = EXITS_CLEAN;
193*9880d681SAndroid Build Coastguard Worker continue;
194*9880d681SAndroid Build Coastguard Worker }
195*9880d681SAndroid Build Coastguard Worker
196*9880d681SAndroid Build Coastguard Worker // Shortcut: don't need to check regular instructions in dirty state.
197*9880d681SAndroid Build Coastguard Worker if ((!IsControlFlow || IsReturnFromX86INTR) && CurState == EXITS_DIRTY)
198*9880d681SAndroid Build Coastguard Worker continue;
199*9880d681SAndroid Build Coastguard Worker
200*9880d681SAndroid Build Coastguard Worker if (hasYmmReg(MI)) {
201*9880d681SAndroid Build Coastguard Worker // We found a ymm-using instruction; this could be an AVX instruction,
202*9880d681SAndroid Build Coastguard Worker // or it could be control flow.
203*9880d681SAndroid Build Coastguard Worker CurState = EXITS_DIRTY;
204*9880d681SAndroid Build Coastguard Worker continue;
205*9880d681SAndroid Build Coastguard Worker }
206*9880d681SAndroid Build Coastguard Worker
207*9880d681SAndroid Build Coastguard Worker // Check for control-flow out of the current function (which might
208*9880d681SAndroid Build Coastguard Worker // indirectly execute SSE instructions).
209*9880d681SAndroid Build Coastguard Worker if (!IsControlFlow || IsReturnFromX86INTR)
210*9880d681SAndroid Build Coastguard Worker continue;
211*9880d681SAndroid Build Coastguard Worker
212*9880d681SAndroid Build Coastguard Worker // If the call won't clobber any YMM register, skip it as well. It usually
213*9880d681SAndroid Build Coastguard Worker // happens on helper function calls (such as '_chkstk', '_ftol2') where
214*9880d681SAndroid Build Coastguard Worker // standard calling convention is not used (RegMask is not used to mark
215*9880d681SAndroid Build Coastguard Worker // register clobbered and register usage (def/imp-def/use) is well-defined
216*9880d681SAndroid Build Coastguard Worker // and explicitly specified.
217*9880d681SAndroid Build Coastguard Worker if (MI.isCall() && !callClobbersAnyYmmReg(MI))
218*9880d681SAndroid Build Coastguard Worker continue;
219*9880d681SAndroid Build Coastguard Worker
220*9880d681SAndroid Build Coastguard Worker // The VZEROUPPER instruction resets the upper 128 bits of all AVX
221*9880d681SAndroid Build Coastguard Worker // registers. In addition, the processor changes back to Clean state, after
222*9880d681SAndroid Build Coastguard Worker // which execution of SSE instructions or AVX instructions has no transition
223*9880d681SAndroid Build Coastguard Worker // penalty. Add the VZEROUPPER instruction before any function call/return
224*9880d681SAndroid Build Coastguard Worker // that might execute SSE code.
225*9880d681SAndroid Build Coastguard Worker // FIXME: In some cases, we may want to move the VZEROUPPER into a
226*9880d681SAndroid Build Coastguard Worker // predecessor block.
227*9880d681SAndroid Build Coastguard Worker if (CurState == EXITS_DIRTY) {
228*9880d681SAndroid Build Coastguard Worker // After the inserted VZEROUPPER the state becomes clean again, but
229*9880d681SAndroid Build Coastguard Worker // other YMM may appear before other subsequent calls or even before
230*9880d681SAndroid Build Coastguard Worker // the end of the BB.
231*9880d681SAndroid Build Coastguard Worker insertVZeroUpper(MI, MBB);
232*9880d681SAndroid Build Coastguard Worker CurState = EXITS_CLEAN;
233*9880d681SAndroid Build Coastguard Worker } else if (CurState == PASS_THROUGH) {
234*9880d681SAndroid Build Coastguard Worker // If this block is currently in pass-through state and we encounter a
235*9880d681SAndroid Build Coastguard Worker // call then whether we need a vzeroupper or not depends on whether this
236*9880d681SAndroid Build Coastguard Worker // block has successors that exit dirty. Record the location of the call,
237*9880d681SAndroid Build Coastguard Worker // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet.
238*9880d681SAndroid Build Coastguard Worker // It will be inserted later if necessary.
239*9880d681SAndroid Build Coastguard Worker BlockStates[MBB.getNumber()].FirstUnguardedCall = MI;
240*9880d681SAndroid Build Coastguard Worker CurState = EXITS_CLEAN;
241*9880d681SAndroid Build Coastguard Worker }
242*9880d681SAndroid Build Coastguard Worker }
243*9880d681SAndroid Build Coastguard Worker
244*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: "
245*9880d681SAndroid Build Coastguard Worker << getBlockExitStateName(CurState) << '\n');
246*9880d681SAndroid Build Coastguard Worker
247*9880d681SAndroid Build Coastguard Worker if (CurState == EXITS_DIRTY)
248*9880d681SAndroid Build Coastguard Worker for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
249*9880d681SAndroid Build Coastguard Worker SE = MBB.succ_end();
250*9880d681SAndroid Build Coastguard Worker SI != SE; ++SI)
251*9880d681SAndroid Build Coastguard Worker addDirtySuccessor(**SI);
252*9880d681SAndroid Build Coastguard Worker
253*9880d681SAndroid Build Coastguard Worker BlockStates[MBB.getNumber()].ExitState = CurState;
254*9880d681SAndroid Build Coastguard Worker }
255*9880d681SAndroid Build Coastguard Worker
256*9880d681SAndroid Build Coastguard Worker /// Loop over all of the basic blocks, inserting vzeroupper instructions before
257*9880d681SAndroid Build Coastguard Worker /// function calls.
runOnMachineFunction(MachineFunction & MF)258*9880d681SAndroid Build Coastguard Worker bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
259*9880d681SAndroid Build Coastguard Worker const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
260*9880d681SAndroid Build Coastguard Worker if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
261*9880d681SAndroid Build Coastguard Worker return false;
262*9880d681SAndroid Build Coastguard Worker TII = ST.getInstrInfo();
263*9880d681SAndroid Build Coastguard Worker MachineRegisterInfo &MRI = MF.getRegInfo();
264*9880d681SAndroid Build Coastguard Worker EverMadeChange = false;
265*9880d681SAndroid Build Coastguard Worker IsX86INTR = MF.getFunction()->getCallingConv() == CallingConv::X86_INTR;
266*9880d681SAndroid Build Coastguard Worker
267*9880d681SAndroid Build Coastguard Worker bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
268*9880d681SAndroid Build Coastguard Worker
269*9880d681SAndroid Build Coastguard Worker // Fast check: if the function doesn't use any ymm registers, we don't need
270*9880d681SAndroid Build Coastguard Worker // to insert any VZEROUPPER instructions. This is constant-time, so it is
271*9880d681SAndroid Build Coastguard Worker // cheap in the common case of no ymm use.
272*9880d681SAndroid Build Coastguard Worker bool YMMUsed = FnHasLiveInYmm;
273*9880d681SAndroid Build Coastguard Worker if (!YMMUsed) {
274*9880d681SAndroid Build Coastguard Worker const TargetRegisterClass *RC = &X86::VR256RegClass;
275*9880d681SAndroid Build Coastguard Worker for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
276*9880d681SAndroid Build Coastguard Worker i++) {
277*9880d681SAndroid Build Coastguard Worker if (!MRI.reg_nodbg_empty(*i)) {
278*9880d681SAndroid Build Coastguard Worker YMMUsed = true;
279*9880d681SAndroid Build Coastguard Worker break;
280*9880d681SAndroid Build Coastguard Worker }
281*9880d681SAndroid Build Coastguard Worker }
282*9880d681SAndroid Build Coastguard Worker }
283*9880d681SAndroid Build Coastguard Worker if (!YMMUsed) {
284*9880d681SAndroid Build Coastguard Worker return false;
285*9880d681SAndroid Build Coastguard Worker }
286*9880d681SAndroid Build Coastguard Worker
287*9880d681SAndroid Build Coastguard Worker assert(BlockStates.empty() && DirtySuccessors.empty() &&
288*9880d681SAndroid Build Coastguard Worker "X86VZeroUpper state should be clear");
289*9880d681SAndroid Build Coastguard Worker BlockStates.resize(MF.getNumBlockIDs());
290*9880d681SAndroid Build Coastguard Worker
291*9880d681SAndroid Build Coastguard Worker // Process all blocks. This will compute block exit states, record the first
292*9880d681SAndroid Build Coastguard Worker // unguarded call in each block, and add successors of dirty blocks to the
293*9880d681SAndroid Build Coastguard Worker // DirtySuccessors list.
294*9880d681SAndroid Build Coastguard Worker for (MachineBasicBlock &MBB : MF)
295*9880d681SAndroid Build Coastguard Worker processBasicBlock(MBB);
296*9880d681SAndroid Build Coastguard Worker
297*9880d681SAndroid Build Coastguard Worker // If any YMM regs are live-in to this function, add the entry block to the
298*9880d681SAndroid Build Coastguard Worker // DirtySuccessors list
299*9880d681SAndroid Build Coastguard Worker if (FnHasLiveInYmm)
300*9880d681SAndroid Build Coastguard Worker addDirtySuccessor(MF.front());
301*9880d681SAndroid Build Coastguard Worker
302*9880d681SAndroid Build Coastguard Worker // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
303*9880d681SAndroid Build Coastguard Worker // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
304*9880d681SAndroid Build Coastguard Worker // through PASS_THROUGH blocks.
305*9880d681SAndroid Build Coastguard Worker while (!DirtySuccessors.empty()) {
306*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *DirtySuccessors.back();
307*9880d681SAndroid Build Coastguard Worker DirtySuccessors.pop_back();
308*9880d681SAndroid Build Coastguard Worker BlockState &BBState = BlockStates[MBB.getNumber()];
309*9880d681SAndroid Build Coastguard Worker
310*9880d681SAndroid Build Coastguard Worker // MBB is a successor of a dirty block, so its first call needs to be
311*9880d681SAndroid Build Coastguard Worker // guarded.
312*9880d681SAndroid Build Coastguard Worker if (BBState.FirstUnguardedCall != MBB.end())
313*9880d681SAndroid Build Coastguard Worker insertVZeroUpper(BBState.FirstUnguardedCall, MBB);
314*9880d681SAndroid Build Coastguard Worker
315*9880d681SAndroid Build Coastguard Worker // If this successor was a pass-through block, then it is now dirty. Its
316*9880d681SAndroid Build Coastguard Worker // successors need to be added to the worklist (if they haven't been
317*9880d681SAndroid Build Coastguard Worker // already).
318*9880d681SAndroid Build Coastguard Worker if (BBState.ExitState == PASS_THROUGH) {
319*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "MBB #" << MBB.getNumber()
320*9880d681SAndroid Build Coastguard Worker << " was Pass-through, is now Dirty-out.\n");
321*9880d681SAndroid Build Coastguard Worker for (MachineBasicBlock *Succ : MBB.successors())
322*9880d681SAndroid Build Coastguard Worker addDirtySuccessor(*Succ);
323*9880d681SAndroid Build Coastguard Worker }
324*9880d681SAndroid Build Coastguard Worker }
325*9880d681SAndroid Build Coastguard Worker
326*9880d681SAndroid Build Coastguard Worker BlockStates.clear();
327*9880d681SAndroid Build Coastguard Worker return EverMadeChange;
328*9880d681SAndroid Build Coastguard Worker }
329