//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocPriorityAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                             cl::desc("Last chance recoloring max depth"),
                             cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interferences at a time"),
    cl::init(8));

static cl::opt<bool> ExhaustiveSearch(
    "exhaustive-register-search", cl::NotHidden,
    cl::desc("Exhaustive Search for registers bypassing the depth "
             "and interference cutoffs of last chance recoloring"),
    cl::Hidden);

static cl::opt<bool> EnableDeferredSpilling(
    "enable-deferred-spilling", cl::Hidden,
    cl::desc("Instead of spilling a variable right away, defer the actual "
             "code insertion to the end of the allocation. That way the "
             "allocator might still find a suitable coloring for this "
             "variable because of other evicted variables."),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
                 cl::desc("Cost for first time use of callee-saved register."),
                 cl::init(0), cl::Hidden);

static cl::opt<unsigned long> GrowRegionComplexityBudget(
    "grow-region-complexity-budget",
    cl::desc("growRegion() does not scale with the number of BB edges, so "
             "limit its budget and bail out once we reach the limit."),
    cl::init(10000), cl::Hidden);

static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
    "greedy-regclass-priority-trumps-globalness",
    cl::desc("Change the greedy register allocator's live range priority "
             "calculation to make the AllocationPriority of the register class "
             "more important than whether the range is global"),
    cl::Hidden);

static cl::opt<bool> GreedyReverseLocalAssignment(
    "greedy-reverse-local-assignment",
    cl::desc("Reverse allocation order of local live ranges, such that "
             "shorter local live ranges will tend to be allocated first"),
    cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;

INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
                      "Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis)
INITIALIZE_PASS_END(RAGreedy, "greedy",
                    "Greedy Register Allocator", false, false)

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
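//
// A minimal usage sketch (illustrative only, not a call site from this file):
//   if (NewWeight < Hysteresis * BestWeight)
//     BestWeight = NewWeight; // switch only when clearly (~2%) better
// Requiring this margin keeps near-tied float comparisons from flip-flopping
// between two almost-equal candidates on successive decisions.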

FunctionPass *llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
  return new RAGreedy(Ftor);
}

RAGreedy::RAGreedy(RegClassFilterFunc F):
  MachineFunctionPass(ID),
  RegAllocBase(F) {
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
  AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

//===----------------------------------------------------------------------===//
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  RegAllocBase::enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}

void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!Info.inBounds(Old))
    return;
  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned:
  // reset the stage to RS_Assign and let the clone inherit it from the parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  unsigned Ret = PriorityAdvisor->getPriority(*LI);

  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
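  // (Illustrative note, assuming the usual max-heap behavior of
  // std::priority_queue: complementing makes the smaller vreg number the
  // larger tie-breaking key, so e.g. %1 pops before %2 at equal priority,
  // and dequeue() recovers the register by complementing again.)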
  CurQueue.push(std::make_pair(Ret, ~Reg));
}

unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  const unsigned Size = LI.getSize();
  const Register Reg = LI.reg();
  unsigned Prio;
  LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);

  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = RC.GlobalPriority ||
                       (!ReverseLocalAssignment &&
                        (Size / SlotIndex::InstrDist) >
                            (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
        LIS->intervalIsInOneMBB(LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocalAssignment)
        Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }

    // Priority bit layout:
    // 31    RS_Assign priority
    // 30    Preference priority
    // if (RegClassPriorityTrumpsGlobalness)
    //   29-25 AllocPriority
    //   24    GlobalBit
    // else
    //   29    Global bit
    //   28-24 AllocPriority
    // 0-23  Size/Instr distance
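    //
    // Worked example (illustrative numbers, not from any particular target):
    // a global range with clamped size 100 in a class with AllocationPriority
    // 2, no hint, and RegClassPriorityTrumpsGlobalness unset ends up with
    //   Prio = (1u << 31) | (1u << 29) | (2u << 24) | 100;
    // the RS_Assign bit dominates, then the global bit, then the class
    // priority, then the size.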

    // Clamp the size to fit with the priority masking scheme
    Prio = std::min(Prio, (unsigned)maxUIntN(24));
    assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");

    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }

  return Prio;
}

const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
// Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }
      // Record the missed hint; we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}

//===----------------------------------------------------------------------===//
// Interference eviction
//===----------------------------------------------------------------------===//

Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
                                              Register PrevReg) const {
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    if ((*I).id() == PrevReg.id())
      continue;

    MCRegUnitIterator Units(*I, TRI);
    for (; Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
      if (subQ.checkInterference())
        break;
    }
    // If no units have interference, break out with the current PhysReg.
    if (!Units.isValid())
      PhysReg = *I;
  }
  if (PhysReg)
    LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                      << printReg(PrevReg, TRI) << " to "
                      << printReg(PhysReg, TRI) << '\n');
  return PhysReg;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
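  //
  // Illustrative example: if VirtReg carries cascade 3 and evicts %a and %b,
  // both get cascade 3, and from then on only a range with a cascade number
  // greater than 3 may evict them again, so eviction chains cannot cycle.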
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // We usually have the interfering VRegs cached, so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, because different SubRanges may have been
    // queried against it.
    ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

std::optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return std::nullopt;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
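    //
    // Hypothetical example: with per-register costs [0, 0, 0, 1, 1, 1, 1] and
    // CostPerUseLimit = 1, the last cost change is at index 3, so only the
    // first three registers are worth trying.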
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}

bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
                                                 MCRegister PhysReg) const {
  if (RegCosts[PhysReg] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
/// @return Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
// Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in Physreg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias; it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(ArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(ArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(ArrayRef(BCS, B));
  SpillPlacer->addLinks(ArrayRef(TBS, T));
  return true;
}

bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
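///
/// Illustrative reading of the loop below: each use block contributes one
/// unit of its block frequency (a single reload or spill); a block where the
/// value is live-in, live-out, and redefined contributes two.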
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
/// must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
               MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                               NumCands, false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
    if (NumCands == IntfCache.getMaxCursors()) {
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
        if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
          continue;
        unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
        if (Count < WorstCount) {
          Worst = CandIndex;
          WorstCount = Count;
        }
      }
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands+1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
               MBFI->printBlockFreq(dbgs(), Cost));
    if (Cost >= BestCost) {
      LLVM_DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    if (!growRegion(Cand)) {
      LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
      continue;
    }

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      LLVM_DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    Cost += calcGlobalSplitCost(Cand, Order);
    LLVM_DEBUG({
      dbgs() << ", total = ";
      MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
      for (int I : Cand.LiveBundles.set_bits())
        dbgs() << " EB#" << I;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
    }
    ++NumCands;
  }

  return BestCand;
}

unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

//===----------------------------------------------------------------------===//
// Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges that will be split by tryLocalSplit if
/// they don't allocate.
unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  Register Reg = VirtReg.reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
    if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
      ExtraInfo->setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}

//===----------------------------------------------------------------------===//
// Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
                                       const TargetRegisterInfo &TRI,
                                       const MachineInstr &MI, Register Reg) {
  LaneBitmask Mask;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.getReg() != Reg)
      continue;

    unsigned SubReg = MO.getSubReg();
    if (SubReg == 0 && MO.isUse()) {
      Mask |= MRI.getMaxLaneMaskForVReg(Reg);
      continue;
    }

    LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
    if (MO.isDef()) {
      if (!MO.isUndef())
        Mask |= ~SubRegMask;
    } else
      Mask |= SubRegMask;
  }

  return Mask;
}

/// Return true if \p MI at \p Use reads a subset of the lanes live in \p
/// VirtReg.
static bool readsLaneSubset(const MachineRegisterInfo &MRI,
                            const MachineInstr *MI, const LiveInterval &VirtReg,
                            const TargetRegisterInfo *TRI, SlotIndex Use) {
  // Early check the common case.
  if (MI->isCopy() &&
      MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
    return false;

  // FIXME: We're only considering uses, but should we consider defs too?
  LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());

  LaneBitmask LiveAtMask;
  for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
    if (S.liveAt(Use))
      LiveAtMask |= S.LaneMask;
  }

  // If the live lanes aren't different from the lanes used by the instruction,
  // this doesn't help.
  return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
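///
/// Hypothetical illustration (not a statement about any particular target's
/// constraints): on x86, a range pinned to GR32_ABCD by one instruction can
/// be split so that only a tiny interval stays in GR32_ABCD, while the
/// surrounding copies may inflate to GR32, where far more registers are
/// available.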
unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                       AllocationOrder &Order,
                                       SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.

  bool SplitSubClass = true;
  if (!RegClassInfo.isProperSubClass(CurRC)) {
    if (!VirtReg.hasSubRanges())
      return 0;
    SplitSubClass = false;
  }

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
      if (MI->isFullCopy() ||
          (SplitSubClass &&
           SuperRCNumAllocatableRegs ==
               getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                   TII, TRI, RegClassInfo)) ||
          // TODO: Handle split for subranges with subclass constraints?
          (!SplitSubClass && VirtReg.hasSubRanges() &&
           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) {
        LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
        continue;
      }
    }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}

//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//

/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
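/// Example (illustrative): with UseSlots = {A, B, C} there are two gaps,
/// GapWeight[0] for [A;B) and GapWeight[1] for [B;C); interference that
/// overlaps the instruction at B is counted in both gaps.
///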
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[*Units].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // TODO: the function currently only handles a single UseBlock; it should be
  // possible to generalize.
  if (SA->getUseBlocks().size() != 1)
    return 0;

  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

1479 // Note that it is possible to have an interval that is live-in or live-out
1480 // while only covering a single block - A phi-def can use undef values from
1481 // predecessors, and the block could be a single-block loop.
1482 // We don't bother doing anything clever about such a case, we simply assume
1483 // that the interval is continuous from FirstInstr to LastInstr. We should
1484 // make sure that we don't do anything illegal to such an interval, though.
1485
1486 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
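  // With fewer than three uses there is at most one gap between uses, so a
  // local split could not create a strictly smaller interval; bail out.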
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  LLVM_DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (const auto &Use : Uses)
      dbgs() << ' ' << Use;
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned RI =
        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
    unsigned RE = RMS.size();
    for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
      // Look for Uses[I] <= RMS <= Uses[I + 1].
      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
        break;
      LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
                        << Uses[I + 1]);
      RegMaskGaps.push_back(I);
      // Advance RI to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
        ++RI;
    }
    LLVM_DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require that progress be made for ranges with getStage() == RS_Split2.
  //    All the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

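  // Gap weights are spill weights, which are normalized to the entry block
  // frequency; compute this block's frequency on the same scale.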
  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
        GapWeight[RegMaskGaps[I]] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
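      // The new range spans SplitAfter - SplitBefore gaps between uses, plus
      // one boundary gap on each side where the value stays live and a COPY
      // connects it to the rest of the range.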
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        LLVM_DEBUG(dbgs() << " w=" << EstWeight);
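        // Hysteresis is a constant slightly below 1.0, used to damp these
        // comparisons so that near-equal candidates do not flip-flop from
        // one iteration to the next.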
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            LLVM_DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          LLVM_DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
              MaxGap = std::max(MaxGap, GapWeight[I]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        LLVM_DEBUG(dbgs() << " end\n");
        break;
      }

      LLVM_DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
                    << Uses[BestAfter] << ", " << BestDiff << ", "
                    << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit);

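  // Perform the chosen split: the new interval is entered just before
  // Uses[BestBefore] and left just after Uses[BestAfter], so the connecting
  // COPYs land outside the selected use range.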
  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
      if (IntvMap[I] == 1) {
        ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
        LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
      }
    LLVM_DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<Register> &NewVRegs,
                            const SmallVirtRegSet &FixedRegisters) {
  // Ranges must be Split2 or less.
  if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
                     TimerGroupDescription, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
    MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}

//===----------------------------------------------------------------------===//
//                          Last Chance Recoloring
//===----------------------------------------------------------------------===//

/// Return true if \p reg has any tied def operand.
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
  for (const MachineOperand &MO : MRI->def_operands(reg))
    if (MO.isTied())
      return true;

  return false;
}

/// Return true if the existing assignment of \p Intf overlaps, but is not the
/// same, as \p PhysReg.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
                                         const VirtRegMap &VRM,
                                         MCRegister PhysReg,
                                         const LiveInterval &Intf) {
  MCRegister AssignedReg = VRM.getPhys(Intf.reg());
  if (PhysReg == AssignedReg)
    return false;
  return TRI.regsOverlap(PhysReg, AssignedReg);
}

/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool RAGreedy::mayRecolorAllInterferences(
    MCRegister PhysReg, const LiveInterval &VirtReg,
    SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());

  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there are LastChanceRecoloringMaxInterference or more interferences,
    // chances are one would not be recolorable.
    if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
            LastChanceRecoloringMaxInterference &&
        !ExhaustiveSearch) {
      LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
      CutOffInfo |= CO_Interf;
      return false;
    }
    for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // If Intf is done and sits on the same register class as VirtReg, it
      // would not be recolorable as it is in the same state as
      // VirtReg. However, there are at least two exceptions.
      //
      // If VirtReg has tied defs and Intf doesn't, then it is still worth
      // examining whether it can be recolored.
      //
      // Additionally, if the register class has overlapping tuple members, it
      // may still be recolorable using a different tuple. This is more likely
      // if the existing assignment aliases with the candidate.
      //
      if (((ExtraInfo->getStage(*Intf) == RS_Done &&
            MRI->getRegClass(Intf->reg()) == CurRC &&
            !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
           !(hasTiedDef(MRI, VirtReg.reg()) &&
             !hasTiedDef(MRI, Intf->reg()))) ||
          FixedRegisters.count(Intf->reg())) {
        LLVM_DEBUG(
            dbgs() << "Early abort: the interference is not recolorable.\n");
        return false;
      }
      RecoloringCandidates.insert(Intf);
    }
  }
  return true;
}

/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
/// its interferences.
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
/// virtual register that was using it. The recoloring process may recursively
/// use the last chance recoloring. Therefore, when a virtual register has been
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
/// be last-chance-recolored again during this recoloring "session".
/// E.g.,
/// Let
/// vA can use {R1, R2    }
/// vB can use {    R2, R3}
/// vC can use {R1        }
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
/// instance) and they all interfere.
///
/// vA is assigned R1
/// vB is assigned R2
/// vC tries to evict vA but vA is already done.
/// Regular register allocation fails.
///
/// Last chance recoloring kicks in:
/// vC behaves as if vA was evicted => vC uses R1.
/// vC is marked as fixed.
/// vA needs to find a color.
/// None are available.
/// vA cannot evict vC: vC is a fixed virtual register now.
/// vA behaves as if vB was evicted => vA uses R2.
/// vB needs to find a color.
/// R3 is available.
/// Recoloring => vC = R1, vA = R2, vB = R3
///
/// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual registers that have been created
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
///
/// \p RecolorStack tracks the original assignments of successfully recolored
/// registers.
///
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
                                           AllocationOrder &Order,
                                           SmallVectorImpl<Register> &NewVRegs,
                                           SmallVirtRegSet &FixedRegisters,
                                           RecoloringStack &RecolorStack,
                                           unsigned Depth) {
  if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
    return ~0u;

  LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');

  const ssize_t EntryStackSize = RecolorStack.size();

  // Ranges must be Done.
  assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
         "Last chance recoloring should really be last chance");
  // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with a too large search space
  // for targets with hundreds of registers.
  // Indeed, in that case we may want to cut the search space earlier.
  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
    LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
    CutOffInfo |= CO_Depth;
    return ~0u;
  }

  // Set of live intervals that will need to be recolored.
  SmallLISet RecoloringCandidates;

  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
  // this recoloring "session".
  assert(!FixedRegisters.count(VirtReg.reg()));
  FixedRegisters.insert(VirtReg.reg());
  SmallVector<Register, 4> CurrentNewVRegs;

  for (MCRegister PhysReg : Order) {
    assert(PhysReg.isValid());
    LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');
    RecoloringCandidates.clear();
    CurrentNewVRegs.clear();

    // It is only possible to recolor virtual register interference.
    if (Matrix->checkInterference(VirtReg, PhysReg) >
        LiveRegMatrix::IK_VirtReg) {
      LLVM_DEBUG(
          dbgs() << "Some interferences are not with virtual registers.\n");

      continue;
    }

    // Early give up on this PhysReg if it is obvious we cannot recolor all
    // the interferences.
    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                    FixedRegisters)) {
      LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
      continue;
    }

    // RecoloringCandidates contains all the virtual registers that interfere
    // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
    // recoloring and perform the actual recoloring.
    PQueue RecoloringQueue;
    for (const LiveInterval *RC : RecoloringCandidates) {
      Register ItVirtReg = RC->reg();
      enqueue(RecoloringQueue, RC);
      assert(VRM->hasPhys(ItVirtReg) &&
             "Interferences are supposed to be with allocated variables");

      // Record the current allocation.
      RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));

      // Unassign the interval from the live register matrix.
      Matrix->unassign(*RC);
    }

    // Pretend VirtReg was assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferences and
    // available colors.
    Matrix->assign(VirtReg, PhysReg);

    // Save the current recoloring state.
    // If we cannot recolor all the interferences, we will have to start again
    // at this point for the next physical register.
    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
    if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
                                FixedRegisters, RecolorStack, Depth)) {
      // Push the queued vregs into the main queue.
      for (Register NewVReg : CurrentNewVRegs)
        NewVRegs.push_back(NewVReg);
      // Do not mess up with the global assignment process.
      // I.e., VirtReg must be unassigned.
      Matrix->unassign(VirtReg);
      return PhysReg;
    }

    LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');

    // The recoloring attempt failed, undo the changes.
    FixedRegisters = SaveFixedRegisters;
    Matrix->unassign(VirtReg);

    // For a newly created vreg which is also in RecoloringCandidates,
    // don't add it to NewVRegs because its physical register will be restored
    // below. Other vregs in CurrentNewVRegs are created by calling
    // selectOrSplit and should be added into NewVRegs.
    for (Register &R : CurrentNewVRegs) {
      if (RecoloringCandidates.count(&LIS->getInterval(R)))
        continue;
      NewVRegs.push_back(R);
    }

    // Roll back our unsuccessful recoloring. Also roll back any successful
    // recolorings in any recursive recoloring attempts, since it's possible
    // they would have introduced conflicts with assignments we will be
    // restoring further up the stack. Perform all unassignments prior to
    // reassigning, since sub-recolorings may have conflicted with the
    // registers we are going to restore to their original assignments.
    for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
      const LiveInterval *LI;
      MCRegister PhysReg;
      std::tie(LI, PhysReg) = RecolorStack[I];

      if (VRM->hasPhys(LI->reg()))
        Matrix->unassign(*LI);
    }

    for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
      const LiveInterval *LI;
      MCRegister PhysReg;
      std::tie(LI, PhysReg) = RecolorStack[I];
      if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
        Matrix->assign(*LI, PhysReg);
    }

    // Pop the stack of recoloring attempts.
    RecolorStack.resize(EntryStackSize);
  }

  // Last chance recoloring did not work either; give up.
  return ~0u;
}

/// tryRecoloringCandidates - Try to assign a new color to every register
/// in \p RecoloringQueue.
/// \p NewRegs will contain any new virtual register created during the
/// recoloring process.
/// \p FixedRegisters[in/out] contains all the registers that have been
/// recolored.
/// \return true if all virtual registers in RecoloringQueue were successfully
/// recolored, false otherwise.
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       RecoloringStack &RecolorStack,
                                       unsigned Depth) {
  while (!RecoloringQueue.empty()) {
    const LiveInterval *LI = dequeue(RecoloringQueue);
    LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
    MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
                                           RecolorStack, Depth + 1);
    // When splitting happens, the live-range may actually be empty.
    // In that case, it is okay to continue the recoloring even
    // if we did not find an alternative color for it. Indeed,
    // there will not be anything to color for LI in the end.
    if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
      return false;

    if (!PhysReg) {
      assert(LI->empty() && "Only empty live-ranges do not require a register");
      LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                        << " succeeded. Empty LI.\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                      << " succeeded with: " << printReg(PhysReg, TRI) << '\n');

    Matrix->assign(*LI, PhysReg);
    FixedRegisters.insert(LI->reg());
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
                                   SmallVectorImpl<Register> &NewVRegs) {
  CutOffInfo = CO_None;
  LLVMContext &Ctx = MF->getFunction().getContext();
  SmallVirtRegSet FixedRegisters;
  RecoloringStack RecolorStack;
  MCRegister Reg =
      selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
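  // ~0U is the sentinel returned when last chance recoloring hit one of its
  // cutoffs; report which cutoff fired so the user knows
  // -fexhaustive-register-search may help.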
  if (Reg == ~0U && (CutOffInfo != CO_None)) {
    uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
    if (CutOffEncountered == CO_Depth)
      Ctx.emitError("register allocation failed: maximum depth for recoloring "
                    "reached. Use -fexhaustive-register-search to skip "
                    "cutoffs");
    else if (CutOffEncountered == CO_Interf)
      Ctx.emitError("register allocation failed: maximum interference for "
                    "recoloring reached. Use -fexhaustive-register-search "
                    "to skip cutoffs");
    else if (CutOffEncountered == (CO_Depth | CO_Interf))
      Ctx.emitError("register allocation failed: maximum interference and "
                    "depth for recoloring reached. Use "
                    "-fexhaustive-register-search to skip cutoffs");
  }
  return Reg;
}

/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to prologue|epilogue. Splitting a cold section of the live
/// range can have lower cost than using the CSR for the first time;
/// spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise return 0.
MCRegister RAGreedy::tryAssignCSRFirstTime(
    const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
    uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
  if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
    // We choose spill over using the CSR for the first time if the spill cost
    // is lower than CSRCost.
    SA->analyze(&VirtReg);
    if (calcSpillCost() >= CSRCost)
      return PhysReg;

    // We are going to spill, set CostPerUseLimit to 1 to make sure that
    // we will not use a callee-saved register in tryEvict.
    CostPerUseLimit = 1;
    return 0;
  }
  if (ExtraInfo->getStage(VirtReg) < RS_Split) {
    // We choose pre-splitting over using the CSR for the first time if
    // the cost of splitting is lower than CSRCost.
    SA->analyze(&VirtReg);
    unsigned NumCands = 0;
    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                 NumCands, true /*IgnoreCSR*/);
    if (BestCand == NoCand)
      // Use the CSR if we can't find a region split below CSRCost.
      return PhysReg;

    // Perform the actual pre-splitting.
    doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
    return 0;
  }
  return PhysReg;
}

void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
  // Do not keep invalid information around.
  SetOfBrokenHints.remove(&LI);
}

void RAGreedy::initializeCSRCost() {
  // We use the larger of the command-line option and the value reported
  // by TRI.
  CSRCost = BlockFrequency(
      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
  if (!CSRCost.getFrequency())
    return;

  // Raw cost is relative to Entry == 2^14; scale it appropriately.
  uint64_t ActualEntry = MBFI->getEntryFreq();
  if (!ActualEntry) {
    CSRCost = 0;
    return;
  }
  uint64_t FixedEntry = 1 << 14;
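  // For example, if the actual entry frequency is 2^13, CSRCost is scaled by
  // the probability 2^13 / 2^14, i.e., halved.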
  if (ActualEntry < FixedEntry)
    CSRCost *= BranchProbability(ActualEntry, FixedEntry);
  else if (ActualEntry <= UINT32_MAX)
    // Invert the fraction and divide.
    CSRCost /= BranchProbability(FixedEntry, ActualEntry);
  else
    // Can't use BranchProbability in general, since it takes 32-bit numbers.
    CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}

/// Collect the hint info for \p Reg.
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!Instr.isFullCopy())
      continue;
    // Look for the other end of the copy.
    Register OtherReg = Instr.getOperand(0).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(1).getReg();
      if (OtherReg == Reg)
        continue;
    }
    // Get the current assignment.
    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    // Push the collected information.
    Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
                           OtherPhysReg));
  }
}

/// Using the given \p List, compute the cost of the broken hints if
/// \p PhysReg was used.
/// \return The cost of \p List for \p PhysReg.
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
                                           MCRegister PhysReg) {
  BlockFrequency Cost = 0;
  for (const HintInfo &Info : List) {
    if (Info.PhysReg != PhysReg)
      Cost += Info.Freq;
  }
  return Cost;
}

/// Using the register assigned to \p VirtReg, try to recolor
/// all the live ranges that are copy-related with \p VirtReg.
/// The recoloring is then propagated to all the live-ranges that have
/// been recolored, and so on, until no more copies can be coalesced or
/// it is not profitable.
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
  // We have a broken hint, check if it is possible to fix it by
  // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
  // some register and PhysReg may be available for the other live-ranges.
  SmallSet<Register, 4> Visited;
  SmallVector<unsigned, 2> RecoloringCandidates;
  HintsInfo Info;
  Register Reg = VirtReg.reg();
  MCRegister PhysReg = VRM->getPhys(Reg);
  // Start the recoloring algorithm from the input live-interval, then
  // it will propagate to the ones that are copy-related with it.
  Visited.insert(Reg);
  RecoloringCandidates.push_back(Reg);

  LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
                    << '(' << printReg(PhysReg, TRI) << ")\n");

  do {
    Reg = RecoloringCandidates.pop_back_val();

    // We cannot recolor physical registers.
    if (Reg.isPhysical())
      continue;

    // This may be a skipped register class.
    if (!VRM->hasPhys(Reg)) {
      assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
             "We have an unallocated variable which should have been handled");
      continue;
    }

    // Get the live interval mapped with this virtual register to be able
    // to check for the interference with the new color.
    LiveInterval &LI = LIS->getInterval(Reg);
    MCRegister CurrPhys = VRM->getPhys(Reg);
    // Check that the new color matches the register class constraints and
    // that it is free for this live range.
    if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
                                Matrix->checkInterference(LI, PhysReg)))
      continue;

    LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
                      << ") is recolorable.\n");

    // Gather the hint info.
    Info.clear();
    collectHintInfo(Reg, Info);
    // Check if recoloring the live-range will increase the cost of the
    // non-identity copies.
    if (CurrPhys != PhysReg) {
      LLVM_DEBUG(dbgs() << "Checking profitability:\n");
      BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
      BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
      LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
                        << "\nNew Cost: " << NewCopiesCost.getFrequency()
                        << '\n');
      if (OldCopiesCost < NewCopiesCost) {
        LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
        continue;
      }
      // At this point, the cost is either cheaper or equal. If it is
      // equal, we consider this profitable because it may expose
      // more recoloring opportunities.
      LLVM_DEBUG(dbgs() << "=> Profitable.\n");
      // Recolor the live-range.
      Matrix->unassign(LI);
      Matrix->assign(LI, PhysReg);
    }
    // Push all copy-related live-ranges to keep reconciling the broken
    // hints.
    for (const HintInfo &HI : Info) {
      if (Visited.insert(HI.Reg).second)
        RecoloringCandidates.push_back(HI.Reg);
    }
  } while (!RecoloringCandidates.empty());
}

/// Try to recolor broken hints.
/// Broken hints may be repaired by recoloring when an evicted variable
/// freed up a register for a larger live-range.
/// Consider the following example:
/// BB1:
///   a =
///   b =
/// BB2:
///   ...
///   = b
///   = a
/// Let us assume b gets split:
/// BB1:
///   a =
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   = a
/// Because of how the allocation works, b, c, and d may be assigned different
/// colors. Now, if a gets evicted later:
/// BB1:
///   a =
///   st a, SpillSlot
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   e = ld SpillSlot
///   = e
/// It is likely that we can assign the same register for b, c, and d,
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
  for (const LiveInterval *LI : SetOfBrokenHints) {
    assert(LI->reg().isVirtual() &&
           "Recoloring is possible only for virtual registers");
    // Some dead defs may be around (e.g., because of debug uses).
    // Ignore those.
    if (!VRM->hasPhys(LI->reg()))
      continue;
    tryHintRecoloring(*LI);
  }
}

MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       RecoloringStack &RecolorStack,
                                       unsigned Depth) {
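  // Allocation proceeds in stages: first try to assign a free register, then
  // try to evict a less worthy interference, then split the range, and
  // finally spill. Unspillable ranges that still fail go to last chance
  // recoloring.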
  uint8_t CostPerUseLimit = uint8_t(~0u);
  // First try assigning a free register.
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  if (MCRegister PhysReg =
          tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
    // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register; go with the earlier decisions and use the physical
    // register.
    if (CSRCost.getFrequency() &&
        EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
      MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                                CostPerUseLimit, NewVRegs);
      if (CSRReg || !NewVRegs.empty())
        // Return now if we decide to use a CSR or create new vregs due to
        // pre-splitting.
        return CSRReg;
    } else
      return PhysReg;
  }

  LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
  LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
                    << ExtraInfo->getCascade(VirtReg.reg()) << '\n');

  // Try to evict a less worthy live range, but only for ranges from the
  // primary queue. The RS_Split ranges already failed to do this, and they
  // should not get a second chance until they have been split.
  if (Stage != RS_Split)
    if (Register PhysReg =
            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
                     FixedRegisters)) {
      Register Hint = MRI->getSimpleHint(VirtReg.reg());
      // If VirtReg has a hint and that hint is broken, record this
      // virtual register as a recoloring candidate for broken hints.
      // Indeed, since we evicted a variable in its neighborhood it is
      // likely we can at least partially recolor some of the
      // copy-related live-ranges.
      if (Hint && Hint != PhysReg)
        SetOfBrokenHints.insert(&VirtReg);
      return PhysReg;
    }

  assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  if (Stage < RS_Split) {
    ExtraInfo->setStage(VirtReg, RS_Split);
    LLVM_DEBUG(dbgs() << "wait for second round\n");
    NewVRegs.push_back(VirtReg.reg());
    return 0;
  }

  if (Stage < RS_Spill) {
    // Try splitting VirtReg or interferences.
    unsigned NewVRegSizeBefore = NewVRegs.size();
    Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
    if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
      return PhysReg;
  }

  // If we couldn't allocate a register from spilling, there is probably some
  // invalid inline assembly. The base class will report it.
  if (Stage >= RS_Done || !VirtReg.isSpillable()) {
    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
                                   RecolorStack, Depth);
  }

  // Finally spill VirtReg itself.
  if ((EnableDeferredSpilling ||
       TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
      ExtraInfo->getStage(VirtReg) < RS_Memory) {
    // TODO: This is experimental and in particular, we do not model
    // the live range splitting done by spilling correctly.
    // We would need a deep integration with the spiller to do the
    // right thing here. Anyway, that is still good for early testing.
    ExtraInfo->setStage(VirtReg, RS_Memory);
    LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
    NewVRegs.push_back(VirtReg.reg());
  } else {
    NamedRegionTimer T("spill", "Spiller", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
    spiller().spill(LRE);
    ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

    // Tell LiveDebugVariables about the new ranges. Ranges not being covered
    // by the new regs are kept in LDV (still mapping to the old register),
    // until we rewrite spilled locations in LDV at a later stage.
    DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);

    if (VerifyEnabled)
      MF->verify(this, "After spilling");
  }

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}

void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
  using namespace ore;
  if (Spills) {
    R << NV("NumSpills", Spills) << " spills ";
    R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
  }
  if (FoldedSpills) {
    R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
    R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
      << " total folded spills cost ";
  }
  if (Reloads) {
    R << NV("NumReloads", Reloads) << " reloads ";
    R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
  }
  if (FoldedReloads) {
    R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
    R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
      << " total folded reloads cost ";
  }
  if (ZeroCostFoldedReloads)
    R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
      << " zero cost folded reloads ";
  if (Copies) {
    R << NV("NumVRCopies", Copies) << " virtual register copies ";
    R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
  }
}

RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
  RAGreedyStats Stats;
  const MachineFrameInfo &MFI = MF->getFrameInfo();
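  // Frame index out-parameter for the isLoadFromStackSlot /
  // isStoreToStackSlot queries below.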
  int FI;

  auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
    return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
        A->getPseudoValue())->getFrameIndex());
  };
  auto isPatchpointInstr = [](const MachineInstr &MI) {
    return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
           MI.getOpcode() == TargetOpcode::STACKMAP ||
           MI.getOpcode() == TargetOpcode::STATEPOINT;
  };
  for (MachineInstr &MI : MBB) {
    if (MI.isCopy()) {
      const MachineOperand &Dest = MI.getOperand(0);
      const MachineOperand &Src = MI.getOperand(1);
      Register SrcReg = Src.getReg();
      Register DestReg = Dest.getReg();
      // Only count `COPY`s with a virtual register as source or destination.
      if (SrcReg.isVirtual() || DestReg.isVirtual()) {
        if (SrcReg.isVirtual()) {
          SrcReg = VRM->getPhys(SrcReg);
          if (Src.getSubReg())
            SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
        }
        if (DestReg.isVirtual()) {
          DestReg = VRM->getPhys(DestReg);
          if (Dest.getSubReg())
            DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
        }
        if (SrcReg != DestReg)
          ++Stats.Copies;
      }
      continue;
    }

    SmallVector<const MachineMemOperand *, 2> Accesses;
    if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Reloads;
      continue;
    }
    if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Spills;
      continue;
    }
    if (TII->hasLoadFromStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      if (!isPatchpointInstr(MI)) {
        Stats.FoldedReloads += Accesses.size();
        continue;
      }
      // For statepoints there may be folded and zero-cost folded stack
      // reloads.
      std::pair<unsigned, unsigned> NonZeroCostRange =
          TII->getPatchpointUnfoldableRange(MI);
      SmallSet<unsigned, 16> FoldedReloads;
      SmallSet<unsigned, 16> ZeroCostFoldedReloads;
      for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
        MachineOperand &MO = MI.getOperand(Idx);
        if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
          continue;
        if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
          FoldedReloads.insert(MO.getIndex());
        else
          ZeroCostFoldedReloads.insert(MO.getIndex());
      }
      // If a stack slot is used in a folded reload, it is not zero cost.
      for (unsigned Slot : FoldedReloads)
        ZeroCostFoldedReloads.erase(Slot);
      Stats.FoldedReloads += FoldedReloads.size();
      Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
      continue;
    }
    Accesses.clear();
    if (TII->hasStoreToStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      Stats.FoldedSpills += Accesses.size();
    }
  }
  // Weight the collected statistics by the relative frequency of this basic
  // block.
  float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
  Stats.ReloadsCost = RelFreq * Stats.Reloads;
  Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
  Stats.SpillsCost = RelFreq * Stats.Spills;
  Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
  Stats.CopiesCost = RelFreq * Stats.Copies;
  return Stats;
}

RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
  RAGreedyStats Stats;

  // Sum up the spills and reloads in subloops.
  for (MachineLoop *SubLoop : *L)
    Stats.add(reportStats(SubLoop));

  for (MachineBasicBlock *MBB : L->getBlocks())
    // Handle blocks that were not included in subloops.
    if (Loops->getLoopFor(MBB) == L)
      Stats.add(computeStats(*MBB));

  if (!Stats.isEmpty()) {
    using namespace ore;

    ORE->emit([&]() {
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
                                        L->getStartLoc(), L->getHeader());
      Stats.report(R);
      R << "generated in loop";
      return R;
    });
  }
  return Stats;
}

void RAGreedy::reportStats() {
  if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
    return;
  RAGreedyStats Stats;
  for (MachineLoop *L : *Loops)
    Stats.add(reportStats(L));
  // Process non-loop blocks.
  for (MachineBasicBlock &MBB : *MF)
    if (!Loops->getLoopFor(&MBB))
      Stats.add(computeStats(MBB));
  if (!Stats.isEmpty()) {
    using namespace ore;

    ORE->emit([&]() {
      DebugLoc Loc;
      if (auto *SP = MF->getFunction().getSubprogram())
        Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
                                        &MF->front());
      Stats.report(R);
      R << "generated in function";
      return R;
    });
  }
}

bool RAGreedy::hasVirtRegAlloc() {
  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (MRI->reg_nodbg_empty(Reg))
      continue;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if (!RC)
      continue;
    if (ShouldAllocateClass(*TRI, *RC))
      return true;
  }

  return false;
}

bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
                    << "********** Function: " << mf.getName() << '\n');

  MF = &mf;
  TII = MF->getSubtarget().getInstrInfo();

  if (VerifyEnabled)
    MF->verify(this, "Before greedy register allocator");

  RegAllocBase::init(getAnalysis<VirtRegMap>(),
                     getAnalysis<LiveIntervals>(),
                     getAnalysis<LiveRegMatrix>());

  // Early return if there is no virtual register to be allocated to a
  // physical register.
  if (!hasVirtRegAlloc())
    return false;

  Indexes = &getAnalysis<SlotIndexes>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  Loops = &getAnalysis<MachineLoopInfo>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();
  DebugVars = &getAnalysis<LiveDebugVariables>();

  initializeCSRCost();

  RegCosts = TRI->getRegisterCosts(*MF);
  RegClassPriorityTrumpsGlobalness =
      GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
          ? GreedyRegClassPriorityTrumpsGlobalness
          : TRI->regClassPriorityTrumpsGlobalness(*MF);

  ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
                               ? GreedyReverseLocalAssignment
                               : TRI->reverseLocalAssignment();

  ExtraInfo.emplace();
  EvictAdvisor =
      getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
  PriorityAdvisor =
      getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);

  VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));

  VRAI->calculateSpillWeightsAndHints();

  LLVM_DEBUG(LIS->dump());

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));

  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32); // This will grow as needed.
  SetOfBrokenHints.clear();

  allocatePhysRegs();
  tryHintsRecoloring();

  if (VerifyEnabled)
    MF->verify(this, "Before post optimization");
  postOptimization();
  reportStats();

  releaseMemory();
  return true;
}