1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling 10 /// for a MachineInstr-based dependency graph. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 16 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/PointerIntPair.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/SparseMultiSet.h" 21 #include "llvm/ADT/SparseSet.h" 22 #include "llvm/ADT/identity.h" 23 #include "llvm/CodeGen/LiveRegUnits.h" 24 #include "llvm/CodeGen/MachineBasicBlock.h" 25 #include "llvm/CodeGen/ScheduleDAG.h" 26 #include "llvm/CodeGen/TargetRegisterInfo.h" 27 #include "llvm/CodeGen/TargetSchedule.h" 28 #include "llvm/MC/LaneBitmask.h" 29 #include <cassert> 30 #include <cstdint> 31 #include <list> 32 #include <string> 33 #include <utility> 34 #include <vector> 35 36 namespace llvm { 37 38 class AAResults; 39 class LiveIntervals; 40 class MachineFrameInfo; 41 class MachineFunction; 42 class MachineInstr; 43 class MachineLoopInfo; 44 class MachineOperand; 45 struct MCSchedClassDesc; 46 class PressureDiffs; 47 class PseudoSourceValue; 48 class RegPressureTracker; 49 class UndefValue; 50 class Value; 51 52 /// An individual mapping from virtual register number to SUnit. 53 struct VReg2SUnit { 54 unsigned VirtReg; 55 LaneBitmask LaneMask; 56 SUnit *SU; 57 VReg2SUnitVReg2SUnit58 VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) 59 : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} 60 getSparseSetIndexVReg2SUnit61 unsigned getSparseSetIndex() const { 62 return Register::virtReg2Index(VirtReg); 63 } 64 }; 65 66 /// Mapping from virtual register to SUnit including an operand index. 67 struct VReg2SUnitOperIdx : public VReg2SUnit { 68 unsigned OperandIndex; 69 VReg2SUnitOperIdxVReg2SUnitOperIdx70 VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, 71 unsigned OperandIndex, SUnit *SU) 72 : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} 73 }; 74 75 /// Record a physical register access. 76 /// For non-data-dependent uses, OpIdx == -1. 77 struct PhysRegSUOper { 78 SUnit *SU; 79 int OpIdx; 80 unsigned RegUnit; 81 PhysRegSUOperPhysRegSUOper82 PhysRegSUOper(SUnit *su, int op, unsigned R) 83 : SU(su), OpIdx(op), RegUnit(R) {} 84 getSparseSetIndexPhysRegSUOper85 unsigned getSparseSetIndex() const { return RegUnit; } 86 }; 87 88 /// Use a SparseMultiSet to track physical registers. Storage is only 89 /// allocated once for the pass. It can be cleared in constant time and reused 90 /// without any frees. 91 using RegUnit2SUnitsMap = 92 SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>; 93 94 /// Use SparseSet as a SparseMap by relying on the fact that it never 95 /// compares ValueT's, only unsigned keys. This allows the set to be cleared 96 /// between scheduling regions in constant time as long as ValueT does not 97 /// require a destructor. 98 using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>; 99 100 /// Track local uses of virtual registers. These uses are gathered by the DAG 101 /// builder and may be consulted by the scheduler to avoid iterating an entire 102 /// vreg use list. 103 using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>; 104 105 using VReg2SUnitOperIdxMultiMap = 106 SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>; 107 108 using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; 109 110 struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> { UnderlyingObjectUnderlyingObject111 UnderlyingObject(ValueType V, bool MayAlias) 112 : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {} 113 getValueUnderlyingObject114 ValueType getValue() const { return getPointer(); } mayAliasUnderlyingObject115 bool mayAlias() const { return getInt(); } 116 }; 117 118 using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>; 119 120 /// A ScheduleDAG for scheduling lists of MachineInstr. 121 class ScheduleDAGInstrs : public ScheduleDAG { 122 protected: 123 const MachineLoopInfo *MLI = nullptr; 124 const MachineFrameInfo &MFI; 125 126 /// TargetSchedModel provides an interface to the machine model. 127 TargetSchedModel SchedModel; 128 129 /// True if the DAG builder should remove kill flags (in preparation for 130 /// rescheduling). 131 bool RemoveKillFlags; 132 133 /// The standard DAG builder does not normally include terminators as DAG 134 /// nodes because it does not create the necessary dependencies to prevent 135 /// reordering. A specialized scheduler can override 136 /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate 137 /// it has taken responsibility for scheduling the terminator correctly. 138 bool CanHandleTerminators = false; 139 140 /// Whether lane masks should get tracked. 141 bool TrackLaneMasks = false; 142 143 // State specific to the current scheduling region. 144 // ------------------------------------------------ 145 146 /// The block in which to insert instructions 147 MachineBasicBlock *BB = nullptr; 148 149 /// The beginning of the range to be scheduled. 150 MachineBasicBlock::iterator RegionBegin; 151 152 /// The end of the range to be scheduled. 153 MachineBasicBlock::iterator RegionEnd; 154 155 /// Instructions in this region (distance(RegionBegin, RegionEnd)). 156 unsigned NumRegionInstrs = 0; 157 158 /// After calling BuildSchedGraph, each machine instruction in the current 159 /// scheduling region is mapped to an SUnit. 160 DenseMap<MachineInstr*, SUnit*> MISUnitMap; 161 162 // State internal to DAG building. 163 // ------------------------------- 164 165 /// Defs, Uses - Remember where defs and uses of each register are as we 166 /// iterate upward through the instructions. This is allocated here instead 167 /// of inside BuildSchedGraph to avoid the need for it to be initialized and 168 /// destructed for each block. 169 RegUnit2SUnitsMap Defs; 170 RegUnit2SUnitsMap Uses; 171 172 /// Tracks the last instruction(s) in this region defining each virtual 173 /// register. There may be multiple current definitions for a register with 174 /// disjunct lanemasks. 175 VReg2SUnitMultiMap CurrentVRegDefs; 176 /// Tracks the last instructions in this region using each virtual register. 177 VReg2SUnitOperIdxMultiMap CurrentVRegUses; 178 179 AAResults *AAForDep = nullptr; 180 181 /// Remember a generic side-effecting instruction as we proceed. 182 /// No other SU ever gets scheduled around it (except in the special 183 /// case of a huge region that gets reduced). 184 SUnit *BarrierChain = nullptr; 185 186 public: 187 /// A list of SUnits, used in Value2SUsMap, during DAG construction. 188 /// Note: to gain speed it might be worth investigating an optimized 189 /// implementation of this data structure, such as a singly linked list 190 /// with a memory pool (SmallVector was tried but slow and SparseSet is not 191 /// applicable). 192 using SUList = std::list<SUnit *>; 193 194 /// The direction that should be used to dump the scheduled Sequence. 195 enum DumpDirection { 196 TopDown, 197 BottomUp, 198 Bidirectional, 199 NotSet, 200 }; 201 setDumpDirection(DumpDirection D)202 void setDumpDirection(DumpDirection D) { DumpDir = D; } 203 204 protected: 205 DumpDirection DumpDir = NotSet; 206 207 /// A map from ValueType to SUList, used during DAG construction, as 208 /// a means of remembering which SUs depend on which memory locations. 209 class Value2SUsMap; 210 211 /// Reduces maps in FIFO order, by N SUs. This is better than turning 212 /// every Nth memory SU into BarrierChain in buildSchedGraph(), since 213 /// it avoids unnecessary edges between seen SUs above the new BarrierChain, 214 /// and those below it. 215 void reduceHugeMemNodeMaps(Value2SUsMap &stores, 216 Value2SUsMap &loads, unsigned N); 217 218 /// Adds a chain edge between SUa and SUb, but only if both 219 /// AAResults and Target fail to deny the dependency. 220 void addChainDependency(SUnit *SUa, SUnit *SUb, 221 unsigned Latency = 0); 222 223 /// Adds dependencies as needed from all SUs in list to SU. addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)224 void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) { 225 for (SUnit *Entry : SUs) 226 addChainDependency(SU, Entry, Latency); 227 } 228 229 /// Adds dependencies as needed from all SUs in map, to SU. 230 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap); 231 232 /// Adds dependencies as needed to SU, from all SUs mapped to V. 233 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap, 234 ValueType V); 235 236 /// Adds barrier chain edges from all SUs in map, and then clear the map. 237 /// This is equivalent to insertBarrierChain(), but optimized for the common 238 /// case where the new BarrierChain (a global memory object) has a higher 239 /// NodeNum than all SUs in map. It is assumed BarrierChain has been set 240 /// before calling this. 241 void addBarrierChain(Value2SUsMap &map); 242 243 /// Inserts a barrier chain in a huge region, far below current SU. 244 /// Adds barrier chain edges from all SUs in map with higher NodeNums than 245 /// this new BarrierChain, and remove them from map. It is assumed 246 /// BarrierChain has been set before calling this. 247 void insertBarrierChain(Value2SUsMap &map); 248 249 /// For an unanalyzable memory access, this Value is used in maps. 250 UndefValue *UnknownValue; 251 252 253 /// Topo - A topological ordering for SUnits which permits fast IsReachable 254 /// and similar queries. 255 ScheduleDAGTopologicalSort Topo; 256 257 using DbgValueVector = 258 std::vector<std::pair<MachineInstr *, MachineInstr *>>; 259 /// Remember instruction that precedes DBG_VALUE. 260 /// These are generated by buildSchedGraph but persist so they can be 261 /// referenced when emitting the final schedule. 262 DbgValueVector DbgValues; 263 MachineInstr *FirstDbgValue = nullptr; 264 265 /// Set of live physical registers for updating kill flags. 266 LiveRegUnits LiveRegs; 267 268 public: 269 explicit ScheduleDAGInstrs(MachineFunction &mf, 270 const MachineLoopInfo *mli, 271 bool RemoveKillFlags = false); 272 273 ~ScheduleDAGInstrs() override = default; 274 275 /// Gets the machine model for instruction scheduling. getSchedModel()276 const TargetSchedModel *getSchedModel() const { return &SchedModel; } 277 278 /// Resolves and cache a resolved scheduling class for an SUnit. getSchedClass(SUnit * SU)279 const MCSchedClassDesc *getSchedClass(SUnit *SU) const { 280 if (!SU->SchedClass && SchedModel.hasInstrSchedModel()) 281 SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr()); 282 return SU->SchedClass; 283 } 284 285 /// IsReachable - Checks if SU is reachable from TargetSU. IsReachable(SUnit * SU,SUnit * TargetSU)286 bool IsReachable(SUnit *SU, SUnit *TargetSU) { 287 return Topo.IsReachable(SU, TargetSU); 288 } 289 290 /// Returns an iterator to the top of the current scheduling region. begin()291 MachineBasicBlock::iterator begin() const { return RegionBegin; } 292 293 /// Returns an iterator to the bottom of the current scheduling region. end()294 MachineBasicBlock::iterator end() const { return RegionEnd; } 295 296 /// Creates a new SUnit and return a ptr to it. 297 SUnit *newSUnit(MachineInstr *MI); 298 299 /// Returns an existing SUnit for this MI, or nullptr. 300 SUnit *getSUnit(MachineInstr *MI) const; 301 302 /// If this method returns true, handling of the scheduling regions 303 /// themselves (in case of a scheduling boundary in MBB) will be done 304 /// beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()305 virtual bool doMBBSchedRegionsTopDown() const { return false; } 306 307 /// Prepares to perform scheduling in the given block. 308 virtual void startBlock(MachineBasicBlock *BB); 309 310 /// Cleans up after scheduling in the given block. 311 virtual void finishBlock(); 312 313 /// Initialize the DAG and common scheduler state for a new 314 /// scheduling region. This does not actually create the DAG, only clears 315 /// it. The scheduling driver may call BuildSchedGraph multiple times per 316 /// scheduling region. 317 virtual void enterRegion(MachineBasicBlock *bb, 318 MachineBasicBlock::iterator begin, 319 MachineBasicBlock::iterator end, 320 unsigned regioninstrs); 321 322 /// Called when the scheduler has finished scheduling the current region. 323 virtual void exitRegion(); 324 325 /// Builds SUnits for the current region. 326 /// If \p RPTracker is non-null, compute register pressure as a side effect. 327 /// The DAG builder is an efficient place to do it because it already visits 328 /// operands. 329 void buildSchedGraph(AAResults *AA, 330 RegPressureTracker *RPTracker = nullptr, 331 PressureDiffs *PDiffs = nullptr, 332 LiveIntervals *LIS = nullptr, 333 bool TrackLaneMasks = false); 334 335 /// Adds dependencies from instructions in the current list of 336 /// instructions being scheduled to scheduling barrier. We want to make sure 337 /// instructions which define registers that are either used by the 338 /// terminator or are live-out are properly scheduled. This is especially 339 /// important when the definition latency of the return value(s) are too 340 /// high to be hidden by the branch or when the liveout registers used by 341 /// instructions in the fallthrough block. 342 void addSchedBarrierDeps(); 343 344 /// Orders nodes according to selected style. 345 /// 346 /// Typically, a scheduling algorithm will implement schedule() without 347 /// overriding enterRegion() or exitRegion(). 348 virtual void schedule() = 0; 349 350 /// Allow targets to perform final scheduling actions at the level of the 351 /// whole MachineFunction. By default does nothing. finalizeSchedule()352 virtual void finalizeSchedule() {} 353 354 void dumpNode(const SUnit &SU) const override; 355 void dump() const override; 356 357 /// Returns a label for a DAG node that points to an instruction. 358 std::string getGraphNodeLabel(const SUnit *SU) const override; 359 360 /// Returns a label for the region of code covered by the DAG. 361 std::string getDAGName() const override; 362 363 /// Fixes register kill flags that scheduling has made invalid. 364 void fixupKills(MachineBasicBlock &MBB); 365 366 /// True if an edge can be added from PredSU to SuccSU without creating 367 /// a cycle. 368 bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); 369 370 /// Add a DAG edge to the given SU with the given predecessor 371 /// dependence data. 372 /// 373 /// \returns true if the edge may be added without creating a cycle OR if an 374 /// equivalent edge already existed (false indicates failure). 375 bool addEdge(SUnit *SuccSU, const SDep &PredDep); 376 377 protected: 378 void initSUnits(); 379 void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); 380 void addPhysRegDeps(SUnit *SU, unsigned OperIdx); 381 void addVRegDefDeps(SUnit *SU, unsigned OperIdx); 382 void addVRegUseDeps(SUnit *SU, unsigned OperIdx); 383 384 /// Returns a mask for which lanes get read/written by the given (register) 385 /// machine operand. 386 LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; 387 388 /// Returns true if the def register in \p MO has no uses. 389 bool deadDefHasNoUse(const MachineOperand &MO); 390 }; 391 392 /// Creates a new SUnit and return a ptr to it. newSUnit(MachineInstr * MI)393 inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) { 394 #ifndef NDEBUG 395 const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0]; 396 #endif 397 SUnits.emplace_back(MI, (unsigned)SUnits.size()); 398 assert((Addr == nullptr || Addr == &SUnits[0]) && 399 "SUnits std::vector reallocated on the fly!"); 400 return &SUnits.back(); 401 } 402 403 /// Returns an existing SUnit for this MI, or nullptr. getSUnit(MachineInstr * MI)404 inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const { 405 return MISUnitMap.lookup(MI); 406 } 407 408 } // end namespace llvm 409 410 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 411