1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
/// \file Declares the ScheduleDAGInstrs class, which implements scheduling
/// for a MachineInstr-based dependency graph.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
16 
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PointerIntPair.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/SparseMultiSet.h"
21 #include "llvm/ADT/SparseSet.h"
22 #include "llvm/ADT/identity.h"
23 #include "llvm/CodeGen/LiveRegUnits.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/ScheduleDAG.h"
26 #include "llvm/CodeGen/TargetRegisterInfo.h"
27 #include "llvm/CodeGen/TargetSchedule.h"
28 #include "llvm/MC/LaneBitmask.h"
29 #include <cassert>
30 #include <cstdint>
31 #include <list>
32 #include <string>
33 #include <utility>
34 #include <vector>
35 
36 namespace llvm {
37 
38   class AAResults;
39   class LiveIntervals;
40   class MachineFrameInfo;
41   class MachineFunction;
42   class MachineInstr;
43   class MachineLoopInfo;
44   class MachineOperand;
45   struct MCSchedClassDesc;
46   class PressureDiffs;
47   class PseudoSourceValue;
48   class RegPressureTracker;
49   class UndefValue;
50   class Value;
51 
52   /// An individual mapping from virtual register number to SUnit.
53   struct VReg2SUnit {
54     unsigned VirtReg;
55     LaneBitmask LaneMask;
56     SUnit *SU;
57 
VReg2SUnitVReg2SUnit58     VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU)
59       : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
60 
getSparseSetIndexVReg2SUnit61     unsigned getSparseSetIndex() const {
62       return Register::virtReg2Index(VirtReg);
63     }
64   };
65 
66   /// Mapping from virtual register to SUnit including an operand index.
67   struct VReg2SUnitOperIdx : public VReg2SUnit {
68     unsigned OperandIndex;
69 
VReg2SUnitOperIdxVReg2SUnitOperIdx70     VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask,
71                       unsigned OperandIndex, SUnit *SU)
72       : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
73   };
74 
75   /// Record a physical register access.
76   /// For non-data-dependent uses, OpIdx == -1.
77   struct PhysRegSUOper {
78     SUnit *SU;
79     int OpIdx;
80     unsigned RegUnit;
81 
PhysRegSUOperPhysRegSUOper82     PhysRegSUOper(SUnit *su, int op, unsigned R)
83         : SU(su), OpIdx(op), RegUnit(R) {}
84 
getSparseSetIndexPhysRegSUOper85     unsigned getSparseSetIndex() const { return RegUnit; }
86   };
87 
88   /// Use a SparseMultiSet to track physical registers. Storage is only
89   /// allocated once for the pass. It can be cleared in constant time and reused
90   /// without any frees.
91   using RegUnit2SUnitsMap =
92       SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;
93 
94   /// Use SparseSet as a SparseMap by relying on the fact that it never
95   /// compares ValueT's, only unsigned keys. This allows the set to be cleared
96   /// between scheduling regions in constant time as long as ValueT does not
97   /// require a destructor.
98   using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>;
99 
100   /// Track local uses of virtual registers. These uses are gathered by the DAG
101   /// builder and may be consulted by the scheduler to avoid iterating an entire
102   /// vreg use list.
103   using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;
104 
105   using VReg2SUnitOperIdxMultiMap =
106       SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;
107 
108   using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
109 
110   struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
UnderlyingObjectUnderlyingObject111     UnderlyingObject(ValueType V, bool MayAlias)
112         : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {}
113 
getValueUnderlyingObject114     ValueType getValue() const { return getPointer(); }
mayAliasUnderlyingObject115     bool mayAlias() const { return getInt(); }
116   };
117 
118   using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;
119 
120   /// A ScheduleDAG for scheduling lists of MachineInstr.
121   class ScheduleDAGInstrs : public ScheduleDAG {
122   protected:
123     const MachineLoopInfo *MLI = nullptr;
124     const MachineFrameInfo &MFI;
125 
126     /// TargetSchedModel provides an interface to the machine model.
127     TargetSchedModel SchedModel;
128 
129     /// True if the DAG builder should remove kill flags (in preparation for
130     /// rescheduling).
131     bool RemoveKillFlags;
132 
133     /// The standard DAG builder does not normally include terminators as DAG
134     /// nodes because it does not create the necessary dependencies to prevent
135     /// reordering. A specialized scheduler can override
136     /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
137     /// it has taken responsibility for scheduling the terminator correctly.
138     bool CanHandleTerminators = false;
139 
140     /// Whether lane masks should get tracked.
141     bool TrackLaneMasks = false;
142 
143     // State specific to the current scheduling region.
144     // ------------------------------------------------
145 
146     /// The block in which to insert instructions
147     MachineBasicBlock *BB = nullptr;
148 
149     /// The beginning of the range to be scheduled.
150     MachineBasicBlock::iterator RegionBegin;
151 
152     /// The end of the range to be scheduled.
153     MachineBasicBlock::iterator RegionEnd;
154 
155     /// Instructions in this region (distance(RegionBegin, RegionEnd)).
156     unsigned NumRegionInstrs = 0;
157 
158     /// After calling BuildSchedGraph, each machine instruction in the current
159     /// scheduling region is mapped to an SUnit.
160     DenseMap<MachineInstr*, SUnit*> MISUnitMap;
161 
162     // State internal to DAG building.
163     // -------------------------------
164 
165     /// Defs, Uses - Remember where defs and uses of each register are as we
166     /// iterate upward through the instructions. This is allocated here instead
167     /// of inside BuildSchedGraph to avoid the need for it to be initialized and
168     /// destructed for each block.
169     RegUnit2SUnitsMap Defs;
170     RegUnit2SUnitsMap Uses;
171 
172     /// Tracks the last instruction(s) in this region defining each virtual
173     /// register. There may be multiple current definitions for a register with
174     /// disjunct lanemasks.
175     VReg2SUnitMultiMap CurrentVRegDefs;
176     /// Tracks the last instructions in this region using each virtual register.
177     VReg2SUnitOperIdxMultiMap CurrentVRegUses;
178 
179     AAResults *AAForDep = nullptr;
180 
181     /// Remember a generic side-effecting instruction as we proceed.
182     /// No other SU ever gets scheduled around it (except in the special
183     /// case of a huge region that gets reduced).
184     SUnit *BarrierChain = nullptr;
185 
186   public:
187     /// A list of SUnits, used in Value2SUsMap, during DAG construction.
188     /// Note: to gain speed it might be worth investigating an optimized
189     /// implementation of this data structure, such as a singly linked list
190     /// with a memory pool (SmallVector was tried but slow and SparseSet is not
191     /// applicable).
192     using SUList = std::list<SUnit *>;
193 
194     /// The direction that should be used to dump the scheduled Sequence.
195     enum DumpDirection {
196       TopDown,
197       BottomUp,
198       Bidirectional,
199       NotSet,
200     };
201 
setDumpDirection(DumpDirection D)202     void setDumpDirection(DumpDirection D) { DumpDir = D; }
203 
204   protected:
205     DumpDirection DumpDir = NotSet;
206 
207     /// A map from ValueType to SUList, used during DAG construction, as
208     /// a means of remembering which SUs depend on which memory locations.
209     class Value2SUsMap;
210 
211     /// Reduces maps in FIFO order, by N SUs. This is better than turning
212     /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
213     /// it avoids unnecessary edges between seen SUs above the new BarrierChain,
214     /// and those below it.
215     void reduceHugeMemNodeMaps(Value2SUsMap &stores,
216                                Value2SUsMap &loads, unsigned N);
217 
218     /// Adds a chain edge between SUa and SUb, but only if both
219     /// AAResults and Target fail to deny the dependency.
220     void addChainDependency(SUnit *SUa, SUnit *SUb,
221                             unsigned Latency = 0);
222 
223     /// Adds dependencies as needed from all SUs in list to SU.
addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)224     void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) {
225       for (SUnit *Entry : SUs)
226         addChainDependency(SU, Entry, Latency);
227     }
228 
229     /// Adds dependencies as needed from all SUs in map, to SU.
230     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
231 
232     /// Adds dependencies as needed to SU, from all SUs mapped to V.
233     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
234                               ValueType V);
235 
236     /// Adds barrier chain edges from all SUs in map, and then clear the map.
237     /// This is equivalent to insertBarrierChain(), but optimized for the common
238     /// case where the new BarrierChain (a global memory object) has a higher
239     /// NodeNum than all SUs in map. It is assumed BarrierChain has been set
240     /// before calling this.
241     void addBarrierChain(Value2SUsMap &map);
242 
243     /// Inserts a barrier chain in a huge region, far below current SU.
244     /// Adds barrier chain edges from all SUs in map with higher NodeNums than
245     /// this new BarrierChain, and remove them from map. It is assumed
246     /// BarrierChain has been set before calling this.
247     void insertBarrierChain(Value2SUsMap &map);
248 
249     /// For an unanalyzable memory access, this Value is used in maps.
250     UndefValue *UnknownValue;
251 
252 
253     /// Topo - A topological ordering for SUnits which permits fast IsReachable
254     /// and similar queries.
255     ScheduleDAGTopologicalSort Topo;
256 
257     using DbgValueVector =
258         std::vector<std::pair<MachineInstr *, MachineInstr *>>;
259     /// Remember instruction that precedes DBG_VALUE.
260     /// These are generated by buildSchedGraph but persist so they can be
261     /// referenced when emitting the final schedule.
262     DbgValueVector DbgValues;
263     MachineInstr *FirstDbgValue = nullptr;
264 
265     /// Set of live physical registers for updating kill flags.
266     LiveRegUnits LiveRegs;
267 
268   public:
269     explicit ScheduleDAGInstrs(MachineFunction &mf,
270                                const MachineLoopInfo *mli,
271                                bool RemoveKillFlags = false);
272 
273     ~ScheduleDAGInstrs() override = default;
274 
275     /// Gets the machine model for instruction scheduling.
getSchedModel()276     const TargetSchedModel *getSchedModel() const { return &SchedModel; }
277 
278     /// Resolves and cache a resolved scheduling class for an SUnit.
getSchedClass(SUnit * SU)279     const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
280       if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
281         SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
282       return SU->SchedClass;
283     }
284 
285     /// IsReachable - Checks if SU is reachable from TargetSU.
IsReachable(SUnit * SU,SUnit * TargetSU)286     bool IsReachable(SUnit *SU, SUnit *TargetSU) {
287       return Topo.IsReachable(SU, TargetSU);
288     }
289 
290     /// Returns an iterator to the top of the current scheduling region.
begin()291     MachineBasicBlock::iterator begin() const { return RegionBegin; }
292 
293     /// Returns an iterator to the bottom of the current scheduling region.
end()294     MachineBasicBlock::iterator end() const { return RegionEnd; }
295 
296     /// Creates a new SUnit and return a ptr to it.
297     SUnit *newSUnit(MachineInstr *MI);
298 
299     /// Returns an existing SUnit for this MI, or nullptr.
300     SUnit *getSUnit(MachineInstr *MI) const;
301 
302     /// If this method returns true, handling of the scheduling regions
303     /// themselves (in case of a scheduling boundary in MBB) will be done
304     /// beginning with the topmost region of MBB.
doMBBSchedRegionsTopDown()305     virtual bool doMBBSchedRegionsTopDown() const { return false; }
306 
307     /// Prepares to perform scheduling in the given block.
308     virtual void startBlock(MachineBasicBlock *BB);
309 
310     /// Cleans up after scheduling in the given block.
311     virtual void finishBlock();
312 
313     /// Initialize the DAG and common scheduler state for a new
314     /// scheduling region. This does not actually create the DAG, only clears
315     /// it. The scheduling driver may call BuildSchedGraph multiple times per
316     /// scheduling region.
317     virtual void enterRegion(MachineBasicBlock *bb,
318                              MachineBasicBlock::iterator begin,
319                              MachineBasicBlock::iterator end,
320                              unsigned regioninstrs);
321 
322     /// Called when the scheduler has finished scheduling the current region.
323     virtual void exitRegion();
324 
325     /// Builds SUnits for the current region.
326     /// If \p RPTracker is non-null, compute register pressure as a side effect.
327     /// The DAG builder is an efficient place to do it because it already visits
328     /// operands.
329     void buildSchedGraph(AAResults *AA,
330                          RegPressureTracker *RPTracker = nullptr,
331                          PressureDiffs *PDiffs = nullptr,
332                          LiveIntervals *LIS = nullptr,
333                          bool TrackLaneMasks = false);
334 
335     /// Adds dependencies from instructions in the current list of
336     /// instructions being scheduled to scheduling barrier. We want to make sure
337     /// instructions which define registers that are either used by the
338     /// terminator or are live-out are properly scheduled. This is especially
339     /// important when the definition latency of the return value(s) are too
340     /// high to be hidden by the branch or when the liveout registers used by
341     /// instructions in the fallthrough block.
342     void addSchedBarrierDeps();
343 
344     /// Orders nodes according to selected style.
345     ///
346     /// Typically, a scheduling algorithm will implement schedule() without
347     /// overriding enterRegion() or exitRegion().
348     virtual void schedule() = 0;
349 
350     /// Allow targets to perform final scheduling actions at the level of the
351     /// whole MachineFunction. By default does nothing.
finalizeSchedule()352     virtual void finalizeSchedule() {}
353 
354     void dumpNode(const SUnit &SU) const override;
355     void dump() const override;
356 
357     /// Returns a label for a DAG node that points to an instruction.
358     std::string getGraphNodeLabel(const SUnit *SU) const override;
359 
360     /// Returns a label for the region of code covered by the DAG.
361     std::string getDAGName() const override;
362 
363     /// Fixes register kill flags that scheduling has made invalid.
364     void fixupKills(MachineBasicBlock &MBB);
365 
366     /// True if an edge can be added from PredSU to SuccSU without creating
367     /// a cycle.
368     bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
369 
370     /// Add a DAG edge to the given SU with the given predecessor
371     /// dependence data.
372     ///
373     /// \returns true if the edge may be added without creating a cycle OR if an
374     /// equivalent edge already existed (false indicates failure).
375     bool addEdge(SUnit *SuccSU, const SDep &PredDep);
376 
377   protected:
378     void initSUnits();
379     void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
380     void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
381     void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
382     void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
383 
384     /// Returns a mask for which lanes get read/written by the given (register)
385     /// machine operand.
386     LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
387 
388     /// Returns true if the def register in \p MO has no uses.
389     bool deadDefHasNoUse(const MachineOperand &MO);
390   };
391 
392   /// Creates a new SUnit and return a ptr to it.
newSUnit(MachineInstr * MI)393   inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
394 #ifndef NDEBUG
395     const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0];
396 #endif
397     SUnits.emplace_back(MI, (unsigned)SUnits.size());
398     assert((Addr == nullptr || Addr == &SUnits[0]) &&
399            "SUnits std::vector reallocated on the fly!");
400     return &SUnits.back();
401   }
402 
403   /// Returns an existing SUnit for this MI, or nullptr.
getSUnit(MachineInstr * MI)404   inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
405     return MISUnitMap.lookup(MI);
406   }
407 
408 } // end namespace llvm
409 
410 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
411