1//===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file Implements the ScheduleDAGInstrs class, which implements scheduling
10/// for a MachineInstr-based dependency graph.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
15#define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
16
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/PointerIntPair.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/SparseMultiSet.h"
21#include "llvm/ADT/identity.h"
22#include "llvm/Analysis/AliasAnalysis.h"
23#include "llvm/CodeGen/LiveRegUnits.h"
24#include "llvm/CodeGen/MachineBasicBlock.h"
25#include "llvm/CodeGen/ScheduleDAG.h"
26#include "llvm/CodeGen/TargetRegisterInfo.h"
27#include "llvm/CodeGen/TargetSchedule.h"
28#include "llvm/MC/LaneBitmask.h"
29#include "llvm/Support/Compiler.h"
30#include <cassert>
31#include <cstdint>
32#include <list>
33#include <string>
34#include <utility>
35#include <vector>
36
37namespace llvm {
38
39 class AAResults;
40 class LiveIntervals;
41 class MachineFrameInfo;
42 class MachineFunction;
43 class MachineInstr;
44 class MachineLoopInfo;
45 class MachineOperand;
46 struct MCSchedClassDesc;
47 class PressureDiffs;
48 class PseudoSourceValue;
49 class RegPressureTracker;
50 class UndefValue;
51 class Value;
52
53 /// An individual mapping from virtual register number to SUnit.
54 struct VReg2SUnit {
55 Register VirtReg;
56 LaneBitmask LaneMask;
57 SUnit *SU;
58
59 VReg2SUnit(Register VReg, LaneBitmask LaneMask, SUnit *SU)
60 : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
61
62 unsigned getSparseSetIndex() const {
63 return VirtReg.virtRegIndex();
64 }
65 };
66
67 /// Mapping from virtual register to SUnit including an operand index.
68 struct VReg2SUnitOperIdx : public VReg2SUnit {
69 unsigned OperandIndex;
70
71 VReg2SUnitOperIdx(Register VReg, LaneBitmask LaneMask,
72 unsigned OperandIndex, SUnit *SU)
73 : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
74 };
75
76 /// Record a physical register access.
77 /// For non-data-dependent uses, OpIdx == -1.
78 struct PhysRegSUOper {
79 SUnit *SU;
80 int OpIdx;
81 unsigned RegUnit;
82
83 PhysRegSUOper(SUnit *su, int op, unsigned R)
84 : SU(su), OpIdx(op), RegUnit(R) {}
85
86 unsigned getSparseSetIndex() const { return RegUnit; }
87 };
88
89 /// Use a SparseMultiSet to track physical registers. Storage is only
90 /// allocated once for the pass. It can be cleared in constant time and reused
91 /// without any frees.
92 using RegUnit2SUnitsMap =
93 SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;
94
95 /// Track local uses of virtual registers. These uses are gathered by the DAG
96 /// builder and may be consulted by the scheduler to avoid iterating an entire
97 /// vreg use list.
98 using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;
99
100 using VReg2SUnitOperIdxMultiMap =
101 SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;
102
103 using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
104
105 struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
106 UnderlyingObject(ValueType V, bool MayAlias)
107 : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {}
108
109 ValueType getValue() const { return getPointer(); }
110 bool mayAlias() const { return getInt(); }
111 };
112
113 using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;
114
115 /// A ScheduleDAG for scheduling lists of MachineInstr.
116 class LLVM_ABI ScheduleDAGInstrs : public ScheduleDAG {
117 protected:
118 const MachineLoopInfo *MLI = nullptr;
119 const MachineFrameInfo &MFI;
120
121 /// TargetSchedModel provides an interface to the machine model.
122 TargetSchedModel SchedModel;
123
124 /// True if the DAG builder should remove kill flags (in preparation for
125 /// rescheduling).
126 bool RemoveKillFlags;
127
128 /// True if regions with a single MI should be scheduled.
129 bool ScheduleSingleMIRegions = false;
130
131 /// The standard DAG builder does not normally include terminators as DAG
132 /// nodes because it does not create the necessary dependencies to prevent
133 /// reordering. A specialized scheduler can override
134 /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
135 /// it has taken responsibility for scheduling the terminator correctly.
136 bool CanHandleTerminators = false;
137
138 /// Whether lane masks should get tracked.
139 bool TrackLaneMasks = false;
140
141 // State specific to the current scheduling region.
142 // ------------------------------------------------
143
144 /// The block in which to insert instructions
145 MachineBasicBlock *BB = nullptr;
146
147 /// The beginning of the range to be scheduled.
148 MachineBasicBlock::iterator RegionBegin;
149
150 /// The end of the range to be scheduled.
151 MachineBasicBlock::iterator RegionEnd;
152
153 /// Instructions in this region (distance(RegionBegin, RegionEnd)).
154 unsigned NumRegionInstrs = 0;
155
156 /// After calling BuildSchedGraph, each machine instruction in the current
157 /// scheduling region is mapped to an SUnit.
158 DenseMap<MachineInstr*, SUnit*> MISUnitMap;
159
160 // State internal to DAG building.
161 // -------------------------------
162
163 /// Defs, Uses - Remember where defs and uses of each register are as we
164 /// iterate upward through the instructions. This is allocated here instead
165 /// of inside BuildSchedGraph to avoid the need for it to be initialized and
166 /// destructed for each block.
167 RegUnit2SUnitsMap Defs;
168 RegUnit2SUnitsMap Uses;
169
170 /// Tracks the last instruction(s) in this region defining each virtual
171 /// register. There may be multiple current definitions for a register with
172 /// disjunct lanemasks.
173 VReg2SUnitMultiMap CurrentVRegDefs;
174 /// Tracks the last instructions in this region using each virtual register.
175 VReg2SUnitOperIdxMultiMap CurrentVRegUses;
176
177 mutable std::optional<BatchAAResults> AAForDep;
178
179 /// Remember a generic side-effecting instruction as we proceed.
180 /// No other SU ever gets scheduled around it (except in the special
181 /// case of a huge region that gets reduced).
182 SUnit *BarrierChain = nullptr;
183
184 SmallVector<ClusterInfo> Clusters;
185
186 public:
187 /// A list of SUnits, used in Value2SUsMap, during DAG construction.
188 /// Note: to gain speed it might be worth investigating an optimized
189 /// implementation of this data structure, such as a singly linked list
190 /// with a memory pool (SmallVector was tried but slow and SparseSet is not
191 /// applicable).
192 using SUList = std::list<SUnit *>;
193
194 /// The direction that should be used to dump the scheduled Sequence.
195 enum DumpDirection {
196 TopDown,
197 BottomUp,
198 Bidirectional,
199 NotSet,
200 };
201
202 void setDumpDirection(DumpDirection D) { DumpDir = D; }
203
204 protected:
205 DumpDirection DumpDir = NotSet;
206
207 /// A map from ValueType to SUList, used during DAG construction, as
208 /// a means of remembering which SUs depend on which memory locations.
209 class Value2SUsMap;
210
211 /// Returns a (possibly null) pointer to the current BatchAAResults.
212 BatchAAResults *getAAForDep() const {
213 if (AAForDep.has_value())
214 return &AAForDep.value();
215 return nullptr;
216 }
217
218 /// Reduces maps in FIFO order, by N SUs. This is better than turning
219 /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
220 /// it avoids unnecessary edges between seen SUs above the new BarrierChain,
221 /// and those below it.
222 void reduceHugeMemNodeMaps(Value2SUsMap &stores,
223 Value2SUsMap &loads, unsigned N);
224
225 /// Adds a chain edge between SUa and SUb, but only if both
226 /// AAResults and Target fail to deny the dependency.
227 void addChainDependency(SUnit *SUa, SUnit *SUb,
228 unsigned Latency = 0);
229
230 /// Adds dependencies as needed from all SUs in list to SU.
231 void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) {
232 for (SUnit *Entry : SUs)
233 addChainDependency(SUa: SU, SUb: Entry, Latency);
234 }
235
236 /// Adds dependencies as needed from all SUs in map, to SU.
237 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
238
239 /// Adds dependencies as needed to SU, from all SUs mapped to V.
240 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
241 ValueType V);
242
243 /// Adds barrier chain edges from all SUs in map, and then clear the map.
244 /// This is equivalent to insertBarrierChain(), but optimized for the common
245 /// case where the new BarrierChain (a global memory object) has a higher
246 /// NodeNum than all SUs in map. It is assumed BarrierChain has been set
247 /// before calling this.
248 void addBarrierChain(Value2SUsMap &map);
249
250 /// Inserts a barrier chain in a huge region, far below current SU.
251 /// Adds barrier chain edges from all SUs in map with higher NodeNums than
252 /// this new BarrierChain, and remove them from map. It is assumed
253 /// BarrierChain has been set before calling this.
254 void insertBarrierChain(Value2SUsMap &map);
255
256 /// For an unanalyzable memory access, this Value is used in maps.
257 UndefValue *UnknownValue;
258
259
260 /// Topo - A topological ordering for SUnits which permits fast IsReachable
261 /// and similar queries.
262 ScheduleDAGTopologicalSort Topo;
263
264 using DbgValueVector =
265 std::vector<std::pair<MachineInstr *, MachineInstr *>>;
266 /// Remember instruction that precedes DBG_VALUE.
267 /// These are generated by buildSchedGraph but persist so they can be
268 /// referenced when emitting the final schedule.
269 DbgValueVector DbgValues;
270 MachineInstr *FirstDbgValue = nullptr;
271
272 /// Set of live physical registers for updating kill flags.
273 LiveRegUnits LiveRegs;
274
275 public:
276 explicit ScheduleDAGInstrs(MachineFunction &mf,
277 const MachineLoopInfo *mli,
278 bool RemoveKillFlags = false);
279
280 ~ScheduleDAGInstrs() override = default;
281
282 /// Gets the machine model for instruction scheduling.
283 const TargetSchedModel *getSchedModel() const { return &SchedModel; }
284
285 /// Resolves and cache a resolved scheduling class for an SUnit.
286 const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
287 if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
288 SU->SchedClass = SchedModel.resolveSchedClass(MI: SU->getInstr());
289 return SU->SchedClass;
290 }
291
292 /// IsReachable - Checks if SU is reachable from TargetSU.
293 bool IsReachable(SUnit *SU, SUnit *TargetSU) {
294 return Topo.IsReachable(SU, TargetSU);
295 }
296
297 /// Whether regions with a single MI should be scheduled.
298 bool shouldScheduleSingleMIRegions() const {
299 return ScheduleSingleMIRegions;
300 }
301
302 /// Returns an iterator to the top of the current scheduling region.
303 MachineBasicBlock::iterator begin() const { return RegionBegin; }
304
305 /// Returns an iterator to the bottom of the current scheduling region.
306 MachineBasicBlock::iterator end() const { return RegionEnd; }
307
308 /// Creates a new SUnit and return a ptr to it.
309 SUnit *newSUnit(MachineInstr *MI);
310
311 /// Returns an existing SUnit for this MI, or nullptr.
312 SUnit *getSUnit(MachineInstr *MI) const;
313
314 /// If this method returns true, handling of the scheduling regions
315 /// themselves (in case of a scheduling boundary in MBB) will be done
316 /// beginning with the topmost region of MBB.
317 virtual bool doMBBSchedRegionsTopDown() const { return false; }
318
319 /// Prepares to perform scheduling in the given block.
320 virtual void startBlock(MachineBasicBlock *BB);
321
322 /// Cleans up after scheduling in the given block.
323 virtual void finishBlock();
324
325 /// Initialize the DAG and common scheduler state for a new
326 /// scheduling region. This does not actually create the DAG, only clears
327 /// it. The scheduling driver may call BuildSchedGraph multiple times per
328 /// scheduling region.
329 virtual void enterRegion(MachineBasicBlock *bb,
330 MachineBasicBlock::iterator begin,
331 MachineBasicBlock::iterator end,
332 unsigned regioninstrs);
333
334 /// Called when the scheduler has finished scheduling the current region.
335 virtual void exitRegion();
336
337 /// Builds SUnits for the current region.
338 /// If \p RPTracker is non-null, compute register pressure as a side effect.
339 /// The DAG builder is an efficient place to do it because it already visits
340 /// operands.
341 void buildSchedGraph(AAResults *AA,
342 RegPressureTracker *RPTracker = nullptr,
343 PressureDiffs *PDiffs = nullptr,
344 LiveIntervals *LIS = nullptr,
345 bool TrackLaneMasks = false);
346
347 /// Adds dependencies from instructions in the current list of
348 /// instructions being scheduled to scheduling barrier. We want to make sure
349 /// instructions which define registers that are either used by the
350 /// terminator or are live-out are properly scheduled. This is especially
351 /// important when the definition latency of the return value(s) are too
352 /// high to be hidden by the branch or when the liveout registers used by
353 /// instructions in the fallthrough block.
354 void addSchedBarrierDeps();
355
356 /// Orders nodes according to selected style.
357 ///
358 /// Typically, a scheduling algorithm will implement schedule() without
359 /// overriding enterRegion() or exitRegion().
360 virtual void schedule() = 0;
361
362 /// Allow targets to perform final scheduling actions at the level of the
363 /// whole MachineFunction. By default does nothing.
364 virtual void finalizeSchedule() {}
365
366 void dumpNode(const SUnit &SU) const override;
367 void dump() const override;
368
369 /// Returns a label for a DAG node that points to an instruction.
370 std::string getGraphNodeLabel(const SUnit *SU) const override;
371
372 /// Returns a label for the region of code covered by the DAG.
373 std::string getDAGName() const override;
374
375 /// Fixes register kill flags that scheduling has made invalid.
376 void fixupKills(MachineBasicBlock &MBB);
377
378 /// True if an edge can be added from PredSU to SuccSU without creating
379 /// a cycle.
380 bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
381
382 /// Add a DAG edge to the given SU with the given predecessor
383 /// dependence data.
384 ///
385 /// \returns true if the edge may be added without creating a cycle OR if an
386 /// equivalent edge already existed (false indicates failure).
387 bool addEdge(SUnit *SuccSU, const SDep &PredDep);
388
389 /// Returns the array of the clusters.
390 SmallVector<ClusterInfo> &getClusters() { return Clusters; }
391
392 /// Get the specific cluster, return nullptr for InvalidClusterId.
393 ClusterInfo *getCluster(unsigned Idx) {
394 return Idx != InvalidClusterId ? &Clusters[Idx] : nullptr;
395 }
396
397 protected:
398 void initSUnits();
399 void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
400 void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
401 void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
402 void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
403
404 /// Returns a mask for which lanes get read/written by the given (register)
405 /// machine operand.
406 LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
407
408 /// Returns true if the def register in \p MO has no uses.
409 bool deadDefHasNoUse(const MachineOperand &MO);
410 };
411
412 /// Creates a new SUnit and return a ptr to it.
413 inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
414#ifndef NDEBUG
415 const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0];
416#endif
417 SUnits.emplace_back(args&: MI, args: (unsigned)SUnits.size());
418 assert((Addr == nullptr || Addr == &SUnits[0]) &&
419 "SUnits std::vector reallocated on the fly!");
420 return &SUnits.back();
421 }
422
423 /// Returns an existing SUnit for this MI, or nullptr.
424 inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
425 return MISUnitMap.lookup(Val: MI);
426 }
427
428} // end namespace llvm
429
430#endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
431