1//===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides an interface for customizing the standard MachineScheduler
10// pass. Note that the entire pass may be replaced as follows:
11//
12// <Target>TargetMachine::createPassConfig(PassManagerBase &PM) {
13// PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID);
14// ...}
15//
16// The MachineScheduler pass is only responsible for choosing the regions to be
17// scheduled. Targets can override the DAG builder and scheduler without
18// replacing the pass as follows:
19//
20// ScheduleDAGInstrs *<Target>TargetMachine::
21// createMachineScheduler(MachineSchedContext *C) {
22// return new CustomMachineScheduler(C);
23// }
24//
25// The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
26// scheduling while updating the instruction stream, register pressure, and live
27// intervals. Most targets don't need to override the DAG builder and list
28// scheduler, but subtargets that require custom scheduling heuristics may
29// plugin an alternate MachineSchedStrategy. The strategy is responsible for
30// selecting the highest priority node from the list:
31//
32// ScheduleDAGInstrs *<Target>TargetMachine::
33// createMachineScheduler(MachineSchedContext *C) {
34// return new ScheduleDAGMILive(C, CustomStrategy(C));
35// }
36//
37// The DAG builder can also be customized in a sense by adding DAG mutations
38// that will run after DAG building and before list scheduling. DAG mutations
39// can adjust dependencies based on target-specific knowledge or add weak edges
40// to aid heuristics:
41//
42// ScheduleDAGInstrs *<Target>TargetMachine::
43// createMachineScheduler(MachineSchedContext *C) {
44// ScheduleDAGMI *DAG = createSchedLive(C);
45// DAG->addMutation(new CustomDAGMutation(...));
46// return DAG;
47// }
48//
49// A target that supports alternative schedulers can use the
50// MachineSchedRegistry to allow command line selection. This can be done by
51// implementing the following boilerplate:
52//
53// static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
54// return new CustomMachineScheduler(C);
55// }
56// static MachineSchedRegistry
57// SchedCustomRegistry("custom", "Run my target's custom scheduler",
58// createCustomMachineSched);
59//
60//
61// Finally, subtargets that don't need to implement custom heuristics but would
62// like to configure the GenericScheduler's policy for a given scheduler region,
63// including scheduling direction and register pressure tracking policy, can do
64// this:
65//
66// void <SubTarget>Subtarget::
67// overrideSchedPolicy(MachineSchedPolicy &Policy,
68// const SchedRegion &Region) const {
69// Policy.<Flag> = true;
70// }
71//
72//===----------------------------------------------------------------------===//
73
74#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
75#define LLVM_CODEGEN_MACHINESCHEDULER_H
76
77#include "llvm/ADT/APInt.h"
78#include "llvm/ADT/ArrayRef.h"
79#include "llvm/ADT/BitVector.h"
80#include "llvm/ADT/STLExtras.h"
81#include "llvm/ADT/SmallVector.h"
82#include "llvm/ADT/StringRef.h"
83#include "llvm/ADT/Twine.h"
84#include "llvm/CodeGen/MachineBasicBlock.h"
85#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
86#include "llvm/CodeGen/MachinePassRegistry.h"
87#include "llvm/CodeGen/RegisterPressure.h"
88#include "llvm/CodeGen/ScheduleDAG.h"
89#include "llvm/CodeGen/ScheduleDAGInstrs.h"
90#include "llvm/CodeGen/ScheduleDAGMutation.h"
91#include "llvm/CodeGen/TargetSchedule.h"
92#include "llvm/Support/CommandLine.h"
93#include "llvm/Support/Compiler.h"
94#include "llvm/Support/ErrorHandling.h"
95#include <algorithm>
96#include <cassert>
97#include <llvm/Support/raw_ostream.h>
98#include <memory>
99#include <string>
100#include <vector>
101
102namespace llvm {
103namespace impl_detail {
104// FIXME: Remove these declarations once RegisterClassInfo is queryable as an
105// analysis.
106class MachineSchedulerImpl;
107class PostMachineSchedulerImpl;
108} // namespace impl_detail
109
110namespace MISched {
111enum Direction {
112 Unspecified,
113 TopDown,
114 BottomUp,
115 Bidirectional,
116};
117} // namespace MISched
118
119LLVM_ABI extern cl::opt<MISched::Direction> PreRADirection;
120LLVM_ABI extern cl::opt<bool> VerifyScheduling;
121
122#ifndef NDEBUG
123extern cl::opt<bool> ViewMISchedDAGs;
124extern cl::opt<bool> PrintDAGs;
125#else
126LLVM_ABI extern const bool ViewMISchedDAGs;
127LLVM_ABI extern const bool PrintDAGs;
128#endif
129
130class AAResults;
131class LiveIntervals;
132class MachineDominatorTree;
133class MachineFunction;
134class MachineInstr;
135class MachineLoopInfo;
136class RegisterClassInfo;
137class SchedDFSResult;
138class ScheduleHazardRecognizer;
139class TargetInstrInfo;
140class TargetPassConfig;
141class TargetRegisterInfo;
142
143/// MachineSchedContext provides enough context from the MachineScheduler pass
144/// for the target to instantiate a scheduler.
145struct LLVM_ABI MachineSchedContext {
146 MachineFunction *MF = nullptr;
147 const MachineLoopInfo *MLI = nullptr;
148 const MachineDominatorTree *MDT = nullptr;
149 const TargetMachine *TM = nullptr;
150 AAResults *AA = nullptr;
151 LiveIntervals *LIS = nullptr;
152 MachineBlockFrequencyInfo *MBFI = nullptr;
153
154 RegisterClassInfo *RegClassInfo;
155
156 MachineSchedContext();
157 MachineSchedContext &operator=(const MachineSchedContext &other) = delete;
158 MachineSchedContext(const MachineSchedContext &other) = delete;
159 virtual ~MachineSchedContext();
160};
161
162/// MachineSchedRegistry provides a selection of available machine instruction
163/// schedulers.
164class MachineSchedRegistry
165 : public MachinePassRegistryNode<
166 ScheduleDAGInstrs *(*)(MachineSchedContext *)> {
167public:
168 using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);
169
170 // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
171 using FunctionPassCtor = ScheduleDAGCtor;
172
173 LLVM_ABI static MachinePassRegistry<ScheduleDAGCtor> Registry;
174
175 MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
176 : MachinePassRegistryNode(N, D, C) {
177 Registry.Add(Node: this);
178 }
179
180 ~MachineSchedRegistry() { Registry.Remove(Node: this); }
181
182 // Accessors.
183 //
184 MachineSchedRegistry *getNext() const {
185 return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
186 }
187
188 static MachineSchedRegistry *getList() {
189 return (MachineSchedRegistry *)Registry.getList();
190 }
191
192 static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
193 Registry.setListener(L);
194 }
195};
196
197class ScheduleDAGMI;
198
199/// Define a generic scheduling policy for targets that don't provide their own
200/// MachineSchedStrategy. This can be overriden for each scheduling region
201/// before building the DAG.
202struct MachineSchedPolicy {
203 // Allow the scheduler to disable register pressure tracking.
204 bool ShouldTrackPressure = false;
205 /// Track LaneMasks to allow reordering of independent subregister writes
206 /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks()
207 bool ShouldTrackLaneMasks = false;
208
209 // Allow the scheduler to force top-down or bottom-up scheduling. If neither
210 // is true, the scheduler runs in both directions and converges.
211 bool OnlyTopDown = false;
212 bool OnlyBottomUp = false;
213
214 // Disable heuristic that tries to fetch nodes from long dependency chains
215 // first.
216 bool DisableLatencyHeuristic = false;
217
218 // Compute DFSResult for use in scheduling heuristics.
219 bool ComputeDFSResult = false;
220
221 // If enabled, some extra cases of physreg defs will be biased towards user.
222 bool BiasPRegsExtra = false;
223
224 MachineSchedPolicy() = default;
225};
226
227/// A region of an MBB for scheduling.
228struct SchedRegion {
229 /// RegionBegin is the first instruction in the scheduling region, and
230 /// RegionEnd is either MBB->end() or the scheduling boundary after the
231 /// last instruction in the scheduling region. These iterators cannot refer
232 /// to instructions outside of the identified scheduling region because
233 /// those may be reordered before scheduling this region.
234 MachineBasicBlock::iterator RegionBegin;
235 MachineBasicBlock::iterator RegionEnd;
236 unsigned NumRegionInstrs;
237
238 SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
239 unsigned N)
240 : RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
241};
242
243/// MachineSchedStrategy - Interface to the scheduling algorithm used by
244/// ScheduleDAGMI.
245///
246/// Initialization sequence:
247/// initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots
248class LLVM_ABI MachineSchedStrategy {
249 virtual void anchor();
250
251public:
252 virtual ~MachineSchedStrategy() = default;
253
254 /// Optionally override the per-region scheduling policy.
255 virtual void initPolicy(MachineBasicBlock::iterator Begin,
256 MachineBasicBlock::iterator End,
257 unsigned NumRegionInstrs) {}
258
259 virtual MachineSchedPolicy getPolicy() const { return {}; }
260 virtual void dumpPolicy() const {}
261
262 /// Check if pressure tracking is needed before building the DAG and
263 /// initializing this strategy. Called after initPolicy.
264 virtual bool shouldTrackPressure() const { return true; }
265
266 /// Returns true if lanemasks should be tracked. LaneMask tracking is
267 /// necessary to reorder independent subregister defs for the same vreg.
268 /// This has to be enabled in combination with shouldTrackPressure().
269 virtual bool shouldTrackLaneMasks() const { return false; }
270
271 // If this method returns true, handling of the scheduling regions
272 // themselves (in case of a scheduling boundary in MBB) will be done
273 // beginning with the topmost region of MBB.
274 virtual bool doMBBSchedRegionsTopDown() const { return false; }
275
276 /// Initialize the strategy after building the DAG for a new region.
277 virtual void initialize(ScheduleDAGMI *DAG) = 0;
278
279 /// Tell the strategy that MBB is about to be processed.
280 virtual void enterMBB(MachineBasicBlock *MBB) {};
281
282 /// Tell the strategy that current MBB is done.
283 virtual void leaveMBB() {};
284
285 /// Notify this strategy that all roots have been released (including those
286 /// that depend on EntrySU or ExitSU).
287 virtual void registerRoots() {}
288
289 /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
290 /// schedule the node at the top of the unscheduled region. Otherwise it will
291 /// be scheduled at the bottom.
292 virtual SUnit *pickNode(bool &IsTopNode) = 0;
293
294 /// Scheduler callback to notify that a new subtree is scheduled.
295 virtual void scheduleTree(unsigned SubtreeID) {}
296
297 /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
298 /// instruction and updated scheduled/remaining flags in the DAG nodes.
299 virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
300
301 /// When all predecessor dependencies have been resolved, free this node for
302 /// top-down scheduling.
303 virtual void releaseTopNode(SUnit *SU) = 0;
304
305 /// When all successor dependencies have been resolved, free this node for
306 /// bottom-up scheduling.
307 virtual void releaseBottomNode(SUnit *SU) = 0;
308};
309
310/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
311/// schedules machine instructions according to the given MachineSchedStrategy
312/// without much extra book-keeping. This is the common functionality between
313/// PreRA and PostRA MachineScheduler.
314class LLVM_ABI ScheduleDAGMI : public ScheduleDAGInstrs {
315protected:
316 AAResults *AA;
317 LiveIntervals *LIS;
318 MachineBlockFrequencyInfo *MBFI;
319 std::unique_ptr<MachineSchedStrategy> SchedImpl;
320
321 /// Ordered list of DAG postprocessing steps.
322 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
323
324 /// The top of the unscheduled zone.
325 MachineBasicBlock::iterator CurrentTop;
326
327 /// The bottom of the unscheduled zone.
328 MachineBasicBlock::iterator CurrentBottom;
329
330#if LLVM_ENABLE_ABI_BREAKING_CHECKS
331 /// The number of instructions scheduled so far. Used to cut off the
332 /// scheduler at the point determined by misched-cutoff.
333 unsigned NumInstrsScheduled = 0;
334#endif
335
336public:
337 ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
338 bool RemoveKillFlags)
339 : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
340 LIS(C->LIS), MBFI(C->MBFI), SchedImpl(std::move(S)) {}
341
342 // Provide a vtable anchor
343 ~ScheduleDAGMI() override;
344
345 /// If this method returns true, handling of the scheduling regions
346 /// themselves (in case of a scheduling boundary in MBB) will be done
347 /// beginning with the topmost region of MBB.
348 bool doMBBSchedRegionsTopDown() const override {
349 return SchedImpl->doMBBSchedRegionsTopDown();
350 }
351
352 // Returns LiveIntervals instance for use in DAG mutators and such.
353 LiveIntervals *getLIS() const { return LIS; }
354
355 /// Return true if this DAG supports VReg liveness and RegPressure.
356 virtual bool hasVRegLiveness() const { return false; }
357
358 /// Add a postprocessing step to the DAG builder.
359 /// Mutations are applied in the order that they are added after normal DAG
360 /// building and before MachineSchedStrategy initialization.
361 ///
362 /// ScheduleDAGMI takes ownership of the Mutation object.
363 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
364 if (Mutation)
365 Mutations.push_back(x: std::move(Mutation));
366 }
367
368 MachineBasicBlock::iterator top() const { return CurrentTop; }
369 MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
370
371 /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
372 /// region. This covers all instructions in a block, while schedule() may only
373 /// cover a subset.
374 void enterRegion(MachineBasicBlock *bb,
375 MachineBasicBlock::iterator begin,
376 MachineBasicBlock::iterator end,
377 unsigned regioninstrs) override;
378
379 /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
380 /// reorderable instructions.
381 void schedule() override;
382
383 void startBlock(MachineBasicBlock *bb) override;
384 void finishBlock() override;
385
386 /// Change the position of an instruction within the basic block and update
387 /// live ranges and region boundary iterators.
388 void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
389
390 void viewGraph(const Twine &Name, const Twine &Title) override;
391 void viewGraph() override;
392
393protected:
394 // Top-Level entry points for the schedule() driver...
395
396 /// Apply each ScheduleDAGMutation step in order. This allows different
397 /// instances of ScheduleDAGMI to perform custom DAG postprocessing.
398 void postProcessDAG();
399
400 /// Release ExitSU predecessors and setup scheduler queues.
401 void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
402
403 /// Update scheduler DAG and queues after scheduling an instruction.
404 void updateQueues(SUnit *SU, bool IsTopNode);
405
406 /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
407 void placeDebugValues();
408
409 /// dump the scheduled Sequence.
410 void dumpSchedule() const;
411 /// Print execution trace of the schedule top-down or bottom-up.
412 void dumpScheduleTraceTopDown() const;
413 void dumpScheduleTraceBottomUp() const;
414
415 // Lesser helpers...
416 bool checkSchedLimit();
417
418 void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
419 SmallVectorImpl<SUnit*> &BotRoots);
420
421 void releaseSucc(SUnit *SU, SDep *SuccEdge);
422 void releaseSuccessors(SUnit *SU);
423 void releasePred(SUnit *SU, SDep *PredEdge);
424 void releasePredecessors(SUnit *SU);
425};
426
427/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
428/// machine instructions while updating LiveIntervals and tracking regpressure.
429class LLVM_ABI ScheduleDAGMILive : public ScheduleDAGMI {
430protected:
431 RegisterClassInfo *RegClassInfo;
432
433 /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
434 /// will be empty.
435 SchedDFSResult *DFSResult = nullptr;
436 BitVector ScheduledTrees;
437
438 MachineBasicBlock::iterator LiveRegionEnd;
439
440 /// Maps vregs to the SUnits of their uses in the current scheduling region.
441 VReg2SUnitMultiMap VRegUses;
442
443 // Map each SU to its summary of pressure changes. This array is updated for
444 // liveness during bottom-up scheduling. Top-down scheduling may proceed but
445 // has no affect on the pressure diffs.
446 PressureDiffs SUPressureDiffs;
447
448 /// Register pressure in this region computed by initRegPressure.
449 bool ShouldTrackPressure = false;
450 bool ShouldTrackLaneMasks = false;
451 IntervalPressure RegPressure;
452 RegPressureTracker RPTracker;
453
454 /// List of pressure sets that exceed the target's pressure limit before
455 /// scheduling, listed in increasing set ID order. Each pressure set is paired
456 /// with its max pressure in the currently scheduled regions.
457 std::vector<PressureChange> RegionCriticalPSets;
458
459 /// The top of the unscheduled zone.
460 IntervalPressure TopPressure;
461 RegPressureTracker TopRPTracker;
462
463 /// The bottom of the unscheduled zone.
464 IntervalPressure BotPressure;
465 RegPressureTracker BotRPTracker;
466
467public:
468 ScheduleDAGMILive(MachineSchedContext *C,
469 std::unique_ptr<MachineSchedStrategy> S)
470 : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false),
471 RegClassInfo(C->RegClassInfo), RPTracker(RegPressure),
472 TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}
473
474 ~ScheduleDAGMILive() override;
475
476 /// Return true if this DAG supports VReg liveness and RegPressure.
477 bool hasVRegLiveness() const override { return true; }
478
479 /// Return true if register pressure tracking is enabled.
480 bool isTrackingPressure() const { return ShouldTrackPressure; }
481
482 /// Get current register pressure for the top scheduled instructions.
483 const IntervalPressure &getTopPressure() const { return TopPressure; }
484 const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
485
486 /// Get current register pressure for the bottom scheduled instructions.
487 const IntervalPressure &getBotPressure() const { return BotPressure; }
488 const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
489
490 /// Get register pressure for the entire scheduling region before scheduling.
491 const IntervalPressure &getRegPressure() const { return RegPressure; }
492
493 const std::vector<PressureChange> &getRegionCriticalPSets() const {
494 return RegionCriticalPSets;
495 }
496
497 PressureDiff &getPressureDiff(const SUnit *SU) {
498 return SUPressureDiffs[SU->NodeNum];
499 }
500 const PressureDiff &getPressureDiff(const SUnit *SU) const {
501 return SUPressureDiffs[SU->NodeNum];
502 }
503
504 /// Compute a DFSResult after DAG building is complete, and before any
505 /// queue comparisons.
506 void computeDFSResult();
507
508 /// Return a non-null DFS result if the scheduling strategy initialized it.
509 const SchedDFSResult *getDFSResult() const { return DFSResult; }
510
511 BitVector &getScheduledTrees() { return ScheduledTrees; }
512
513 /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
514 /// region. This covers all instructions in a block, while schedule() may only
515 /// cover a subset.
516 void enterRegion(MachineBasicBlock *bb,
517 MachineBasicBlock::iterator begin,
518 MachineBasicBlock::iterator end,
519 unsigned regioninstrs) override;
520
521 /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
522 /// reorderable instructions.
523 void schedule() override;
524
525 /// Compute the cyclic critical path through the DAG.
526 unsigned computeCyclicCriticalPath();
527
528 void dump() const override;
529
530protected:
531 // Top-Level entry points for the schedule() driver...
532
533 /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
534 /// enabled. This sets up three trackers. RPTracker will cover the entire DAG
535 /// region, TopTracker and BottomTracker will be initialized to the top and
536 /// bottom of the DAG region without covereing any unscheduled instruction.
537 void buildDAGWithRegPressure();
538
539 /// Release ExitSU predecessors and setup scheduler queues. Re-position
540 /// the Top RP tracker in case the region beginning has changed.
541 void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
542
543 /// Move an instruction and update register pressure.
544 void scheduleMI(SUnit *SU, bool IsTopNode);
545
546 // Lesser helpers...
547
548 void initRegPressure();
549
550 void updatePressureDiffs(ArrayRef<VRegMaskOrUnit> LiveUses);
551
552 void updateScheduledPressure(const SUnit *SU,
553 const std::vector<unsigned> &NewMaxPressure);
554
555 void collectVRegUses(SUnit &SU);
556};
557
558//===----------------------------------------------------------------------===//
559///
560/// Helpers for implementing custom MachineSchedStrategy classes. These take
561/// care of the book-keeping associated with list scheduling heuristics.
562///
563//===----------------------------------------------------------------------===//
564
565/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
566/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
567/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
568///
569/// This is a convenience class that may be used by implementations of
570/// MachineSchedStrategy.
571class ReadyQueue {
572 unsigned ID;
573 std::string Name;
574 std::vector<SUnit*> Queue;
575
576public:
577 ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
578
579 unsigned getID() const { return ID; }
580
581 StringRef getName() const { return Name; }
582
583 // SU is in this queue if it's NodeQueueID is a superset of this ID.
584 bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
585
586 bool empty() const { return Queue.empty(); }
587
588 void clear() { Queue.clear(); }
589
590 unsigned size() const { return Queue.size(); }
591
592 using iterator = std::vector<SUnit*>::iterator;
593
594 iterator begin() { return Queue.begin(); }
595
596 iterator end() { return Queue.end(); }
597
598 ArrayRef<SUnit*> elements() { return Queue; }
599
600 iterator find(SUnit *SU) { return llvm::find(Range&: Queue, Val: SU); }
601
602 void push(SUnit *SU) {
603 Queue.push_back(x: SU);
604 SU->NodeQueueId |= ID;
605 }
606
607 iterator remove(iterator I) {
608 (*I)->NodeQueueId &= ~ID;
609 *I = Queue.back();
610 unsigned idx = I - Queue.begin();
611 Queue.pop_back();
612 return Queue.begin() + idx;
613 }
614
615 LLVM_ABI void dump() const;
616};
617
618/// Summarize the unscheduled region.
619struct SchedRemainder {
620 // Critical path through the DAG in expected latency.
621 unsigned CriticalPath;
622 unsigned CyclicCritPath;
623
624 // Scaled count of micro-ops left to schedule.
625 unsigned RemIssueCount;
626
627 bool IsAcyclicLatencyLimited;
628
629 // Unscheduled resources
630 SmallVector<unsigned, 16> RemainingCounts;
631
632 SchedRemainder() { reset(); }
633
634 void reset() {
635 CriticalPath = 0;
636 CyclicCritPath = 0;
637 RemIssueCount = 0;
638 IsAcyclicLatencyLimited = false;
639 RemainingCounts.clear();
640 }
641
642 LLVM_ABI void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
643};
644
645/// ResourceSegments are a collection of intervals closed on the
646/// left and opened on the right:
647///
648/// list{ [a1, b1), [a2, b2), ..., [a_N, b_N) }
649///
650/// The collection has the following properties:
651///
652/// 1. The list is ordered: a_i < b_i and b_i < a_(i+1)
653///
654/// 2. The intervals in the collection do not intersect each other.
655///
656/// A \ref ResourceSegments instance represents the cycle
657/// reservation history of the instance of and individual resource.
658class ResourceSegments {
659public:
660 /// Represents an interval of discrete integer values closed on
661 /// the left and open on the right: [a, b).
662 typedef std::pair<int64_t, int64_t> IntervalTy;
663
664 /// Adds an interval [a, b) to the collection of the instance.
665 ///
666 /// When adding [a, b[ to the collection, the operation merges the
667 /// adjacent intervals. For example
668 ///
669 /// 0 1 2 3 4 5 6 7 8 9 10
670 /// [-----) [--) [--)
671 /// + [--)
672 /// = [-----------) [--)
673 ///
674 /// To be able to debug duplicate resource usage, the function has
675 /// assertion that checks that no interval should be added if it
676 /// overlaps any of the intervals in the collection. We can
677 /// require this because by definition a \ref ResourceSegments is
678 /// attached only to an individual resource instance.
679 LLVM_ABI void add(IntervalTy A, const unsigned CutOff = 10);
680
681public:
682 /// Checks whether intervals intersect.
683 LLVM_ABI static bool intersects(IntervalTy A, IntervalTy B);
684
685 /// These function return the interval used by a resource in bottom and top
686 /// scheduling.
687 ///
688 /// Consider an instruction that uses resources X0, X1 and X2 as follows:
689 ///
690 /// X0 X1 X1 X2 +--------+-------------+--------------+
691 /// |Resource|AcquireAtCycle|ReleaseAtCycle|
692 /// +--------+-------------+--------------+
693 /// | X0 | 0 | 1 |
694 /// +--------+-------------+--------------+
695 /// | X1 | 1 | 3 |
696 /// +--------+-------------+--------------+
697 /// | X2 | 3 | 4 |
698 /// +--------+-------------+--------------+
699 ///
700 /// If we can schedule the instruction at cycle C, we need to
701 /// compute the interval of the resource as follows:
702 ///
703 /// # TOP DOWN SCHEDULING
704 ///
705 /// Cycles scheduling flows to the _right_, in the same direction
706 /// of time.
707 ///
708 /// C 1 2 3 4 5 ...
709 /// ------|------|------|------|------|------|----->
710 /// X0 X1 X1 X2 ---> direction of time
711 /// X0 [C, C+1)
712 /// X1 [C+1, C+3)
713 /// X2 [C+3, C+4)
714 ///
715 /// Therefore, the formula to compute the interval for a resource
716 /// of an instruction that can be scheduled at cycle C in top-down
717 /// scheduling is:
718 ///
719 /// [C+AcquireAtCycle, C+ReleaseAtCycle)
720 ///
721 ///
722 /// # BOTTOM UP SCHEDULING
723 ///
724 /// Cycles scheduling flows to the _left_, in opposite direction
725 /// of time.
726 ///
727 /// In bottom up scheduling, the scheduling happens in opposite
728 /// direction to the execution of the cycles of the
729 /// instruction. When the instruction is scheduled at cycle `C`,
730 /// the resources are allocated in the past relative to `C`:
731 ///
732 /// 2 1 C -1 -2 -3 -4 -5 ...
733 /// <-----|------|------|------|------|------|------|------|---
734 /// X0 X1 X1 X2 ---> direction of time
735 /// X0 (C+1, C]
736 /// X1 (C, C-2]
737 /// X2 (C-2, C-3]
738 ///
739 /// Therefore, the formula to compute the interval for a resource
740 /// of an instruction that can be scheduled at cycle C in bottom-up
741 /// scheduling is:
742 ///
743 /// [C-ReleaseAtCycle+1, C-AcquireAtCycle+1)
744 ///
745 ///
746 /// NOTE: In both cases, the number of cycles booked by a
747 /// resources is the value (ReleaseAtCycle - AcquireAtCycle).
748 static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle,
749 unsigned ReleaseAtCycle) {
750 return std::make_pair<long, long>(x: (long)C - (long)ReleaseAtCycle + 1L,
751 y: (long)C - (long)AcquireAtCycle + 1L);
752 }
753 static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle,
754 unsigned ReleaseAtCycle) {
755 return std::make_pair<long, long>(x: (long)C + (long)AcquireAtCycle,
756 y: (long)C + (long)ReleaseAtCycle);
757 }
758
759private:
760 /// Finds the first cycle in which a resource can be allocated.
761 ///
762 /// The function uses the \param IntervalBuider [*] to build a
763 /// resource interval [a, b[ out of the input parameters \param
764 /// CurrCycle, \param AcquireAtCycle and \param ReleaseAtCycle.
765 ///
766 /// The function then loops through the intervals in the ResourceSegments
767 /// and shifts the interval [a, b[ and the ReturnCycle to the
768 /// right until there is no intersection between the intervals of
769 /// the \ref ResourceSegments instance and the new shifted [a, b[. When
770 /// this condition is met, the ReturnCycle (which
771 /// correspond to the cycle in which the resource can be
772 /// allocated) is returned.
773 ///
774 /// c = CurrCycle in input
775 /// c 1 2 3 4 5 6 7 8 9 10 ... ---> (time
776 /// flow)
777 /// ResourceSegments... [---) [-------) [-----------)
778 /// c [1 3[ -> AcquireAtCycle=1, ReleaseAtCycle=3
779 /// ++c [1 3)
780 /// ++c [1 3)
781 /// ++c [1 3)
782 /// ++c [1 3)
783 /// ++c [1 3) ---> returns c
784 /// incremented by 5 (c+5)
785 ///
786 ///
787 /// Notice that for bottom-up scheduling the diagram is slightly
788 /// different because the current cycle c is always on the right
789 /// of the interval [a, b) (see \ref
790 /// `getResourceIntervalBottom`). This is because the cycle
791 /// increments for bottom-up scheduling moved in the direction
792 /// opposite to the direction of time:
793 ///
794 /// --------> direction of time.
795 /// XXYZZZ (resource usage)
796 /// --------> direction of top-down execution cycles.
797 /// <-------- direction of bottom-up execution cycles.
798 ///
799 /// Even though bottom-up scheduling moves against the flow of
800 /// time, the algorithm used to find the first free slot in between
801 /// intervals is the same as for top-down scheduling.
802 ///
803 /// [*] See \ref `getResourceIntervalTop` and
804 /// \ref `getResourceIntervalBottom` to see how such resource intervals
805 /// are built.
806 LLVM_ABI unsigned getFirstAvailableAt(
807 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
808 std::function<IntervalTy(unsigned, unsigned, unsigned)> IntervalBuilder)
809 const;
810
811public:
812 /// getFirstAvailableAtFromBottom and getFirstAvailableAtFromTop
813 /// should be merged in a single function in which a function that
814 /// creates the `NewInterval` is passed as a parameter.
815 unsigned getFirstAvailableAtFromBottom(unsigned CurrCycle,
816 unsigned AcquireAtCycle,
817 unsigned ReleaseAtCycle) const {
818 return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
819 IntervalBuilder: getResourceIntervalBottom);
820 }
821 unsigned getFirstAvailableAtFromTop(unsigned CurrCycle,
822 unsigned AcquireAtCycle,
823 unsigned ReleaseAtCycle) const {
824 return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle,
825 IntervalBuilder: getResourceIntervalTop);
826 }
827
828private:
829 std::list<IntervalTy> _Intervals;
830 /// Merge all adjacent intervals in the collection. For all pairs
831 /// of adjacient intervals, it performs [a, b) + [b, c) -> [a, c).
832 ///
833 /// Before performing the merge operation, the intervals are
834 /// sorted with \ref sort_predicate.
835 LLVM_ABI void sortAndMerge();
836
837public:
838 // constructor for empty set
839 explicit ResourceSegments() = default;
840 bool empty() const { return _Intervals.empty(); }
841 explicit ResourceSegments(const std::list<IntervalTy> &Intervals)
842 : _Intervals(Intervals) {
843 sortAndMerge();
844 }
845
846 friend bool operator==(const ResourceSegments &c1,
847 const ResourceSegments &c2) {
848 return c1._Intervals == c2._Intervals;
849 }
850 friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
851 const ResourceSegments &Segments) {
852 os << "{ ";
853 for (auto p : Segments._Intervals)
854 os << "[" << p.first << ", " << p.second << "), ";
855 os << "}\n";
856 return os;
857 }
858};
859
860/// Each Scheduling boundary is associated with ready queues. It tracks the
861/// current cycle in the direction of movement, and maintains the state
862/// of "hazards" and other interlocks at the current cycle.
863class SchedBoundary {
864public:
865 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
866 enum {
867 TopQID = 1,
868 BotQID = 2,
869 LogMaxQID = 2
870 };
871
872 ScheduleDAGMI *DAG = nullptr;
873 const TargetSchedModel *SchedModel = nullptr;
874 SchedRemainder *Rem = nullptr;
875
876 ReadyQueue Available;
877 ReadyQueue Pending;
878
879 ScheduleHazardRecognizer *HazardRec = nullptr;
880
881private:
882 /// True if the pending Q should be checked/updated before scheduling another
883 /// instruction.
884 bool CheckPending;
885
886 /// Number of cycles it takes to issue the instructions scheduled in this
887 /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
888 /// See getStalls().
889 unsigned CurrCycle;
890
891 /// Micro-ops issued in the current cycle
892 unsigned CurrMOps;
893
894 /// MinReadyCycle - Cycle of the soonest available instruction.
895 unsigned MinReadyCycle;
896
897 // The expected latency of the critical path in this scheduled zone.
898 unsigned ExpectedLatency;
899
900 // The latency of dependence chains leading into this zone.
901 // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
902 // For each cycle scheduled: DLat -= 1.
903 unsigned DependentLatency;
904
905 /// Count the scheduled (issued) micro-ops that can be retired by
906 /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
907 unsigned RetiredMOps;
908
909 // Count scheduled resources that have been executed. Resources are
910 // considered executed if they become ready in the time that it takes to
911 // saturate any resource including the one in question. Counts are scaled
912 // for direct comparison with other resources. Counts can be compared with
913 // MOps * getMicroOpFactor and Latency * getLatencyFactor.
914 SmallVector<unsigned, 16> ExecutedResCounts;
915
916 /// Cache the max count for a single resource.
917 unsigned MaxExecutedResCount;
918
919 // Cache the critical resources ID in this scheduled zone.
920 unsigned ZoneCritResIdx;
921
922 // Is the scheduled region resource limited vs. latency limited.
923 bool IsResourceLimited;
924
925public:
926private:
927 /// Record how resources have been allocated across the cycles of
928 /// the execution.
929 std::map<unsigned, ResourceSegments> ReservedResourceSegments;
930 std::vector<unsigned> ReservedCycles;
931 /// For each PIdx, stores first index into ReservedResourceSegments that
932 /// corresponds to it.
933 ///
934 /// For example, consider the following 3 resources (ResourceCount =
935 /// 3):
936 ///
937 /// +------------+--------+
938 /// |ResourceName|NumUnits|
939 /// +------------+--------+
940 /// | X | 2 |
941 /// +------------+--------+
942 /// | Y | 3 |
943 /// +------------+--------+
944 /// | Z | 1 |
945 /// +------------+--------+
946 ///
947 /// In this case, the total number of resource instances is 6. The
948 /// vector \ref ReservedResourceSegments will have a slot for each instance.
949 /// The vector \ref ReservedCyclesIndex will track at what index the first
950 /// instance of the resource is found in the vector of \ref
951 /// ReservedResourceSegments:
952 ///
953 /// Indexes of instances in
954 /// ReservedResourceSegments
955 ///
956 /// 0 1 2 3 4 5
957 /// ReservedCyclesIndex[0] = 0; [X0, X1,
958 /// ReservedCyclesIndex[1] = 2; Y0, Y1, Y2
959 /// ReservedCyclesIndex[2] = 5; Z
960 SmallVector<unsigned, 16> ReservedCyclesIndex;
961
962 // For each PIdx, stores the resource group IDs of its subunits
963 SmallVector<APInt, 16> ResourceGroupSubUnitMasks;
964
965#if LLVM_ENABLE_ABI_BREAKING_CHECKS
966 // Remember the greatest possible stall as an upper bound on the number of
967 // times we should retry the pending queue because of a hazard.
968 unsigned MaxObservedStall;
969#endif
970
971public:
972 /// Pending queues extend the ready queues with the same ID and the
973 /// PendingFlag set.
974 SchedBoundary(unsigned ID, const Twine &Name):
975 Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") {
976 reset();
977 }
978 SchedBoundary &operator=(const SchedBoundary &other) = delete;
979 SchedBoundary(const SchedBoundary &other) = delete;
980 LLVM_ABI ~SchedBoundary();
981
982 LLVM_ABI void reset();
983
984 LLVM_ABI void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
985 SchedRemainder *rem);
986
987 bool isTop() const {
988 return Available.getID() == TopQID;
989 }
990
991 /// Number of cycles to issue the instructions scheduled in this zone.
992 unsigned getCurrCycle() const { return CurrCycle; }
993
994 /// Micro-ops issued in the current cycle
995 unsigned getCurrMOps() const { return CurrMOps; }
996
997 // The latency of dependence chains leading into this zone.
998 unsigned getDependentLatency() const { return DependentLatency; }
999
1000 /// Get the number of latency cycles "covered" by the scheduled
1001 /// instructions. This is the larger of the critical path within the zone
1002 /// and the number of cycles required to issue the instructions.
1003 unsigned getScheduledLatency() const {
1004 return std::max(a: ExpectedLatency, b: CurrCycle);
1005 }
1006
1007 unsigned getUnscheduledLatency(SUnit *SU) const {
1008 return isTop() ? SU->getHeight() : SU->getDepth();
1009 }
1010
1011 unsigned getResourceCount(unsigned ResIdx) const {
1012 return ExecutedResCounts[ResIdx];
1013 }
1014
1015 /// Get the scaled count of scheduled micro-ops and resources, including
1016 /// executed resources.
1017 unsigned getCriticalCount() const {
1018 if (!ZoneCritResIdx)
1019 return RetiredMOps * SchedModel->getMicroOpFactor();
1020 return getResourceCount(ResIdx: ZoneCritResIdx);
1021 }
1022
1023 /// Get a scaled count for the minimum execution time of the scheduled
1024 /// micro-ops that are ready to execute by getExecutedCount. Notice the
1025 /// feedback loop.
1026 unsigned getExecutedCount() const {
1027 return std::max(a: CurrCycle * SchedModel->getLatencyFactor(),
1028 b: MaxExecutedResCount);
1029 }
1030
1031 unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }
1032
1033 // Is the scheduled region resource limited vs. latency limited.
1034 bool isResourceLimited() const { return IsResourceLimited; }
1035
1036 /// Get the difference between the given SUnit's ready time and the current
1037 /// cycle.
1038 LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU);
1039
1040 LLVM_ABI unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
1041 unsigned ReleaseAtCycle,
1042 unsigned AcquireAtCycle);
1043
1044 LLVM_ABI std::pair<unsigned, unsigned>
1045 getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
1046 unsigned ReleaseAtCycle, unsigned AcquireAtCycle);
1047
1048 bool isReservedGroup(unsigned PIdx) const {
1049 return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin &&
1050 !SchedModel->getProcResource(PIdx)->BufferSize;
1051 }
1052
1053 LLVM_ABI bool checkHazard(SUnit *SU);
1054
1055 LLVM_ABI unsigned findMaxLatency(ArrayRef<SUnit *> ReadySUs);
1056
1057 LLVM_ABI unsigned getOtherResourceCount(unsigned &OtherCritIdx);
1058
1059 /// Release SU to make it ready. If it's not in hazard, remove it from
1060 /// pending queue (if already in) and push into available queue.
1061 /// Otherwise, push the SU into pending queue.
1062 ///
1063 /// @param SU The unit to be released.
1064 /// @param ReadyCycle Until which cycle the unit is ready.
1065 /// @param InPQueue Whether SU is already in pending queue.
1066 /// @param Idx Position offset in pending queue (if in it).
1067 LLVM_ABI void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
1068 unsigned Idx = 0);
1069
1070 LLVM_ABI void bumpCycle(unsigned NextCycle);
1071
1072 LLVM_ABI void incExecutedResources(unsigned PIdx, unsigned Count);
1073
1074 LLVM_ABI unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx,
1075 unsigned Cycles, unsigned ReadyCycle,
1076 unsigned StartAtCycle);
1077
1078 LLVM_ABI void bumpNode(SUnit *SU);
1079
1080 LLVM_ABI void releasePending();
1081
1082 LLVM_ABI void removeReady(SUnit *SU);
1083
1084 /// Call this before applying any other heuristics to the Available queue.
1085 /// Updates the Available/Pending Q's if necessary and returns the single
1086 /// available instruction, or NULL if there are multiple candidates.
1087 LLVM_ABI SUnit *pickOnlyChoice();
1088
1089 /// Dump the state of the information that tracks resource usage.
1090 LLVM_ABI void dumpReservedCycles() const;
1091 LLVM_ABI void dumpScheduledState() const;
1092};
1093
1094/// Base class for GenericScheduler. This class maintains information about
1095/// scheduling candidates based on TargetSchedModel making it easy to implement
1096/// heuristics for either preRA or postRA scheduling.
1097class GenericSchedulerBase : public MachineSchedStrategy {
1098public:
1099 /// Represent the type of SchedCandidate found within a single queue.
1100 /// pickNodeBidirectional depends on these listed by decreasing priority.
1101 enum CandReason : uint8_t {
1102 NoCand,
1103 Only1,
1104 PhysReg,
1105 RegExcess,
1106 RegCritical,
1107 Stall,
1108 Cluster,
1109 Weak,
1110 RegMax,
1111 ResourceReduce,
1112 ResourceDemand,
1113 BotHeightReduce,
1114 BotPathReduce,
1115 TopDepthReduce,
1116 TopPathReduce,
1117 NodeOrder,
1118 FirstValid
1119 };
1120
1121#ifndef NDEBUG
1122 static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
1123#endif
1124
1125 /// Policy for scheduling the next instruction in the candidate's zone.
1126 struct CandPolicy {
1127 bool ReduceLatency = false;
1128 unsigned ReduceResIdx = 0;
1129 unsigned DemandResIdx = 0;
1130
1131 CandPolicy() = default;
1132
1133 bool operator==(const CandPolicy &RHS) const {
1134 return ReduceLatency == RHS.ReduceLatency &&
1135 ReduceResIdx == RHS.ReduceResIdx &&
1136 DemandResIdx == RHS.DemandResIdx;
1137 }
1138 bool operator!=(const CandPolicy &RHS) const {
1139 return !(*this == RHS);
1140 }
1141 };
1142
1143 /// Status of an instruction's critical resource consumption.
1144 struct SchedResourceDelta {
1145 // Count critical resources in the scheduled region required by SU.
1146 unsigned CritResources = 0;
1147
1148 // Count critical resources from another region consumed by SU.
1149 unsigned DemandedResources = 0;
1150
1151 SchedResourceDelta() = default;
1152
1153 bool operator==(const SchedResourceDelta &RHS) const {
1154 return CritResources == RHS.CritResources
1155 && DemandedResources == RHS.DemandedResources;
1156 }
1157 bool operator!=(const SchedResourceDelta &RHS) const {
1158 return !operator==(RHS);
1159 }
1160 };
1161
1162 /// Store the state used by GenericScheduler heuristics, required for the
1163 /// lifetime of one invocation of pickNode().
1164 struct SchedCandidate {
1165 CandPolicy Policy;
1166
1167 // The best SUnit candidate.
1168 SUnit *SU;
1169
1170 // The reason for this candidate.
1171 CandReason Reason;
1172
1173 // Whether this candidate should be scheduled at top/bottom.
1174 bool AtTop;
1175
1176 // Register pressure values for the best candidate.
1177 RegPressureDelta RPDelta;
1178
1179 // Critical resource consumption of the best candidate.
1180 SchedResourceDelta ResDelta;
1181
1182 SchedCandidate() { reset(NewPolicy: CandPolicy()); }
1183 SchedCandidate(const CandPolicy &Policy) { reset(NewPolicy: Policy); }
1184
1185 void reset(const CandPolicy &NewPolicy) {
1186 Policy = NewPolicy;
1187 SU = nullptr;
1188 Reason = NoCand;
1189 AtTop = false;
1190 RPDelta = RegPressureDelta();
1191 ResDelta = SchedResourceDelta();
1192 }
1193
1194 bool isValid() const { return SU; }
1195
1196 // Copy the status of another candidate without changing policy.
1197 void setBest(SchedCandidate &Best) {
1198 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
1199 SU = Best.SU;
1200 Reason = Best.Reason;
1201 AtTop = Best.AtTop;
1202 RPDelta = Best.RPDelta;
1203 ResDelta = Best.ResDelta;
1204 }
1205
1206 LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG,
1207 const TargetSchedModel *SchedModel);
1208 };
1209
1210protected:
1211 const MachineSchedContext *Context;
1212 const TargetSchedModel *SchedModel = nullptr;
1213 const TargetRegisterInfo *TRI = nullptr;
1214 unsigned TopIdx = 0;
1215 unsigned BotIdx = 0;
1216 unsigned NumRegionInstrs = 0;
1217
1218 MachineSchedPolicy RegionPolicy;
1219
1220 SchedRemainder Rem;
1221
1222 GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {}
1223
1224 LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA,
1225 SchedBoundary &CurrZone, SchedBoundary *OtherZone);
1226
1227 MachineSchedPolicy getPolicy() const override { return RegionPolicy; }
1228
1229#ifndef NDEBUG
1230 void traceCandidate(const SchedCandidate &Cand);
1231#endif
1232
1233private:
1234 bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
1235 bool ComputeRemLatency, unsigned &RemLatency) const;
1236};
1237
1238// Utility functions used by heuristics in tryCandidate().
1239LLVM_ABI unsigned computeRemLatency(SchedBoundary &CurrZone);
1240LLVM_ABI bool tryLess(int TryVal, int CandVal,
1241 GenericSchedulerBase::SchedCandidate &TryCand,
1242 GenericSchedulerBase::SchedCandidate &Cand,
1243 GenericSchedulerBase::CandReason Reason);
1244LLVM_ABI bool tryGreater(int TryVal, int CandVal,
1245 GenericSchedulerBase::SchedCandidate &TryCand,
1246 GenericSchedulerBase::SchedCandidate &Cand,
1247 GenericSchedulerBase::CandReason Reason);
1248LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
1249 GenericSchedulerBase::SchedCandidate &Cand,
1250 SchedBoundary &Zone);
1251LLVM_ABI bool tryPressure(const PressureChange &TryP,
1252 const PressureChange &CandP,
1253 GenericSchedulerBase::SchedCandidate &TryCand,
1254 GenericSchedulerBase::SchedCandidate &Cand,
1255 GenericSchedulerBase::CandReason Reason,
1256 const TargetRegisterInfo *TRI,
1257 const MachineFunction &MF);
1258LLVM_ABI bool tryBiasPhysRegs(GenericSchedulerBase::SchedCandidate &TryCand,
1259 GenericSchedulerBase::SchedCandidate &Cand,
1260 SchedBoundary *Zone, bool BiasPRegsExtra);
1261LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop);
1262LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop,
1263 bool BiasPRegsExtra = false);
1264
1265/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
1266/// the schedule.
1267class LLVM_ABI GenericScheduler : public GenericSchedulerBase {
1268public:
1269 GenericScheduler(const MachineSchedContext *C):
1270 GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
1271 Bot(SchedBoundary::BotQID, "BotQ") {}
1272
1273 void initPolicy(MachineBasicBlock::iterator Begin,
1274 MachineBasicBlock::iterator End,
1275 unsigned NumRegionInstrs) override;
1276
1277 void dumpPolicy() const override;
1278
1279 bool shouldTrackPressure() const override {
1280 return RegionPolicy.ShouldTrackPressure;
1281 }
1282
1283 bool shouldTrackLaneMasks() const override {
1284 return RegionPolicy.ShouldTrackLaneMasks;
1285 }
1286
1287 void initialize(ScheduleDAGMI *dag) override;
1288
1289 SUnit *pickNode(bool &IsTopNode) override;
1290
1291 void schedNode(SUnit *SU, bool IsTopNode) override;
1292
1293 void releaseTopNode(SUnit *SU) override {
1294 if (SU->isScheduled)
1295 return;
1296
1297 Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle, InPQueue: false);
1298 TopCand.SU = nullptr;
1299 }
1300
1301 void releaseBottomNode(SUnit *SU) override {
1302 if (SU->isScheduled)
1303 return;
1304
1305 Bot.releaseNode(SU, ReadyCycle: SU->BotReadyCycle, InPQueue: false);
1306 BotCand.SU = nullptr;
1307 }
1308
1309 void registerRoots() override;
1310
1311protected:
1312 ScheduleDAGMILive *DAG = nullptr;
1313
1314 // State of the top and bottom scheduled instruction boundaries.
1315 SchedBoundary Top;
1316 SchedBoundary Bot;
1317
1318 unsigned TopClusterID;
1319 unsigned BotClusterID;
1320
1321 /// Candidate last picked from Top boundary.
1322 SchedCandidate TopCand;
1323 /// Candidate last picked from Bot boundary.
1324 SchedCandidate BotCand;
1325
1326 void checkAcyclicLatency();
1327
1328 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
1329 const RegPressureTracker &RPTracker,
1330 RegPressureTracker &TempTracker);
1331
1332 virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
1333 SchedBoundary *Zone) const;
1334
1335 SUnit *pickNodeBidirectional(bool &IsTopNode);
1336
1337 void pickNodeFromQueue(SchedBoundary &Zone,
1338 const CandPolicy &ZonePolicy,
1339 const RegPressureTracker &RPTracker,
1340 SchedCandidate &Candidate);
1341
1342 void reschedulePhysReg(SUnit *SU, bool isTop);
1343};
1344
1345/// PostGenericScheduler - Interface to the scheduling algorithm used by
1346/// ScheduleDAGMI.
1347///
1348/// Callbacks from ScheduleDAGMI:
1349/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
1350class LLVM_ABI PostGenericScheduler : public GenericSchedulerBase {
1351protected:
1352 ScheduleDAGMI *DAG = nullptr;
1353 SchedBoundary Top;
1354 SchedBoundary Bot;
1355
1356 /// Candidate last picked from Top boundary.
1357 SchedCandidate TopCand;
1358 /// Candidate last picked from Bot boundary.
1359 SchedCandidate BotCand;
1360
1361 unsigned TopClusterID;
1362 unsigned BotClusterID;
1363
1364public:
1365 PostGenericScheduler(const MachineSchedContext *C)
1366 : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
1367 Bot(SchedBoundary::BotQID, "BotQ") {}
1368
1369 ~PostGenericScheduler() override = default;
1370
1371 void initPolicy(MachineBasicBlock::iterator Begin,
1372 MachineBasicBlock::iterator End,
1373 unsigned NumRegionInstrs) override;
1374
1375 /// PostRA scheduling does not track pressure.
1376 bool shouldTrackPressure() const override { return false; }
1377
1378 void initialize(ScheduleDAGMI *Dag) override;
1379
1380 void registerRoots() override;
1381
1382 SUnit *pickNode(bool &IsTopNode) override;
1383
1384 SUnit *pickNodeBidirectional(bool &IsTopNode);
1385
1386 void scheduleTree(unsigned SubtreeID) override {
1387 llvm_unreachable("PostRA scheduler does not support subtree analysis.");
1388 }
1389
1390 void schedNode(SUnit *SU, bool IsTopNode) override;
1391
1392 void releaseTopNode(SUnit *SU) override {
1393 if (SU->isScheduled)
1394 return;
1395 Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle, InPQueue: false);
1396 TopCand.SU = nullptr;
1397 }
1398
1399 void releaseBottomNode(SUnit *SU) override {
1400 if (SU->isScheduled)
1401 return;
1402 Bot.releaseNode(SU, ReadyCycle: SU->BotReadyCycle, InPQueue: false);
1403 BotCand.SU = nullptr;
1404 }
1405
1406protected:
1407 virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
1408
1409 void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand);
1410};
1411
1412/// If ReorderWhileClustering is set to true, no attempt will be made to
1413/// reduce reordering due to store clustering.
1414LLVM_ABI std::unique_ptr<ScheduleDAGMutation>
1415createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1416 const TargetRegisterInfo *TRI,
1417 bool ReorderWhileClustering = false);
1418
1419/// If ReorderWhileClustering is set to true, no attempt will be made to
1420/// reduce reordering due to store clustering.
1421LLVM_ABI std::unique_ptr<ScheduleDAGMutation>
1422createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1423 const TargetRegisterInfo *TRI,
1424 bool ReorderWhileClustering = false);
1425
1426LLVM_ABI std::unique_ptr<ScheduleDAGMutation>
1427createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1428 const TargetRegisterInfo *TRI);
1429
1430/// Create the standard converging machine scheduler. This will be used as the
1431/// default scheduler if the target does not set a default.
1432/// Adds default DAG mutations.
1433template <typename Strategy = GenericScheduler>
1434ScheduleDAGMILive *createSchedLive(MachineSchedContext *C) {
1435 ScheduleDAGMILive *DAG =
1436 new ScheduleDAGMILive(C, std::make_unique<Strategy>(C));
1437 // Register DAG post-processors.
1438 //
1439 // FIXME: extend the mutation API to allow earlier mutations to instantiate
1440 // data and pass it to later mutations. Have a single mutation that gathers
1441 // the interesting nodes in one pass.
1442 DAG->addMutation(Mutation: createCopyConstrainDAGMutation(TII: DAG->TII, TRI: DAG->TRI));
1443 return DAG;
1444}
1445
1446/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
1447template <typename Strategy = PostGenericScheduler>
1448ScheduleDAGMI *createSchedPostRA(MachineSchedContext *C) {
1449 return new ScheduleDAGMI(C, std::make_unique<Strategy>(C),
1450 /*RemoveKillFlags=*/true);
1451}
1452
1453class MachineSchedulerPass
1454 : public OptionalPassInfoMixin<MachineSchedulerPass> {
1455 // FIXME: Remove this member once RegisterClassInfo is queryable as an
1456 // analysis.
1457 std::unique_ptr<impl_detail::MachineSchedulerImpl> Impl;
1458 const TargetMachine *TM;
1459
1460public:
1461 LLVM_ABI MachineSchedulerPass(const TargetMachine *TM);
1462 LLVM_ABI MachineSchedulerPass(MachineSchedulerPass &&Other);
1463 LLVM_ABI ~MachineSchedulerPass();
1464 LLVM_ABI PreservedAnalyses run(MachineFunction &MF,
1465 MachineFunctionAnalysisManager &MFAM);
1466};
1467
1468class PostMachineSchedulerPass
1469 : public OptionalPassInfoMixin<PostMachineSchedulerPass> {
1470 // FIXME: Remove this member once RegisterClassInfo is queryable as an
1471 // analysis.
1472 std::unique_ptr<impl_detail::PostMachineSchedulerImpl> Impl;
1473 const TargetMachine *TM;
1474
1475public:
1476 LLVM_ABI PostMachineSchedulerPass(const TargetMachine *TM);
1477 LLVM_ABI PostMachineSchedulerPass(PostMachineSchedulerPass &&Other);
1478 LLVM_ABI ~PostMachineSchedulerPass();
1479 LLVM_ABI PreservedAnalyses run(MachineFunction &MF,
1480 MachineFunctionAnalysisManager &MFAM);
1481};
1482} // end namespace llvm
1483
1484#endif // LLVM_CODEGEN_MACHINESCHEDULER_H
1485