1//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI Machine Scheduler interface
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
15#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16
17#include "llvm/CodeGen/MachineScheduler.h"
18#include "llvm/CodeGen/RegisterPressure.h"
19#include "llvm/CodeGen/ScheduleDAG.h"
20#include <cstdint>
21#include <set>
22#include <vector>
23
24namespace llvm {
25
26class SIInstrInfo;
27class SIRegisterInfo;
28class SIScheduleDAGMI;
29class SIScheduleBlockCreator;
30
31enum SIScheduleCandReason {
32 NoCand,
33 RegUsage,
34 Latency,
35 Successor,
36 Depth,
37 NodeOrder
38};
39
40struct SISchedulerCandidate {
41 // The reason for this candidate.
42 SIScheduleCandReason Reason = NoCand;
43
44 // Set of reasons that apply to multiple candidates.
45 uint32_t RepeatReasonSet = 0;
46
47 SISchedulerCandidate() = default;
48
49 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
50 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
51};
52
53enum SIScheduleBlockLinkKind {
54 NoData,
55 Data
56};
57
58class SIScheduleBlock {
59 SIScheduleDAGMI *DAG;
60 SIScheduleBlockCreator *BC;
61
62 std::vector<SUnit*> SUnits;
63 std::map<unsigned, unsigned> NodeNum2Index;
64 std::vector<SUnit*> TopReadySUs;
65 std::vector<SUnit*> ScheduledSUnits;
66
67 /// The top of the unscheduled zone.
68 IntervalPressure TopPressure;
69 RegPressureTracker TopRPTracker;
70
71 // Pressure: number of said class of registers needed to
72 // store the live virtual and real registers.
73 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
74 // Pressure of additional registers required inside the block.
75 std::vector<unsigned> InternalAdditionalPressure;
76 // Pressure of input and output registers
77 std::vector<unsigned> LiveInPressure;
78 std::vector<unsigned> LiveOutPressure;
79 // Registers required by the block, and outputs.
80 // We do track only virtual registers.
81 // Note that some registers are not 32 bits,
82 // and thus the pressure is not equal
83 // to the number of live registers.
84 std::set<unsigned> LiveInRegs;
85 std::set<unsigned> LiveOutRegs;
86
87 bool Scheduled = false;
88 bool HighLatencyBlock = false;
89
90 std::vector<unsigned> HasLowLatencyNonWaitedParent;
91
92 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
93 unsigned ID;
94
95 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
96 // All blocks successors, and the kind of link
97 std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
98 unsigned NumHighLatencySuccessors = 0;
99
100public:
101 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
102 unsigned ID):
103 DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
104
105 ~SIScheduleBlock() = default;
106
107 unsigned getID() const { return ID; }
108
109 /// Functions for Block construction.
110 void addUnit(SUnit *SU);
111
112 // When all SUs have been added.
113 void finalizeUnits();
114
115 // Add block pred, which has instruction predecessor of SU.
116 void addPred(SIScheduleBlock *Pred);
117 void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
118
119 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
120 ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
121 getSuccs() const { return Succs; }
122
123 unsigned Height; // Maximum topdown path length to block without outputs
124 unsigned Depth; // Maximum bottomup path length to block without inputs
125
126 unsigned getNumHighLatencySuccessors() const {
127 return NumHighLatencySuccessors;
128 }
129
130 bool isHighLatencyBlock() { return HighLatencyBlock; }
131
132 // This is approximative.
133 // Ideally should take into accounts some instructions (rcp, etc)
134 // are 4 times slower.
135 int getCost() { return SUnits.size(); }
136
137 // The block Predecessors and Successors must be all registered
138 // before fastSchedule().
139 // Fast schedule with no particular requirement.
140 void fastSchedule();
141
142 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
143
144 // Complete schedule that will try to minimize reg pressure and
145 // low latencies, and will fill liveins and liveouts.
146 // Needs all MIs to be grouped between BeginBlock and EndBlock.
147 // The MIs can be moved after the scheduling,
148 // it is just used to allow correct track of live registers.
149 void schedule(MachineBasicBlock::iterator BeginBlock,
150 MachineBasicBlock::iterator EndBlock);
151
152 bool isScheduled() { return Scheduled; }
153
154 // Needs the block to be scheduled inside
155 // TODO: find a way to compute it.
156 std::vector<unsigned> &getInternalAdditionalRegUsage() {
157 return InternalAdditionalPressure;
158 }
159
160 std::set<unsigned> &getInRegs() { return LiveInRegs; }
161 std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
162
163 void printDebug(bool Full);
164
165private:
166 struct SISchedCandidate : SISchedulerCandidate {
167 // The best SUnit candidate.
168 SUnit *SU = nullptr;
169
170 unsigned SGPRUsage;
171 unsigned VGPRUsage;
172 bool IsLowLatency;
173 unsigned LowLatencyOffset;
174 bool HasLowLatencyNonWaitedParent;
175
176 SISchedCandidate() = default;
177
178 bool isValid() const { return SU; }
179
180 // Copy the status of another candidate without changing policy.
181 void setBest(SISchedCandidate &Best) {
182 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
183 SU = Best.SU;
184 Reason = Best.Reason;
185 SGPRUsage = Best.SGPRUsage;
186 VGPRUsage = Best.VGPRUsage;
187 IsLowLatency = Best.IsLowLatency;
188 LowLatencyOffset = Best.LowLatencyOffset;
189 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
190 }
191 };
192
193 void undoSchedule();
194
195 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
196 void releaseSucc(SUnit *SU, SDep *SuccEdge);
197 // InOrOutBlock: restrict to links pointing inside the block (true),
198 // or restrict to links pointing outside the block (false).
199 void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
200
201 void nodeScheduled(SUnit *SU);
202 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
203 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
204 SUnit* pickNode();
205 void traceCandidate(const SISchedCandidate &Cand);
206 void initRegPressure(MachineBasicBlock::iterator BeginBlock,
207 MachineBasicBlock::iterator EndBlock);
208};
209
210struct SIScheduleBlocks {
211 std::vector<SIScheduleBlock*> Blocks;
212 std::vector<int> TopDownIndex2Block;
213 std::vector<int> TopDownBlock2Index;
214};
215
216enum SISchedulerBlockCreatorVariant {
217 LatenciesAlone,
218 LatenciesGrouped,
219 LatenciesAlonePlusConsecutive
220};
221
222class SIScheduleBlockCreator {
223 SIScheduleDAGMI *DAG;
224 // unique_ptr handles freeing memory for us.
225 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
226 std::map<SISchedulerBlockCreatorVariant,
227 SIScheduleBlocks> Blocks;
228 std::vector<SIScheduleBlock*> CurrentBlocks;
229 std::vector<int> Node2CurrentBlock;
230
231 // Topological sort
232 // Maps topological index to the node number.
233 std::vector<int> TopDownIndex2Block;
234 std::vector<int> TopDownBlock2Index;
235 std::vector<int> BottomUpIndex2Block;
236
237 // 0 -> Color not given.
238 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
239 // Above -> Other groups.
240 int NextReservedID;
241 int NextNonReservedID;
242 std::vector<int> CurrentColoring;
243 std::vector<int> CurrentTopDownReservedDependencyColoring;
244 std::vector<int> CurrentBottomUpReservedDependencyColoring;
245
246public:
247 SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
248
249 SIScheduleBlocks
250 getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
251
252 bool isSUInBlock(SUnit *SU, unsigned ID);
253
254private:
255 // Give a Reserved color to every high latency.
256 void colorHighLatenciesAlone();
257
258 // Create groups of high latencies with a Reserved color.
259 void colorHighLatenciesGroups();
260
261 // Compute coloring for topdown and bottom traversals with
262 // different colors depending on dependencies on Reserved colors.
263 void colorComputeReservedDependencies();
264
265 // Give color to all non-colored SUs according to Reserved groups dependencies.
266 void colorAccordingToReservedDependencies();
267
268 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
269 // The new colors are computed according to the dependencies on the other blocks
270 // formed with colorAccordingToReservedDependencies.
271 void colorEndsAccordingToDependencies();
272
273 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
274 void colorForceConsecutiveOrderInGroup();
275
276 // Merge Constant loads that have all their users into another group to the group.
277 // (TODO: else if all their users depend on the same group, put them there)
278 void colorMergeConstantLoadsNextGroup();
279
280 // Merge SUs that have all their users into another group to the group
281 void colorMergeIfPossibleNextGroup();
282
283 // Merge SUs that have all their users into another group to the group,
284 // but only for Reserved groups.
285 void colorMergeIfPossibleNextGroupOnlyForReserved();
286
287 // Merge SUs that have all their users into another group to the group,
288 // but only if the group is no more than a few SUs.
289 void colorMergeIfPossibleSmallGroupsToNextGroup();
290
291 // Divides Blocks with important size.
292 // Idea of implementation: attribute new colors depending on topdown and
293 // bottom up links to other blocks.
294 void cutHugeBlocks();
295
296 // Put in one group all instructions with no users in this scheduling region
297 // (we'd want these groups be at the end).
298 void regroupNoUserInstructions();
299
300 // Give Reserved color to export instructions
301 void colorExports();
302
303 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
304
305 void topologicalSort();
306
307 void scheduleInsideBlocks();
308
309 void fillStats();
310};
311
312enum SISchedulerBlockSchedulerVariant {
313 BlockLatencyRegUsage,
314 BlockRegUsageLatency,
315 BlockRegUsage
316};
317
318class SIScheduleBlockScheduler {
319 SIScheduleDAGMI *DAG;
320 SISchedulerBlockSchedulerVariant Variant;
321 std::vector<SIScheduleBlock*> Blocks;
322
323 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
324 std::set<unsigned> LiveRegs;
325 // Num of schedulable unscheduled blocks reading the register.
326 std::map<unsigned, unsigned> LiveRegsConsumers;
327
328 std::vector<unsigned> LastPosHighLatencyParentScheduled;
329 int LastPosWaitedHighLatency;
330
331 std::vector<SIScheduleBlock*> BlocksScheduled;
332 unsigned NumBlockScheduled;
333 std::vector<SIScheduleBlock*> ReadyBlocks;
334
335 unsigned VregCurrentUsage;
336 unsigned SregCurrentUsage;
337
338 // Currently is only approximation.
339 unsigned maxVregUsage;
340 unsigned maxSregUsage;
341
342 std::vector<unsigned> BlockNumPredsLeft;
343 std::vector<unsigned> BlockNumSuccsLeft;
344
345public:
346 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
347 SISchedulerBlockSchedulerVariant Variant,
348 SIScheduleBlocks BlocksStruct);
349 ~SIScheduleBlockScheduler() = default;
350
351 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
352
353 unsigned getVGPRUsage() { return maxVregUsage; }
354 unsigned getSGPRUsage() { return maxSregUsage; }
355
356private:
357 struct SIBlockSchedCandidate : SISchedulerCandidate {
358 // The best Block candidate.
359 SIScheduleBlock *Block = nullptr;
360
361 bool IsHighLatency;
362 int VGPRUsageDiff;
363 unsigned NumSuccessors;
364 unsigned NumHighLatencySuccessors;
365 unsigned LastPosHighLatParentScheduled;
366 unsigned Height;
367
368 SIBlockSchedCandidate() = default;
369
370 bool isValid() const { return Block; }
371
372 // Copy the status of another candidate without changing policy.
373 void setBest(SIBlockSchedCandidate &Best) {
374 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
375 Block = Best.Block;
376 Reason = Best.Reason;
377 IsHighLatency = Best.IsHighLatency;
378 VGPRUsageDiff = Best.VGPRUsageDiff;
379 NumSuccessors = Best.NumSuccessors;
380 NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
381 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
382 Height = Best.Height;
383 }
384 };
385
386 bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
387 SIBlockSchedCandidate &TryCand);
388 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
389 SIBlockSchedCandidate &TryCand);
390 SIScheduleBlock *pickBlock();
391
392 void addLiveRegs(std::set<unsigned> &Regs);
393 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
394 void releaseBlockSuccs(SIScheduleBlock *Parent);
395 void blockScheduled(SIScheduleBlock *Block);
396
397 // Check register pressure change
398 // by scheduling a block with these LiveIn and LiveOut.
399 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
400 std::set<unsigned> &OutRegs);
401
402 void schedule();
403};
404
405struct SIScheduleBlockResult {
406 std::vector<unsigned> SUs;
407 unsigned MaxSGPRUsage;
408 unsigned MaxVGPRUsage;
409};
410
411class SIScheduler {
412 SIScheduleDAGMI *DAG;
413 SIScheduleBlockCreator BlockCreator;
414
415public:
416 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
417
418 ~SIScheduler() = default;
419
420 struct SIScheduleBlockResult
421 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
422 SISchedulerBlockSchedulerVariant ScheduleVariant);
423};
424
425class SIScheduleDAGMI final : public ScheduleDAGMILive {
426 const SIInstrInfo *SITII;
427 const SIRegisterInfo *SITRI;
428
429 std::vector<SUnit> SUnitsLinksBackup;
430
431 // For moveLowLatencies. After all Scheduling variants are tested.
432 std::vector<unsigned> ScheduledSUnits;
433 std::vector<unsigned> ScheduledSUnitsInv;
434
435public:
436 SIScheduleDAGMI(MachineSchedContext *C);
437
438 ~SIScheduleDAGMI() override;
439
440 // Entry point for the schedule.
441 void schedule() override;
442
443 // To init Block's RPTracker.
444 void initRPTracker(RegPressureTracker &RPTracker) {
445 RPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: RegionBegin, TrackLaneMasks: false, TrackUntiedDefs: false);
446 }
447
448 MachineBasicBlock *getBB() { return BB; }
449 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
450 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
451 LiveIntervals *getLIS() { return LIS; }
452 MachineRegisterInfo *getMRI() { return &MRI; }
453 const TargetRegisterInfo *getTRI() { return TRI; }
454 ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
455 SUnit &getEntrySU() { return EntrySU; }
456 SUnit& getExitSU() { return ExitSU; }
457
458 void restoreSULinksLeft();
459
460 template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
461 _Iterator End,
462 unsigned &VgprUsage,
463 unsigned &SgprUsage);
464
465 std::set<unsigned> getInRegs() {
466 std::set<unsigned> InRegs;
467 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
468 InRegs.insert(x: RegMaskPair.RegUnit);
469 }
470 return InRegs;
471 }
472
473 std::set<unsigned> getOutRegs() {
474 std::set<unsigned> OutRegs;
475 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
476 OutRegs.insert(x: RegMaskPair.RegUnit);
477 }
478 return OutRegs;
479 };
480
481private:
482 void topologicalSort();
483 // After scheduling is done, improve low latency placements.
484 void moveLowLatencies();
485
486public:
487 // Some stats for scheduling inside blocks.
488 std::vector<unsigned> IsLowLatencySU;
489 std::vector<unsigned> LowLatencyOffset;
490 std::vector<unsigned> IsHighLatencySU;
491 // Topological sort
492 // Maps topological index to the node number.
493 std::vector<int> TopDownIndex2SU;
494 std::vector<int> BottomUpIndex2SU;
495};
496
497} // end namespace llvm
498
499#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
500