1//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SystemZMachineScheduler.h"
10#include "llvm/CodeGen/MachineLoopInfo.h"
11
12using namespace llvm;
13
14#define DEBUG_TYPE "machine-scheduler"
15
16/// Pre-RA scheduling ///
17
18static bool isRegDef(const MachineOperand &MO) {
19 return MO.isReg() && MO.isDef();
20}
21
22static bool isPhysRegDef(const MachineOperand &MO) {
23 return isRegDef(MO) && MO.getReg().isPhysical();
24}
25
26void SystemZPreRASchedStrategy::initializeLatencyReduction() {
27 // Enable latency reduction for a region that has a considerable amount of
28 // data sequences that should be interlaved. These are SUs that only have
29 // one data predecessor / successor edge(s) to their adjacent instruction(s)
30 // in the input order. Disable if region has many SUs relative to the
31 // overall height.
32 unsigned DAGHeight = 0;
33 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
34 DAGHeight = std::max(a: DAGHeight, b: DAG->SUnits[Idx].getHeight());
35 RegionPolicy.DisableLatencyHeuristic =
36 DAG->SUnits.size() >= 3 * std::max(a: DAGHeight, b: 1u);
37 if ((HasDataSequences = !RegionPolicy.DisableLatencyHeuristic)) {
38 unsigned CurrSequence = 0, NumSeqNodes = 0;
39 auto countSequence = [&CurrSequence, &NumSeqNodes]() {
40 if (CurrSequence >= 2)
41 NumSeqNodes += CurrSequence;
42 CurrSequence = 0;
43 };
44 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
45 const SUnit *SU = &DAG->SUnits[Idx];
46 bool InDataSequence = true;
47 // One Data pred to MI just above, or no preds.
48 unsigned NumPreds = 0;
49 for (const SDep &Pred : SU->Preds)
50 if (++NumPreds != 1 || Pred.getKind() != SDep::Data ||
51 Pred.getSUnit()->NodeNum != Idx - 1)
52 InDataSequence = false;
53 // One Data succ or no succs (ignoring ExitSU).
54 unsigned NumSuccs = 0;
55 for (const SDep &Succ : SU->Succs)
56 if (Succ.getSUnit() != &DAG->ExitSU &&
57 (++NumSuccs != 1 || Succ.getKind() != SDep::Data))
58 InDataSequence = false;
59 // Another type of node or one that does not have a single data pred
60 // ends any previous sequence.
61 if (!InDataSequence || !NumPreds)
62 countSequence();
63 if (InDataSequence)
64 CurrSequence++;
65 }
66 countSequence();
67 if (NumSeqNodes >= std::max(a: size_t(4), b: DAG->SUnits.size() / 4)) {
68 LLVM_DEBUG(dbgs() << "Number of nodes in def-use sequences: "
69 << NumSeqNodes << ". ";);
70 } else
71 HasDataSequences = false;
72 }
73}
74
75bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI,
76 bool CCDef) const {
77 if (Cmp0SrcReg != SystemZ::NoRegister && MI->getNumOperands() &&
78 (MI->getDesc().hasImplicitDefOfPhysReg(Reg: SystemZ::CC) || !CCDef)) {
79 const MachineOperand &MO0 = MI->getOperand(i: 0);
80 if (isRegDef(MO: MO0) && MO0.getReg() == Cmp0SrcReg)
81 return true;
82 }
83 return false;
84}
85
86static int biasPhysRegExtra(const SUnit *SU) {
87 if (int Res = biasPhysReg(SU, /*isTop=*/false))
88 return Res;
89
90 // Also recognize Load Address. Most of these are with an FI operand.
91 const MachineInstr *MI = SU->getInstr();
92 return MI->getNumOperands() && !MI->isCopy() &&
93 isPhysRegDef(MO: MI->getOperand(i: 0));
94}
95
96bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
97 SchedCandidate &TryCand,
98 SchedBoundary *Zone) const {
99 assert(Zone && !Zone->isTop() && "Bottom-Up scheduling only.");
100
101 // Initialize the candidate if needed.
102 if (!Cand.isValid()) {
103 TryCand.Reason = FirstValid;
104 return true;
105 }
106
107 // Bias physreg defs and copies to their uses and definitions respectively.
108 int TryCandPRegBias = biasPhysRegExtra(SU: TryCand.SU);
109 int CandPRegBias = biasPhysRegExtra(SU: Cand.SU);
110 if (tryGreater(TryVal: TryCandPRegBias, CandVal: CandPRegBias, TryCand, Cand, Reason: PhysReg))
111 return TryCand.Reason != NoCand;
112 if (TryCandPRegBias && CandPRegBias) {
113 // Both biased same way.
114 tryGreater(TryVal: TryCand.SU->NodeNum, CandVal: Cand.SU->NodeNum, TryCand, Cand, Reason: NodeOrder);
115 return TryCand.Reason != NoCand;
116 }
117
118 // Don't extend the scheduled latency in regions with many nodes in data
119 // sequences, or for (single block loop) regions that are acyclically
120 // (within a single loop iteration) latency limited. IsAcyclicLatencyLimited
121 // is set only after initialization in registerRoots(), which is why it is
122 // checked here instead of earlier.
123 if (!RegionPolicy.DisableLatencyHeuristic &&
124 (HasDataSequences || Rem.IsAcyclicLatencyLimited))
125 if (const SUnit *HigherSU =
126 TryCand.SU->getHeight() > Cand.SU->getHeight() ? TryCand.SU
127 : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU
128 : nullptr)
129 if (HigherSU->getHeight() > Zone->getScheduledLatency() &&
130 HigherSU->getDepth() < computeRemLatency(CurrZone&: *Zone)) {
131 // One or both SUs increase the scheduled latency.
132 tryLess(TryVal: TryCand.SU->getHeight(), CandVal: Cand.SU->getHeight(), TryCand, Cand,
133 Reason: GenericSchedulerBase::BotHeightReduce);
134 return TryCand.Reason != NoCand;
135 }
136
137 // Weak edges help copy coalescing.
138 if (tryLess(TryVal: TryCand.SU->WeakSuccsLeft, CandVal: Cand.SU->WeakSuccsLeft, TryCand, Cand,
139 Reason: Weak))
140 return TryCand.Reason != NoCand;
141
142 // Help compare with zero elimination.
143 if (tryGreater(TryVal: definesCmp0Src(MI: TryCand.SU->getInstr()),
144 CandVal: definesCmp0Src(MI: Cand.SU->getInstr()), TryCand, Cand, Reason: Weak))
145 return TryCand.Reason != NoCand;
146
147 // Fall through to original instruction order.
148 if (TryCand.SU->NodeNum > Cand.SU->NodeNum) {
149 TryCand.Reason = NodeOrder;
150 return true;
151 }
152
153 return false;
154}
155
156void SystemZPreRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
157 MachineBasicBlock::iterator End,
158 unsigned NumRegionInstrs) {
159 // Avoid setting up the register pressure tracker for small regions to save
160 // compile time. Currently only used for computeCyclicCriticalPath() which
161 // is used for single block loops.
162 MachineBasicBlock *MBB = Begin->getParent();
163 RegionPolicy.ShouldTrackPressure =
164 MBB->isSuccessor(MBB) && NumRegionInstrs >= 8;
165
166 // These heuristics has so far seemed to work better without adding a
167 // top-down boundary.
168 RegionPolicy.OnlyBottomUp = true;
169 BotIdx = NumRegionInstrs - 1;
170 this->NumRegionInstrs = NumRegionInstrs;
171}
172
173void SystemZPreRASchedStrategy::initialize(ScheduleDAGMI *dag) {
174 GenericScheduler::initialize(dag);
175
176 Cmp0SrcReg = SystemZ::NoRegister;
177
178 initializeLatencyReduction();
179 LLVM_DEBUG(dbgs() << "Latency scheduling " << (HasDataSequences ? "" : "not ")
180 << "enabled for data sequences.\n";);
181}
182
183void SystemZPreRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
184 GenericScheduler::schedNode(SU, IsTopNode);
185
186 const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(DAG->TII);
187 MachineInstr *MI = SU->getInstr();
188 if (TII->isCompareZero(Compare: *MI))
189 Cmp0SrcReg = TII->getCompareSourceReg(Compare: *MI);
190 else if (MI->getDesc().hasImplicitDefOfPhysReg(Reg: SystemZ::CC) ||
191 definesCmp0Src(MI, /*CCDef=*/false))
192 Cmp0SrcReg = SystemZ::NoRegister;
193}
194
195/// Post-RA scheduling ///
196
197#ifndef NDEBUG
198// Print the set of SUs
199void SystemZPostRASchedStrategy::SUSet::
200dump(SystemZHazardRecognizer &HazardRec) const {
201 dbgs() << "{";
202 for (auto &SU : *this) {
203 HazardRec.dumpSU(SU, dbgs());
204 if (SU != *rbegin())
205 dbgs() << ", ";
206 }
207 dbgs() << "}\n";
208}
209#endif
210
211// Try to find a single predecessor that would be interesting for the
212// scheduler in the top-most region of MBB.
213static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
214 const MachineLoop *Loop) {
215 MachineBasicBlock *PredMBB = nullptr;
216 if (MBB->pred_size() == 1)
217 PredMBB = *MBB->pred_begin();
218
219 // The loop header has two predecessors, return the latch, but not for a
220 // single block loop.
221 if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
222 for (MachineBasicBlock *Pred : MBB->predecessors())
223 if (Loop->contains(BB: Pred))
224 PredMBB = (Pred == MBB ? nullptr : Pred);
225 }
226
227 assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
228 && "Loop MBB should not consider predecessor outside of loop.");
229
230 return PredMBB;
231}
232
233void SystemZPostRASchedStrategy::
234advanceTo(MachineBasicBlock::iterator NextBegin) {
235 MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
236 MachineBasicBlock::iterator I =
237 ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ?
238 std::next(x: LastEmittedMI) : MBB->begin());
239
240 for (; I != NextBegin; ++I) {
241 if (I->isPosition() || I->isDebugInstr())
242 continue;
243 HazardRec->emitInstruction(MI: &*I);
244 }
245}
246
247void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
248 Available.clear(); // -misched-cutoff.
249 LLVM_DEBUG(HazardRec->dumpState(););
250}
251
252void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) {
253 assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
254 "Entering MBB twice?");
255 LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));
256
257 MBB = NextMBB;
258
259 /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
260 /// point to it.
261 HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
262 LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
263 if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
264 dbgs() << ":\n";);
265
266 // Try to take over the state from a single predecessor, if it has been
267 // scheduled. If this is not possible, we are done.
268 MachineBasicBlock *SinglePredMBB =
269 getSingleSchedPred(MBB, Loop: MLI->getLoopFor(BB: MBB));
270 if (SinglePredMBB == nullptr)
271 return;
272 auto It = SchedStates.find(x: SinglePredMBB);
273 if (It == SchedStates.end())
274 return;
275
276 LLVM_DEBUG(dbgs() << "** Continued scheduling from "
277 << printMBBReference(*SinglePredMBB) << "\n";);
278
279 HazardRec->copyState(Incoming: It->second);
280 LLVM_DEBUG(HazardRec->dumpState(););
281
282 // Emit incoming terminator(s). Be optimistic and assume that branch
283 // prediction will generally do "the right thing".
284 for (MachineInstr &MI : SinglePredMBB->terminators()) {
285 LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
286 bool TakenBranch = (MI.isBranch() &&
287 (TII->getBranchInfo(MI).isIndirect() ||
288 TII->getBranchInfo(MI).getMBBTarget() == MBB));
289 HazardRec->emitInstruction(MI: &MI, TakenBranch);
290 if (TakenBranch)
291 break;
292 }
293}
294
295void SystemZPostRASchedStrategy::leaveMBB() {
296 LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);
297
298 // Advance to first terminator. The successor block will handle terminators
299 // dependent on CFG layout (T/NT branch etc).
300 advanceTo(NextBegin: MBB->getFirstTerminator());
301}
302
303SystemZPostRASchedStrategy::
304SystemZPostRASchedStrategy(const MachineSchedContext *C)
305 : MLI(C->MLI),
306 TII(static_cast<const SystemZInstrInfo *>
307 (C->MF->getSubtarget().getInstrInfo())),
308 MBB(nullptr), HazardRec(nullptr) {
309 const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
310 SchedModel.init(TSInfo: ST);
311}
312
313SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() {
314 // Delete hazard recognizers kept around for each MBB.
315 for (auto I : SchedStates) {
316 SystemZHazardRecognizer *hazrec = I.second;
317 delete hazrec;
318 }
319}
320
321void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
322 MachineBasicBlock::iterator End,
323 unsigned NumRegionInstrs) {
324 // Don't emit the terminators.
325 if (Begin->isTerminator())
326 return;
327
328 // Emit any instructions before start of region.
329 advanceTo(NextBegin: Begin);
330}
331
332// Pick the next node to schedule.
333SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
334 // Only scheduling top-down.
335 IsTopNode = true;
336
337 if (Available.empty())
338 return nullptr;
339
340 // If only one choice, return it.
341 if (Available.size() == 1) {
342 LLVM_DEBUG(dbgs() << "** Only one: ";
343 HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
344 return *Available.begin();
345 }
346
347 // All nodes that are possible to schedule are stored in the Available set.
348 LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
349
350 Candidate Best;
351 for (auto *SU : Available) {
352
353 // SU is the next candidate to be compared against current Best.
354 Candidate c(SU, *HazardRec);
355
356 // Remeber which SU is the best candidate.
357 if (Best.SU == nullptr || c < Best) {
358 Best = c;
359 LLVM_DEBUG(dbgs() << "** Best so far: ";);
360 } else
361 LLVM_DEBUG(dbgs() << "** Tried : ";);
362 LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
363 dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);
364
365 // Once we know we have seen all SUs that affect grouping or use unbuffered
366 // resources, we can stop iterating if Best looks good.
367 if (!SU->isScheduleHigh && Best.noCost())
368 break;
369 }
370
371 assert (Best.SU != nullptr);
372 return Best.SU;
373}
374
375SystemZPostRASchedStrategy::Candidate::
376Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
377 SU = SU_;
378
379 // Check the grouping cost. For a node that must begin / end a
380 // group, it is positive if it would do so prematurely, or negative
381 // if it would fit naturally into the schedule.
382 GroupingCost = HazardRec.groupingCost(SU);
383
384 // Check the resources cost for this SU.
385 ResourcesCost = HazardRec.resourcesCost(SU);
386}
387
388bool SystemZPostRASchedStrategy::Candidate::
389operator<(const Candidate &other) {
390
391 // Check decoder grouping.
392 if (GroupingCost < other.GroupingCost)
393 return true;
394 if (GroupingCost > other.GroupingCost)
395 return false;
396
397 // Compare the use of resources.
398 if (ResourcesCost < other.ResourcesCost)
399 return true;
400 if (ResourcesCost > other.ResourcesCost)
401 return false;
402
403 // Higher SU is otherwise generally better.
404 if (SU->getHeight() > other.SU->getHeight())
405 return true;
406 if (SU->getHeight() < other.SU->getHeight())
407 return false;
408
409 // If all same, fall back to original order.
410 if (SU->NodeNum < other.SU->NodeNum)
411 return true;
412
413 return false;
414}
415
416void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
417 LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
418 if (Available.size() == 1) dbgs() << "(only one) ";
419 Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);
420
421 // Remove SU from Available set and update HazardRec.
422 Available.erase(x: SU);
423 HazardRec->EmitInstruction(SU);
424}
425
426void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
427 // Set isScheduleHigh flag on all SUs that we want to consider first in
428 // pickNode().
429 const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
430 bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
431 SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
432
433 // Put all released SUs in the Available set.
434 Available.insert(x: SU);
435}
436