1//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SystemZMachineScheduler.h"
10#include "llvm/CodeGen/MachineLoopInfo.h"
11
12using namespace llvm;
13
14#define DEBUG_TYPE "machine-scheduler"
15
16/// Pre-RA scheduling ///
17
18static bool isRegDef(const MachineOperand &MO) {
19 return MO.isReg() && MO.isDef();
20}
21
22bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI,
23 bool CCDef) const {
24 if (Cmp0SrcReg != SystemZ::NoRegister && MI->getNumOperands() &&
25 (MI->getDesc().hasImplicitDefOfPhysReg(Reg: SystemZ::CC) || !CCDef)) {
26 const MachineOperand &MO0 = MI->getOperand(i: 0);
27 if (isRegDef(MO: MO0) && MO0.getReg() == Cmp0SrcReg)
28 return true;
29 }
30 return false;
31}
32
33bool SystemZPreRASchedStrategy::closesLiveRange(const SUnit *SU,
34 ScheduleDAGMILive *DAG) const {
35 if (SU->getInstr()->isCopy())
36 return false;
37
38 // Extract the PressureChanges that all fp/vector or GR64/GR32/GRH32 regs
39 // affect respectively. misched-prera-pdiffs.mir tests against any future
40 // change in the PressureSets modelling, so simply hard-code them here.
41 int VR16PChange = 0, GRX32PChange = 0;
42 const PressureDiff &PDiff = DAG->getPressureDiff(SU);
43 for (const PressureChange &PC : PDiff) {
44 if (!PC.isValid())
45 break;
46 if (PC.getPSet() == SystemZ::VR16Bit)
47 VR16PChange = PC.getUnitInc();
48 else if (PC.getPSet() == SystemZ::GRX32Bit)
49 GRX32PChange = PC.getUnitInc();
50 }
51
52 // Return true for a (vreg) def when register pressure is reduced. Prioritize
53 // FP/vector regs over GPRs.
54 return VR16PChange < 0 || (!VR16PChange && GRX32PChange < 0);
55}
56
57bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
58 SchedCandidate &TryCand,
59 SchedBoundary *Zone) const {
60 assert(Zone && !Zone->isTop() && "Bottom-Up scheduling only.");
61
62 // Initialize the candidate if needed.
63 if (!Cand.isValid()) {
64 TryCand.Reason = FirstValid;
65 return true;
66 }
67
68 // Bias physreg defs and copies to their uses and definitions respectively.
69 if (tryBiasPhysRegs(TryCand, Cand, Zone, /*BiasPRegsExtra=*/true))
70 return TryCand.Reason != NoCand;
71
72 if (RegionPolicy.ShouldTrackPressure) {
73 auto schedLow = [&](const SUnit *SU) {
74 return SU->getHeight() <= Zone->getScheduledLatency() &&
75 SU->getHeight() < LivenessHeightCutOff && closesLiveRange(SU, DAG);
76 };
77 // One SU closes a live range while preserving the scheduled latency.
78 if (tryGreater(TryVal: schedLow(TryCand.SU), CandVal: schedLow(Cand.SU), TryCand, Cand,
79 Reason: RegExcess))
80 return TryCand.Reason != NoCand;
81 }
82
83 // Do latency scheduling by trying to not increase the scheduled latency
84 // (going bottom-up). This is important for performance, but it is best to
85 // only do it when likely beneficial and not "move everything around": Only
86 // apply this to SUs that have somewhat longer latencies, and don't push an
87 // SU upwards if it's on the critical path.
88 if (!RegionPolicy.DisableLatencyHeuristic &&
89 std::max(a: TryCand.SU->Latency, b: Cand.SU->Latency) >= 5)
90 if (const SUnit *HigherSU =
91 TryCand.SU->getHeight() > Cand.SU->getHeight() ? TryCand.SU
92 : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU
93 : nullptr)
94 if (HigherSU->getHeight() > Zone->getScheduledLatency() &&
95 HigherSU->getDepth() < computeRemLatency(CurrZone&: *Zone)) {
96 // HigherSU would increase the scheduled latency and there is room
97 // above for it next to the critical path, so wait with it.
98 tryLess(TryVal: TryCand.SU->getHeight(), CandVal: Cand.SU->getHeight(), TryCand, Cand,
99 Reason: GenericSchedulerBase::BotHeightReduce);
100 return TryCand.Reason != NoCand;
101 }
102
103 // Weak edges help copy coalescing.
104 if (tryLess(TryVal: TryCand.SU->WeakSuccsLeft, CandVal: Cand.SU->WeakSuccsLeft, TryCand, Cand,
105 Reason: Weak))
106 return TryCand.Reason != NoCand;
107
108 // Help compare with zero elimination.
109 if (tryGreater(TryVal: definesCmp0Src(MI: TryCand.SU->getInstr()),
110 CandVal: definesCmp0Src(MI: Cand.SU->getInstr()), TryCand, Cand, Reason: Weak))
111 return TryCand.Reason != NoCand;
112
113 // Fall through to original instruction order.
114 if (TryCand.SU->NodeNum > Cand.SU->NodeNum) {
115 TryCand.Reason = NodeOrder;
116 return true;
117 }
118
119 return false;
120}
121
122void SystemZPreRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
123 MachineBasicBlock::iterator End,
124 unsigned NumRegionInstrs) {
125 // TopRegionSUs is the number of SUs that is considered to be part of the
126 // "top" of a region. Liveness reduction is not done in regions smaller than
127 // this. The idea is to prioritize latency more after branches and help
128 // liveness only when the decoder is ahead of execution anyway.
129 static const unsigned TopRegionSUs = 36;
130
131 // Avoid setting up the register pressure tracker unless needed to save
132 // compile time.
133 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > TopRegionSUs;
134
135 // These heuristics has so far seemed to work better without adding a
136 // top-down boundary.
137 RegionPolicy.OnlyBottomUp = true;
138
139 BotIdx = NumRegionInstrs - 1;
140 this->NumRegionInstrs = NumRegionInstrs;
141}
142
143void SystemZPreRASchedStrategy::initialize(ScheduleDAGMI *dag) {
144 GenericScheduler::initialize(dag);
145
146 Cmp0SrcReg = SystemZ::NoRegister;
147
148 unsigned DAGHeight = 0;
149 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
150 DAGHeight = std::max(a: DAGHeight, b: DAG->SUnits[Idx].getHeight());
151
152 if (RegionPolicy.ShouldTrackPressure)
153 LivenessHeightCutOff = DAGHeight / (DAG->SUnits.size() < 50 ? 4 : 2);
154
155 // Disable latency reduction if region has many SUs relative to the
156 // overall height.
157 RegionPolicy.DisableLatencyHeuristic =
158 DAG->SUnits.size() >= 3 * std::max(a: DAGHeight, b: 1u);
159}
160
161void SystemZPreRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
162 GenericScheduler::schedNode(SU, IsTopNode);
163
164 const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(DAG->TII);
165 MachineInstr *MI = SU->getInstr();
166 if (TII->isCompareZero(Compare: *MI))
167 Cmp0SrcReg = TII->getCompareSourceReg(Compare: *MI);
168 else if (MI->getDesc().hasImplicitDefOfPhysReg(Reg: SystemZ::CC) ||
169 definesCmp0Src(MI, /*CCDef=*/false))
170 Cmp0SrcReg = SystemZ::NoRegister;
171}
172
173/// Post-RA scheduling ///
174
175#ifndef NDEBUG
176// Print the set of SUs
177void SystemZPostRASchedStrategy::SUSet::
178dump(SystemZHazardRecognizer &HazardRec) const {
179 dbgs() << "{";
180 for (auto &SU : *this) {
181 HazardRec.dumpSU(SU, dbgs());
182 if (SU != *rbegin())
183 dbgs() << ", ";
184 }
185 dbgs() << "}\n";
186}
187#endif
188
189// Try to find a single predecessor that would be interesting for the
190// scheduler in the top-most region of MBB.
191static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
192 const MachineLoop *Loop) {
193 MachineBasicBlock *PredMBB = nullptr;
194 if (MBB->pred_size() == 1)
195 PredMBB = *MBB->pred_begin();
196
197 // The loop header has two predecessors, return the latch, but not for a
198 // single block loop.
199 if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
200 for (MachineBasicBlock *Pred : MBB->predecessors())
201 if (Loop->contains(BB: Pred))
202 PredMBB = (Pred == MBB ? nullptr : Pred);
203 }
204
205 assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
206 && "Loop MBB should not consider predecessor outside of loop.");
207
208 return PredMBB;
209}
210
211void SystemZPostRASchedStrategy::
212advanceTo(MachineBasicBlock::iterator NextBegin) {
213 MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
214 MachineBasicBlock::iterator I =
215 ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ?
216 std::next(x: LastEmittedMI) : MBB->begin());
217
218 for (; I != NextBegin; ++I) {
219 if (I->isPosition() || I->isDebugInstr())
220 continue;
221 HazardRec->emitInstruction(MI: &*I);
222 }
223}
224
225void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
226 Available.clear(); // -misched-cutoff.
227 LLVM_DEBUG(HazardRec->dumpState(););
228}
229
230void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) {
231 assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
232 "Entering MBB twice?");
233 LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));
234
235 MBB = NextMBB;
236
237 /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
238 /// point to it.
239 HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
240 LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
241 if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
242 dbgs() << ":\n";);
243
244 // Try to take over the state from a single predecessor, if it has been
245 // scheduled. If this is not possible, we are done.
246 MachineBasicBlock *SinglePredMBB =
247 getSingleSchedPred(MBB, Loop: MLI->getLoopFor(BB: MBB));
248 if (SinglePredMBB == nullptr)
249 return;
250 auto It = SchedStates.find(x: SinglePredMBB);
251 if (It == SchedStates.end())
252 return;
253
254 LLVM_DEBUG(dbgs() << "** Continued scheduling from "
255 << printMBBReference(*SinglePredMBB) << "\n";);
256
257 HazardRec->copyState(Incoming: It->second);
258 LLVM_DEBUG(HazardRec->dumpState(););
259
260 // Emit incoming terminator(s). Be optimistic and assume that branch
261 // prediction will generally do "the right thing".
262 for (MachineInstr &MI : SinglePredMBB->terminators()) {
263 LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
264 bool TakenBranch = (MI.isBranch() &&
265 (TII->getBranchInfo(MI).isIndirect() ||
266 TII->getBranchInfo(MI).getMBBTarget() == MBB));
267 HazardRec->emitInstruction(MI: &MI, TakenBranch);
268 if (TakenBranch)
269 break;
270 }
271}
272
273void SystemZPostRASchedStrategy::leaveMBB() {
274 LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);
275
276 // Advance to first terminator. The successor block will handle terminators
277 // dependent on CFG layout (T/NT branch etc).
278 advanceTo(NextBegin: MBB->getFirstTerminator());
279}
280
281SystemZPostRASchedStrategy::
282SystemZPostRASchedStrategy(const MachineSchedContext *C)
283 : MLI(C->MLI),
284 TII(static_cast<const SystemZInstrInfo *>
285 (C->MF->getSubtarget().getInstrInfo())),
286 MBB(nullptr), HazardRec(nullptr) {
287 const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
288 SchedModel.init(TSInfo: ST);
289}
290
291SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() {
292 // Delete hazard recognizers kept around for each MBB.
293 for (auto I : SchedStates) {
294 SystemZHazardRecognizer *hazrec = I.second;
295 delete hazrec;
296 }
297}
298
299void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
300 MachineBasicBlock::iterator End,
301 unsigned NumRegionInstrs) {
302 // Don't emit the terminators.
303 if (Begin->isTerminator())
304 return;
305
306 // Emit any instructions before start of region.
307 advanceTo(NextBegin: Begin);
308}
309
310// Pick the next node to schedule.
311SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
312 // Only scheduling top-down.
313 IsTopNode = true;
314
315 if (Available.empty())
316 return nullptr;
317
318 // If only one choice, return it.
319 if (Available.size() == 1) {
320 LLVM_DEBUG(dbgs() << "** Only one: ";
321 HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
322 return *Available.begin();
323 }
324
325 // All nodes that are possible to schedule are stored in the Available set.
326 LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
327
328 Candidate Best;
329 for (auto *SU : Available) {
330
331 // SU is the next candidate to be compared against current Best.
332 Candidate c(SU, *HazardRec);
333
334 // Remeber which SU is the best candidate.
335 if (Best.SU == nullptr || c < Best) {
336 Best = c;
337 LLVM_DEBUG(dbgs() << "** Best so far: ";);
338 } else
339 LLVM_DEBUG(dbgs() << "** Tried : ";);
340 LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
341 dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);
342
343 // Once we know we have seen all SUs that affect grouping or use unbuffered
344 // resources, we can stop iterating if Best looks good.
345 if (!SU->isScheduleHigh && Best.noCost())
346 break;
347 }
348
349 assert (Best.SU != nullptr);
350 return Best.SU;
351}
352
353SystemZPostRASchedStrategy::Candidate::
354Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
355 SU = SU_;
356
357 // Check the grouping cost. For a node that must begin / end a
358 // group, it is positive if it would do so prematurely, or negative
359 // if it would fit naturally into the schedule.
360 GroupingCost = HazardRec.groupingCost(SU);
361
362 // Check the resources cost for this SU.
363 ResourcesCost = HazardRec.resourcesCost(SU);
364}
365
366bool SystemZPostRASchedStrategy::Candidate::
367operator<(const Candidate &other) {
368
369 // Check decoder grouping.
370 if (GroupingCost < other.GroupingCost)
371 return true;
372 if (GroupingCost > other.GroupingCost)
373 return false;
374
375 // Compare the use of resources.
376 if (ResourcesCost < other.ResourcesCost)
377 return true;
378 if (ResourcesCost > other.ResourcesCost)
379 return false;
380
381 // Higher SU is otherwise generally better.
382 if (SU->getHeight() > other.SU->getHeight())
383 return true;
384 if (SU->getHeight() < other.SU->getHeight())
385 return false;
386
387 // If all same, fall back to original order.
388 if (SU->NodeNum < other.SU->NodeNum)
389 return true;
390
391 return false;
392}
393
394void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
395 LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
396 if (Available.size() == 1) dbgs() << "(only one) ";
397 Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);
398
399 // Remove SU from Available set and update HazardRec.
400 Available.erase(x: SU);
401 HazardRec->EmitInstruction(SU);
402}
403
404void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
405 // Set isScheduleHigh flag on all SUs that we want to consider first in
406 // pickNode().
407 const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
408 bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
409 SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
410
411 // Put all released SUs in the Available set.
412 Available.insert(x: SU);
413}
414