//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//
13
#include "llvm/CodeGen/VLIWMachineScheduler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "machine-scheduler"
43
44static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
45 cl::init(Val: false));
46
47static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
48 cl::init(Val: true));
49
50static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
51 cl::Hidden, cl::init(Val: 1));
52
53// Check if the scheduler should penalize instructions that are available to
54// early due to a zero-latency dependence.
55static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
56 cl::init(Val: true));
57
58// This value is used to determine if a register class is a high pressure set.
59// We compute the maximum number of registers needed and divided by the total
60// available. Then, we compare the result to this value.
61static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
62 cl::init(Val: 0.75f),
63 cl::desc("High register pressure threhold."));
64
65VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
66 const TargetSchedModel *SM)
67 : TII(STI.getInstrInfo()), SchedModel(SM) {
68 ResourcesModel = createPacketizer(STI);
69
70 // This hard requirement could be relaxed,
71 // but for now do not let it proceed.
72 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
73
74 Packet.reserve(N: SchedModel->getIssueWidth());
75 Packet.clear();
76 ResourcesModel->clearResources();
77}
78
79void VLIWResourceModel::reset() {
80 Packet.clear();
81 ResourcesModel->clearResources();
82}
83
84VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
85
86/// Return true if there is a dependence between SUd and SUu.
87bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
88 if (SUd->Succs.size() == 0)
89 return false;
90
91 for (const auto &S : SUd->Succs) {
92 // Since we do not add pseudos to packets, might as well
93 // ignore order dependencies.
94 if (S.isCtrl())
95 continue;
96
97 if (S.getSUnit() == SUu && S.getLatency() > 0)
98 return true;
99 }
100 return false;
101}
102
103/// Check if scheduling of this SU is possible
104/// in the current packet.
105/// It is _not_ precise (statefull), it is more like
106/// another heuristic. Many corner cases are figured
107/// empirically.
108bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
109 if (!SU || !SU->getInstr())
110 return false;
111
112 // First see if the pipeline could receive this instruction
113 // in the current cycle.
114 switch (SU->getInstr()->getOpcode()) {
115 default:
116 if (!ResourcesModel->canReserveResources(MI&: *SU->getInstr()))
117 return false;
118 break;
119 case TargetOpcode::EXTRACT_SUBREG:
120 case TargetOpcode::INSERT_SUBREG:
121 case TargetOpcode::SUBREG_TO_REG:
122 case TargetOpcode::REG_SEQUENCE:
123 case TargetOpcode::IMPLICIT_DEF:
124 case TargetOpcode::COPY:
125 case TargetOpcode::INLINEASM:
126 case TargetOpcode::INLINEASM_BR:
127 break;
128 }
129
130 // Now see if there are no other dependencies to instructions already
131 // in the packet.
132 if (IsTop) {
133 for (const SUnit *U : Packet)
134 if (hasDependence(SUd: U, SUu: SU))
135 return false;
136 } else {
137 for (const SUnit *U : Packet)
138 if (hasDependence(SUd: SU, SUu: U))
139 return false;
140 }
141 return true;
142}
143
144/// Keep track of available resources.
145bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
146 bool startNewCycle = false;
147 // Artificially reset state.
148 if (!SU) {
149 reset();
150 TotalPackets++;
151 return false;
152 }
153 // If this SU does not fit in the packet or the packet is now full
154 // start a new one.
155 if (!isResourceAvailable(SU, IsTop) ||
156 Packet.size() >= SchedModel->getIssueWidth()) {
157 reset();
158 TotalPackets++;
159 startNewCycle = true;
160 }
161
162 switch (SU->getInstr()->getOpcode()) {
163 default:
164 ResourcesModel->reserveResources(MI&: *SU->getInstr());
165 break;
166 case TargetOpcode::EXTRACT_SUBREG:
167 case TargetOpcode::INSERT_SUBREG:
168 case TargetOpcode::SUBREG_TO_REG:
169 case TargetOpcode::REG_SEQUENCE:
170 case TargetOpcode::IMPLICIT_DEF:
171 case TargetOpcode::KILL:
172 case TargetOpcode::CFI_INSTRUCTION:
173 case TargetOpcode::EH_LABEL:
174 case TargetOpcode::COPY:
175 case TargetOpcode::INLINEASM:
176 case TargetOpcode::INLINEASM_BR:
177 break;
178 }
179 Packet.push_back(Elt: SU);
180
181#ifndef NDEBUG
182 LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
183 for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
184 LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
185 LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
186 LLVM_DEBUG(Packet[i]->getInstr()->dump());
187 }
188#endif
189
190 return startNewCycle;
191}
192
193DFAPacketizer *
194VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
195 return STI.getInstrInfo()->CreateTargetScheduleState(STI);
196}
197
198/// schedule - Called back from MachineScheduler::runOnMachineFunction
199/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
200/// only includes instructions that have DAG nodes, not scheduling boundaries.
201void VLIWMachineScheduler::schedule() {
202 LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
203 << printMBBReference(*BB) << " " << BB->getName()
204 << " in_func " << BB->getParent()->getName()
205 << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
206
207 buildDAGWithRegPressure();
208
209 Topo.InitDAGTopologicalSorting();
210
211 // Postprocess the DAG to add platform-specific artificial dependencies.
212 postProcessDAG();
213
214 SmallVector<SUnit *, 8> TopRoots, BotRoots;
215 findRootsAndBiasEdges(TopRoots, BotRoots);
216
217 // Initialize the strategy before modifying the DAG.
218 SchedImpl->initialize(DAG: this);
219
220 LLVM_DEBUG({
221 unsigned maxH = 0;
222 for (const SUnit &SU : SUnits)
223 if (SU.getHeight() > maxH)
224 maxH = SU.getHeight();
225 dbgs() << "Max Height " << maxH << "\n";
226 });
227 LLVM_DEBUG({
228 unsigned maxD = 0;
229 for (const SUnit &SU : SUnits)
230 if (SU.getDepth() > maxD)
231 maxD = SU.getDepth();
232 dbgs() << "Max Depth " << maxD << "\n";
233 });
234 LLVM_DEBUG(dump());
235 if (ViewMISchedDAGs)
236 viewGraph();
237
238 initQueues(TopRoots, BotRoots);
239
240 bool IsTopNode = false;
241 while (true) {
242 LLVM_DEBUG(
243 dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
244 SUnit *SU = SchedImpl->pickNode(IsTopNode);
245 if (!SU)
246 break;
247
248 if (!checkSchedLimit())
249 break;
250
251 scheduleMI(SU, IsTopNode);
252
253 // Notify the scheduling strategy after updating the DAG.
254 SchedImpl->schedNode(SU, IsTopNode);
255
256 updateQueues(SU, IsTopNode);
257 }
258 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
259
260 placeDebugValues();
261
262 LLVM_DEBUG({
263 dbgs() << "*** Final schedule for "
264 << printMBBReference(*begin()->getParent()) << " ***\n";
265 dumpSchedule();
266 dbgs() << '\n';
267 });
268}
269
270void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
271 DAG = static_cast<VLIWMachineScheduler *>(dag);
272 SchedModel = DAG->getSchedModel();
273
274 Top.init(dag: DAG, smodel: SchedModel);
275 Bot.init(dag: DAG, smodel: SchedModel);
276
277 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
278 // are disabled, then these HazardRecs will be disabled.
279 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
280 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
281 const TargetInstrInfo *TII = STI.getInstrInfo();
282 delete Top.HazardRec;
283 delete Bot.HazardRec;
284 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
285 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
286
287 delete Top.ResourceModel;
288 delete Bot.ResourceModel;
289 Top.ResourceModel = createVLIWResourceModel(STI, SchedModel: DAG->getSchedModel());
290 Bot.ResourceModel = createVLIWResourceModel(STI, SchedModel: DAG->getSchedModel());
291
292 const std::vector<unsigned> &MaxPressure =
293 DAG->getRegPressure().MaxSetPressure;
294 HighPressureSets.assign(NumElts: MaxPressure.size(), Elt: false);
295 for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
296 unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(Idx: i);
297 HighPressureSets[i] =
298 ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
299 }
300}
301
302VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
303 const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
304 return new VLIWResourceModel(STI, SchedModel);
305}
306
307void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
308 for (const SDep &PI : SU->Preds) {
309 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
310 unsigned MinLatency = PI.getLatency();
311#ifndef NDEBUG
312 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
313#endif
314 if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
315 SU->TopReadyCycle = PredReadyCycle + MinLatency;
316 }
317
318 if (!SU->isScheduled)
319 Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle);
320}
321
322void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
323 assert(SU->getInstr() && "Scheduled SUnit must have instr");
324
325 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
326 ++I) {
327 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
328 unsigned MinLatency = I->getLatency();
329#ifndef NDEBUG
330 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
331#endif
332 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
333 SU->BotReadyCycle = SuccReadyCycle + MinLatency;
334 }
335
336 if (!SU->isScheduled)
337 Bot.releaseNode(SU, ReadyCycle: SU->BotReadyCycle);
338}
339
340ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
341 delete ResourceModel;
342 delete HazardRec;
343}
344
345/// Does this SU have a hazard within the current instruction group.
346///
347/// The scheduler supports two modes of hazard recognition. The first is the
348/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
349/// supports highly complicated in-order reservation tables
350/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
351///
352/// The second is a streamlined mechanism that checks for hazards based on
353/// simple counters that the scheduler itself maintains. It explicitly checks
354/// for instruction dispatch limitations, including the number of micro-ops that
355/// can dispatch per cycle.
356///
357/// TODO: Also check whether the SU must start a new group.
358bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
359 if (HazardRec->isEnabled())
360 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
361
362 unsigned uops = SchedModel->getNumMicroOps(MI: SU->getInstr());
363 if (IssueCount + uops > SchedModel->getIssueWidth())
364 return true;
365
366 return false;
367}
368
369void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
370 SUnit *SU, unsigned ReadyCycle) {
371 if (ReadyCycle < MinReadyCycle)
372 MinReadyCycle = ReadyCycle;
373
374 // Check for interlocks first. For the purpose of other heuristics, an
375 // instruction that cannot issue appears as if it's not in the ReadyQueue.
376 if (ReadyCycle > CurrCycle || checkHazard(SU))
377
378 Pending.push(SU);
379 else
380 Available.push(SU);
381}
382
383/// Move the boundary of scheduled code by one cycle.
384void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
385 unsigned Width = SchedModel->getIssueWidth();
386 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
387
388 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
389 "MinReadyCycle uninitialized");
390 unsigned NextCycle = std::max(a: CurrCycle + 1, b: MinReadyCycle);
391
392 if (!HazardRec->isEnabled()) {
393 // Bypass HazardRec virtual calls.
394 CurrCycle = NextCycle;
395 } else {
396 // Bypass getHazardType calls in case of long latency.
397 for (; CurrCycle != NextCycle; ++CurrCycle) {
398 if (isTop())
399 HazardRec->AdvanceCycle();
400 else
401 HazardRec->RecedeCycle();
402 }
403 }
404 CheckPending = true;
405
406 LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
407 << CurrCycle << '\n');
408}
409
410/// Move the boundary of scheduled code by one SUnit.
411void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
412 bool startNewCycle = false;
413
414 // Update the reservation table.
415 if (HazardRec->isEnabled()) {
416 if (!isTop() && SU->isCall) {
417 // Calls are scheduled with their preceding instructions. For bottom-up
418 // scheduling, clear the pipeline state before emitting.
419 HazardRec->Reset();
420 }
421 HazardRec->EmitInstruction(SU);
422 }
423
424 // Update DFA model.
425 startNewCycle = ResourceModel->reserveResources(SU, IsTop: isTop());
426
427 // Check the instruction group dispatch limit.
428 // TODO: Check if this SU must end a dispatch group.
429 IssueCount += SchedModel->getNumMicroOps(MI: SU->getInstr());
430 if (startNewCycle) {
431 LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
432 bumpCycle();
433 } else
434 LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
435 << CurrCycle << '\n');
436}
437
438/// Release pending ready nodes in to the available queue. This makes them
439/// visible to heuristics.
440void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
441 // If the available queue is empty, it is safe to reset MinReadyCycle.
442 if (Available.empty())
443 MinReadyCycle = std::numeric_limits<unsigned>::max();
444
445 // Check to see if any of the pending instructions are ready to issue. If
446 // so, add them to the available queue.
447 for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
448 SUnit *SU = *(Pending.begin() + i);
449 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
450
451 if (ReadyCycle < MinReadyCycle)
452 MinReadyCycle = ReadyCycle;
453
454 if (ReadyCycle > CurrCycle)
455 continue;
456
457 if (checkHazard(SU))
458 continue;
459
460 Available.push(SU);
461 Pending.remove(I: Pending.begin() + i);
462 --i;
463 --e;
464 }
465 CheckPending = false;
466}
467
468/// Remove SU from the ready set for this boundary.
469void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
470 if (Available.isInQueue(SU))
471 Available.remove(I: Available.find(SU));
472 else {
473 assert(Pending.isInQueue(SU) && "bad ready count");
474 Pending.remove(I: Pending.find(SU));
475 }
476}
477
478/// If this queue only has one ready candidate, return it. As a side effect,
479/// advance the cycle until at least one node is ready. If multiple instructions
480/// are ready, return NULL.
481SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
482 if (CheckPending)
483 releasePending();
484
485 auto AdvanceCycle = [this]() {
486 if (Available.empty())
487 return true;
488 if (Available.size() == 1 && Pending.size() > 0)
489 return !ResourceModel->isResourceAvailable(SU: *Available.begin(), IsTop: isTop()) ||
490 getWeakLeft(SU: *Available.begin(), isTop: isTop()) != 0;
491 return false;
492 };
493 for (unsigned i = 0; AdvanceCycle(); ++i) {
494 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
495 "permanent hazard");
496 (void)i;
497 ResourceModel->reserveResources(SU: nullptr, IsTop: isTop());
498 bumpCycle();
499 releasePending();
500 }
501 if (Available.size() == 1)
502 return *Available.begin();
503 return nullptr;
504}
505
506#ifndef NDEBUG
507void ConvergingVLIWScheduler::traceCandidate(const char *Label,
508 const ReadyQueue &Q, SUnit *SU,
509 int Cost, PressureChange P) {
510 dbgs() << Label << " " << Q.getName() << " ";
511 if (P.isValid())
512 dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
513 << P.getUnitInc() << " ";
514 else
515 dbgs() << " ";
516 dbgs() << "cost(" << Cost << ")\t";
517 DAG->dumpNode(*SU);
518}
519
520// Very detailed queue dump, to be used with higher verbosity levels.
521void ConvergingVLIWScheduler::readyQueueVerboseDump(
522 const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
523 ReadyQueue &Q) {
524 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
525
526 dbgs() << ">>> " << Q.getName() << "\n";
527 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
528 RegPressureDelta RPDelta;
529 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
530 DAG->getRegionCriticalPSets(),
531 DAG->getRegPressure().MaxSetPressure);
532 std::stringstream dbgstr;
533 dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
534 dbgs() << dbgstr.str();
535 SchedulingCost(Q, *I, Candidate, RPDelta, true);
536 dbgs() << "\t";
537 (*I)->getInstr()->dump();
538 }
539 dbgs() << "\n";
540}
541#endif
542
543/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
544/// of SU, return true (we may have duplicates)
545static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
546 if (SU->NumPredsLeft == 0)
547 return false;
548
549 for (auto &Pred : SU->Preds) {
550 // We found an available, but not scheduled, predecessor.
551 if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
552 return false;
553 }
554
555 return true;
556}
557
558/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
559/// of SU, return true (we may have duplicates)
560static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
561 if (SU->NumSuccsLeft == 0)
562 return false;
563
564 for (auto &Succ : SU->Succs) {
565 // We found an available, but not scheduled, successor.
566 if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
567 return false;
568 }
569 return true;
570}
571
572/// Check if the instruction changes the register pressure of a register in the
573/// high pressure set. The function returns a negative value if the pressure
574/// decreases and a positive value is the pressure increases. If the instruction
575/// doesn't use a high pressure register or doesn't change the register
576/// pressure, then return 0.
577int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
578 PressureDiff &PD = DAG->getPressureDiff(SU);
579 for (const auto &P : PD) {
580 if (!P.isValid())
581 continue;
582 // The pressure differences are computed bottom-up, so the comparison for
583 // an increase is positive in the bottom direction, but negative in the
584 // top-down direction.
585 if (HighPressureSets[P.getPSet()])
586 return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
587 }
588 return 0;
589}
590
591/// Single point to compute overall scheduling cost.
592/// TODO: More heuristics will be used soon.
593int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
594 SchedCandidate &Candidate,
595 RegPressureDelta &Delta,
596 bool verbose) {
597 // Initial trivial priority.
598 int ResCount = 1;
599
600 // Do not waste time on a node that is already scheduled.
601 if (!SU || SU->isScheduled)
602 return ResCount;
603
604 LLVM_DEBUG(if (verbose) dbgs()
605 << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
606 // Forced priority is high.
607 if (SU->isScheduleHigh) {
608 ResCount += PriorityOne;
609 LLVM_DEBUG(dbgs() << "H|");
610 }
611
612 unsigned IsAvailableAmt = 0;
613 // Critical path first.
614 if (Q.getID() == TopQID) {
615 if (Top.isLatencyBound(SU)) {
616 LLVM_DEBUG(if (verbose) dbgs() << "LB|");
617 ResCount += (SU->getHeight() * ScaleTwo);
618 }
619
620 LLVM_DEBUG(if (verbose) {
621 std::stringstream dbgstr;
622 dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
623 dbgs() << dbgstr.str();
624 });
625
626 // If resources are available for it, multiply the
627 // chance of scheduling.
628 if (Top.ResourceModel->isResourceAvailable(SU, IsTop: true)) {
629 IsAvailableAmt = (PriorityTwo + PriorityThree);
630 ResCount += IsAvailableAmt;
631 LLVM_DEBUG(if (verbose) dbgs() << "A|");
632 } else
633 LLVM_DEBUG(if (verbose) dbgs() << " |");
634 } else {
635 if (Bot.isLatencyBound(SU)) {
636 LLVM_DEBUG(if (verbose) dbgs() << "LB|");
637 ResCount += (SU->getDepth() * ScaleTwo);
638 }
639
640 LLVM_DEBUG(if (verbose) {
641 std::stringstream dbgstr;
642 dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
643 dbgs() << dbgstr.str();
644 });
645
646 // If resources are available for it, multiply the
647 // chance of scheduling.
648 if (Bot.ResourceModel->isResourceAvailable(SU, IsTop: false)) {
649 IsAvailableAmt = (PriorityTwo + PriorityThree);
650 ResCount += IsAvailableAmt;
651 LLVM_DEBUG(if (verbose) dbgs() << "A|");
652 } else
653 LLVM_DEBUG(if (verbose) dbgs() << " |");
654 }
655
656 unsigned NumNodesBlocking = 0;
657 if (Q.getID() == TopQID) {
658 // How many SUs does it block from scheduling?
659 // Look at all of the successors of this node.
660 // Count the number of nodes that
661 // this node is the sole unscheduled node for.
662 if (Top.isLatencyBound(SU))
663 for (const SDep &SI : SU->Succs)
664 if (isSingleUnscheduledPred(SU: SI.getSUnit(), SU2: SU))
665 ++NumNodesBlocking;
666 } else {
667 // How many unscheduled predecessors block this node?
668 if (Bot.isLatencyBound(SU))
669 for (const SDep &PI : SU->Preds)
670 if (isSingleUnscheduledSucc(SU: PI.getSUnit(), SU2: SU))
671 ++NumNodesBlocking;
672 }
673 ResCount += (NumNodesBlocking * ScaleTwo);
674
675 LLVM_DEBUG(if (verbose) {
676 std::stringstream dbgstr;
677 dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
678 dbgs() << dbgstr.str();
679 });
680
681 // Factor in reg pressure as a heuristic.
682 if (!IgnoreBBRegPressure) {
683 // Decrease priority by the amount that register pressure exceeds the limit.
684 ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
685 // Decrease priority if register pressure exceeds the limit.
686 ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
687 // Decrease priority slightly if register pressure would increase over the
688 // current maximum.
689 ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
690 // If there are register pressure issues, then we remove the value added for
691 // the instruction being available. The rationale is that we really don't
692 // want to schedule an instruction that causes a spill.
693 if (IsAvailableAmt && pressureChange(SU, isBotUp: Q.getID() != TopQID) > 0 &&
694 (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
695 Delta.CurrentMax.getUnitInc()))
696 ResCount -= IsAvailableAmt;
697 LLVM_DEBUG(if (verbose) {
698 dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
699 << Delta.CriticalMax.getUnitInc() << "/"
700 << Delta.CurrentMax.getUnitInc() << ")|";
701 });
702 }
703
704 // Give preference to a zero latency instruction if the dependent
705 // instruction is in the current packet.
706 if (Q.getID() == TopQID && getWeakLeft(SU, isTop: true) == 0) {
707 for (const SDep &PI : SU->Preds) {
708 if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
709 PI.getLatency() == 0 &&
710 Top.ResourceModel->isInPacket(SU: PI.getSUnit())) {
711 ResCount += PriorityThree;
712 LLVM_DEBUG(if (verbose) dbgs() << "Z|");
713 }
714 }
715 } else if (Q.getID() == BotQID && getWeakLeft(SU, isTop: false) == 0) {
716 for (const SDep &SI : SU->Succs) {
717 if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
718 SI.getLatency() == 0 &&
719 Bot.ResourceModel->isInPacket(SU: SI.getSUnit())) {
720 ResCount += PriorityThree;
721 LLVM_DEBUG(if (verbose) dbgs() << "Z|");
722 }
723 }
724 }
725
726 // If the instruction has a non-zero latency dependence with an instruction in
727 // the current packet, then it should not be scheduled yet. The case occurs
728 // when the dependent instruction is scheduled in a new packet, so the
729 // scheduler updates the current cycle and pending instructions become
730 // available.
731 if (CheckEarlyAvail) {
732 if (Q.getID() == TopQID) {
733 for (const auto &PI : SU->Preds) {
734 if (PI.getLatency() > 0 &&
735 Top.ResourceModel->isInPacket(SU: PI.getSUnit())) {
736 ResCount -= PriorityOne;
737 LLVM_DEBUG(if (verbose) dbgs() << "D|");
738 }
739 }
740 } else {
741 for (const auto &SI : SU->Succs) {
742 if (SI.getLatency() > 0 &&
743 Bot.ResourceModel->isInPacket(SU: SI.getSUnit())) {
744 ResCount -= PriorityOne;
745 LLVM_DEBUG(if (verbose) dbgs() << "D|");
746 }
747 }
748 }
749 }
750
751 LLVM_DEBUG(if (verbose) {
752 std::stringstream dbgstr;
753 dbgstr << "Total " << std::setw(4) << ResCount << ")";
754 dbgs() << dbgstr.str();
755 });
756
757 return ResCount;
758}
759
760/// Pick the best candidate from the top queue.
761///
762/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
763/// DAG building. To adjust for the current scheduling location we need to
764/// maintain the number of vreg uses remaining to be top-scheduled.
765ConvergingVLIWScheduler::CandResult
766ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
767 const RegPressureTracker &RPTracker,
768 SchedCandidate &Candidate) {
769 ReadyQueue &Q = Zone.Available;
770 LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
771 readyQueueVerboseDump(RPTracker, Candidate, Q);
772 else Q.dump(););
773
774 // getMaxPressureDelta temporarily modifies the tracker.
775 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
776
777 // BestSU remains NULL if no top candidates beat the best existing candidate.
778 CandResult FoundCandidate = NoCand;
779 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
780 RegPressureDelta RPDelta;
781 TempTracker.getMaxPressureDelta(MI: (*I)->getInstr(), Delta&: RPDelta,
782 CriticalPSets: DAG->getRegionCriticalPSets(),
783 MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
784
785 int CurrentCost = SchedulingCost(Q, SU: *I, Candidate, Delta&: RPDelta, verbose: false);
786
787 // Initialize the candidate if needed.
788 if (!Candidate.SU) {
789 LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
790 Candidate.SU = *I;
791 Candidate.RPDelta = RPDelta;
792 Candidate.SCost = CurrentCost;
793 FoundCandidate = NodeOrder;
794 continue;
795 }
796
797 // Choose node order for negative cost candidates. There is no good
798 // candidate in this case.
799 if (CurrentCost < 0 && Candidate.SCost < 0) {
800 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
801 (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
802 LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
803 Candidate.SU = *I;
804 Candidate.RPDelta = RPDelta;
805 Candidate.SCost = CurrentCost;
806 FoundCandidate = NodeOrder;
807 }
808 continue;
809 }
810
811 // Best cost.
812 if (CurrentCost > Candidate.SCost) {
813 LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
814 Candidate.SU = *I;
815 Candidate.RPDelta = RPDelta;
816 Candidate.SCost = CurrentCost;
817 FoundCandidate = BestCost;
818 continue;
819 }
820
821 // Choose an instruction that does not depend on an artificial edge.
822 unsigned CurrWeak = getWeakLeft(SU: *I, isTop: (Q.getID() == TopQID));
823 unsigned CandWeak = getWeakLeft(SU: Candidate.SU, isTop: (Q.getID() == TopQID));
824 if (CurrWeak != CandWeak) {
825 if (CurrWeak < CandWeak) {
826 LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
827 Candidate.SU = *I;
828 Candidate.RPDelta = RPDelta;
829 Candidate.SCost = CurrentCost;
830 FoundCandidate = Weak;
831 }
832 continue;
833 }
834
835 if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(SU: *I)) {
836 unsigned CurrSize, CandSize;
837 if (Q.getID() == TopQID) {
838 CurrSize = (*I)->Succs.size();
839 CandSize = Candidate.SU->Succs.size();
840 } else {
841 CurrSize = (*I)->Preds.size();
842 CandSize = Candidate.SU->Preds.size();
843 }
844 if (CurrSize > CandSize) {
845 LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
846 Candidate.SU = *I;
847 Candidate.RPDelta = RPDelta;
848 Candidate.SCost = CurrentCost;
849 FoundCandidate = BestCost;
850 }
851 // Keep the old candidate if it's a better candidate. That is, don't use
852 // the subsequent tie breaker.
853 if (CurrSize != CandSize)
854 continue;
855 }
856
857 // Tie breaker.
858 // To avoid scheduling indeterminism, we need a tie breaker
859 // for the case when cost is identical for two nodes.
860 if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
861 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
862 (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
863 LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
864 Candidate.SU = *I;
865 Candidate.RPDelta = RPDelta;
866 Candidate.SCost = CurrentCost;
867 FoundCandidate = NodeOrder;
868 continue;
869 }
870 }
871
872 // Fall through to original instruction order.
873 // Only consider node order if Candidate was chosen from this Q.
874 if (FoundCandidate == NoCand)
875 continue;
876 }
877 return FoundCandidate;
878}
879
880/// Pick the best candidate node from either the top or bottom queue.
881SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
882 // Schedule as far as possible in the direction of no choice. This is most
883 // efficient, but also provides the best heuristics for CriticalPSets.
884 if (SUnit *SU = Bot.pickOnlyChoice()) {
885 LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
886 IsTopNode = false;
887 return SU;
888 }
889 if (SUnit *SU = Top.pickOnlyChoice()) {
890 LLVM_DEBUG(dbgs() << "Picked only Top\n");
891 IsTopNode = true;
892 return SU;
893 }
894 SchedCandidate BotCand;
895 // Prefer bottom scheduling when heuristics are silent.
896 CandResult BotResult =
897 pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand);
898 assert(BotResult != NoCand && "failed to find the first candidate");
899
900 // If either Q has a single candidate that provides the least increase in
901 // Excess pressure, we can immediately schedule from that Q.
902 //
903 // RegionCriticalPSets summarizes the pressure within the scheduled region and
904 // affects picking from either Q. If scheduling in one direction must
905 // increase pressure for one of the excess PSets, then schedule in that
906 // direction first to provide more freedom in the other direction.
907 if (BotResult == SingleExcess || BotResult == SingleCritical) {
908 LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
909 IsTopNode = false;
910 return BotCand.SU;
911 }
912 // Check if the top Q has a better candidate.
913 SchedCandidate TopCand;
914 CandResult TopResult =
915 pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand);
916 assert(TopResult != NoCand && "failed to find the first candidate");
917
918 if (TopResult == SingleExcess || TopResult == SingleCritical) {
919 LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
920 IsTopNode = true;
921 return TopCand.SU;
922 }
923 // If either Q has a single candidate that minimizes pressure above the
924 // original region's pressure pick it.
925 if (BotResult == SingleMax) {
926 LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
927 IsTopNode = false;
928 return BotCand.SU;
929 }
930 if (TopResult == SingleMax) {
931 LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
932 IsTopNode = true;
933 return TopCand.SU;
934 }
935 if (TopCand.SCost > BotCand.SCost) {
936 LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
937 IsTopNode = true;
938 return TopCand.SU;
939 }
940 // Otherwise prefer the bottom candidate in node order.
941 LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
942 IsTopNode = false;
943 return BotCand.SU;
944}
945
946/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
947SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
948 if (DAG->top() == DAG->bottom()) {
949 assert(Top.Available.empty() && Top.Pending.empty() &&
950 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
951 return nullptr;
952 }
953 SUnit *SU;
954 if (PreRADirection == MISched::TopDown) {
955 SU = Top.pickOnlyChoice();
956 if (!SU) {
957 SchedCandidate TopCand;
958 CandResult TopResult =
959 pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand);
960 assert(TopResult != NoCand && "failed to find the first candidate");
961 (void)TopResult;
962 SU = TopCand.SU;
963 }
964 IsTopNode = true;
965 } else if (PreRADirection == MISched::BottomUp) {
966 SU = Bot.pickOnlyChoice();
967 if (!SU) {
968 SchedCandidate BotCand;
969 CandResult BotResult =
970 pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand);
971 assert(BotResult != NoCand && "failed to find the first candidate");
972 (void)BotResult;
973 SU = BotCand.SU;
974 }
975 IsTopNode = false;
976 } else {
977 SU = pickNodeBidrectional(IsTopNode);
978 }
979 if (SU->isTopReady())
980 Top.removeReady(SU);
981 if (SU->isBottomReady())
982 Bot.removeReady(SU);
983
984 LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
985 << " Scheduling instruction in cycle "
986 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
987 << reportPackets() << ")\n";
988 DAG->dumpNode(*SU));
989 return SU;
990}
991
992/// Update the scheduler's state after scheduling a node. This is the same node
993/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
994/// to update it's state based on the current cycle before MachineSchedStrategy
995/// does.
996void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
997 if (IsTopNode) {
998 Top.bumpNode(SU);
999 SU->TopReadyCycle = Top.CurrCycle;
1000 } else {
1001 Bot.bumpNode(SU);
1002 SU->BotReadyCycle = Bot.CurrCycle;
1003 }
1004}
1005