1 | //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // MachineScheduler schedules machine instructions after phi elimination. It |
10 | // preserves LiveIntervals so it can be invoked before register allocation. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/CodeGen/VLIWMachineScheduler.h" |
15 | #include "llvm/ADT/SmallVector.h" |
16 | #include "llvm/CodeGen/DFAPacketizer.h" |
17 | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | #include "llvm/CodeGen/MachineFunction.h" |
19 | #include "llvm/CodeGen/MachineInstr.h" |
20 | #include "llvm/CodeGen/MachineLoopInfo.h" |
21 | #include "llvm/CodeGen/RegisterClassInfo.h" |
22 | #include "llvm/CodeGen/RegisterPressure.h" |
23 | #include "llvm/CodeGen/ScheduleDAG.h" |
24 | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" |
25 | #include "llvm/CodeGen/TargetInstrInfo.h" |
26 | #include "llvm/CodeGen/TargetOpcodes.h" |
27 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
28 | #include "llvm/CodeGen/TargetSchedule.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/Support/CommandLine.h" |
31 | #include "llvm/Support/Debug.h" |
32 | #include "llvm/Support/raw_ostream.h" |
33 | #include <algorithm> |
34 | #include <cassert> |
35 | #include <iomanip> |
36 | #include <limits> |
37 | #include <memory> |
38 | #include <sstream> |
39 | |
40 | using namespace llvm; |
41 | |
42 | #define DEBUG_TYPE "machine-scheduler" |
43 | |
44 | static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure" , cl::Hidden, |
45 | cl::init(Val: false)); |
46 | |
47 | static cl::opt<bool> UseNewerCandidate("use-newer-candidate" , cl::Hidden, |
48 | cl::init(Val: true)); |
49 | |
50 | static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level" , |
51 | cl::Hidden, cl::init(Val: 1)); |
52 | |
53 | // Check if the scheduler should penalize instructions that are available to |
54 | // early due to a zero-latency dependence. |
55 | static cl::opt<bool> CheckEarlyAvail("check-early-avail" , cl::Hidden, |
56 | cl::init(Val: true)); |
57 | |
58 | // This value is used to determine if a register class is a high pressure set. |
59 | // We compute the maximum number of registers needed and divided by the total |
60 | // available. Then, we compare the result to this value. |
61 | static cl::opt<float> RPThreshold("vliw-misched-reg-pressure" , cl::Hidden, |
62 | cl::init(Val: 0.75f), |
63 | cl::desc("High register pressure threhold." )); |
64 | |
65 | VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI, |
66 | const TargetSchedModel *SM) |
67 | : TII(STI.getInstrInfo()), SchedModel(SM) { |
68 | ResourcesModel = createPacketizer(STI); |
69 | |
70 | // This hard requirement could be relaxed, |
71 | // but for now do not let it proceed. |
72 | assert(ResourcesModel && "Unimplemented CreateTargetScheduleState." ); |
73 | |
74 | Packet.reserve(N: SchedModel->getIssueWidth()); |
75 | Packet.clear(); |
76 | ResourcesModel->clearResources(); |
77 | } |
78 | |
/// Clear the current packet and release all DFA resources so that a new
/// instruction group can be formed.
void VLIWResourceModel::reset() {
  Packet.clear();
  ResourcesModel->clearResources();
}
83 | |
84 | VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; } |
85 | |
86 | /// Return true if there is a dependence between SUd and SUu. |
87 | bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) { |
88 | if (SUd->Succs.size() == 0) |
89 | return false; |
90 | |
91 | for (const auto &S : SUd->Succs) { |
92 | // Since we do not add pseudos to packets, might as well |
93 | // ignore order dependencies. |
94 | if (S.isCtrl()) |
95 | continue; |
96 | |
97 | if (S.getSUnit() == SUu && S.getLatency() > 0) |
98 | return true; |
99 | } |
100 | return false; |
101 | } |
102 | |
103 | /// Check if scheduling of this SU is possible |
104 | /// in the current packet. |
105 | /// It is _not_ precise (statefull), it is more like |
106 | /// another heuristic. Many corner cases are figured |
107 | /// empirically. |
108 | bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { |
109 | if (!SU || !SU->getInstr()) |
110 | return false; |
111 | |
112 | // First see if the pipeline could receive this instruction |
113 | // in the current cycle. |
114 | switch (SU->getInstr()->getOpcode()) { |
115 | default: |
116 | if (!ResourcesModel->canReserveResources(MI&: *SU->getInstr())) |
117 | return false; |
118 | break; |
119 | case TargetOpcode::EXTRACT_SUBREG: |
120 | case TargetOpcode::INSERT_SUBREG: |
121 | case TargetOpcode::SUBREG_TO_REG: |
122 | case TargetOpcode::REG_SEQUENCE: |
123 | case TargetOpcode::IMPLICIT_DEF: |
124 | case TargetOpcode::COPY: |
125 | case TargetOpcode::INLINEASM: |
126 | case TargetOpcode::INLINEASM_BR: |
127 | break; |
128 | } |
129 | |
130 | // Now see if there are no other dependencies to instructions already |
131 | // in the packet. |
132 | if (IsTop) { |
133 | for (const SUnit *U : Packet) |
134 | if (hasDependence(SUd: U, SUu: SU)) |
135 | return false; |
136 | } else { |
137 | for (const SUnit *U : Packet) |
138 | if (hasDependence(SUd: SU, SUu: U)) |
139 | return false; |
140 | } |
141 | return true; |
142 | } |
143 | |
144 | /// Keep track of available resources. |
145 | bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { |
146 | bool startNewCycle = false; |
147 | // Artificially reset state. |
148 | if (!SU) { |
149 | reset(); |
150 | TotalPackets++; |
151 | return false; |
152 | } |
153 | // If this SU does not fit in the packet or the packet is now full |
154 | // start a new one. |
155 | if (!isResourceAvailable(SU, IsTop) || |
156 | Packet.size() >= SchedModel->getIssueWidth()) { |
157 | reset(); |
158 | TotalPackets++; |
159 | startNewCycle = true; |
160 | } |
161 | |
162 | switch (SU->getInstr()->getOpcode()) { |
163 | default: |
164 | ResourcesModel->reserveResources(MI&: *SU->getInstr()); |
165 | break; |
166 | case TargetOpcode::EXTRACT_SUBREG: |
167 | case TargetOpcode::INSERT_SUBREG: |
168 | case TargetOpcode::SUBREG_TO_REG: |
169 | case TargetOpcode::REG_SEQUENCE: |
170 | case TargetOpcode::IMPLICIT_DEF: |
171 | case TargetOpcode::KILL: |
172 | case TargetOpcode::CFI_INSTRUCTION: |
173 | case TargetOpcode::EH_LABEL: |
174 | case TargetOpcode::COPY: |
175 | case TargetOpcode::INLINEASM: |
176 | case TargetOpcode::INLINEASM_BR: |
177 | break; |
178 | } |
179 | Packet.push_back(Elt: SU); |
180 | |
181 | #ifndef NDEBUG |
182 | LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n" ); |
183 | for (unsigned i = 0, e = Packet.size(); i != e; ++i) { |
184 | LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(" ); |
185 | LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t" ); |
186 | LLVM_DEBUG(Packet[i]->getInstr()->dump()); |
187 | } |
188 | #endif |
189 | |
190 | return startNewCycle; |
191 | } |
192 | |
/// Create the target-specific DFA packetizer used to model functional-unit
/// reservations. Ownership of the returned object passes to the caller
/// (deleted in ~VLIWResourceModel).
DFAPacketizer *
VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
  return STI.getInstrInfo()->CreateTargetScheduleState(STI);
}
197 | |
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  // Build the region's DAG and set up register-pressure tracking.
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postProcessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Debug aid: report the critical-path extent (max height/depth) of the DAG.
  LLVM_DEBUG({
    unsigned maxH = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getHeight() > maxH)
        maxH = SU.getHeight();
    dbgs() << "Max Height " << maxH << "\n";
  });
  LLVM_DEBUG({
    unsigned maxD = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getDepth() > maxD)
        maxD = SU.getDepth();
    dbgs() << "Max Depth " << maxD << "\n";
  });
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  // Main loop: repeatedly ask the strategy for the next node (it decides
  // whether to pick from the top or bottom boundary) until none remain.
  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  // Both boundaries must have met in the middle.
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  // Reinsert DBG_VALUEs that were set aside during scheduling.
  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
269 | |
270 | void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { |
271 | DAG = static_cast<VLIWMachineScheduler *>(dag); |
272 | SchedModel = DAG->getSchedModel(); |
273 | |
274 | Top.init(dag: DAG, smodel: SchedModel); |
275 | Bot.init(dag: DAG, smodel: SchedModel); |
276 | |
277 | // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or |
278 | // are disabled, then these HazardRecs will be disabled. |
279 | const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); |
280 | const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); |
281 | const TargetInstrInfo *TII = STI.getInstrInfo(); |
282 | delete Top.HazardRec; |
283 | delete Bot.HazardRec; |
284 | Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); |
285 | Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); |
286 | |
287 | delete Top.ResourceModel; |
288 | delete Bot.ResourceModel; |
289 | Top.ResourceModel = createVLIWResourceModel(STI, SchedModel: DAG->getSchedModel()); |
290 | Bot.ResourceModel = createVLIWResourceModel(STI, SchedModel: DAG->getSchedModel()); |
291 | |
292 | const std::vector<unsigned> &MaxPressure = |
293 | DAG->getRegPressure().MaxSetPressure; |
294 | HighPressureSets.assign(NumElts: MaxPressure.size(), Elt: false); |
295 | for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { |
296 | unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(Idx: i); |
297 | HighPressureSets[i] = |
298 | ((float)MaxPressure[i] > ((float)Limit * RPThreshold)); |
299 | } |
300 | |
301 | assert((!ForceTopDown || !ForceBottomUp) && |
302 | "-misched-topdown incompatible with -misched-bottomup" ); |
303 | } |
304 | |
/// Factory hook for the resource model; subclasses may override to supply a
/// target-specific model. The caller (initialize) owns the returned object.
VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
    const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
  return new VLIWResourceModel(STI, SchedModel);
}
309 | |
310 | void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { |
311 | for (const SDep &PI : SU->Preds) { |
312 | unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; |
313 | unsigned MinLatency = PI.getLatency(); |
314 | #ifndef NDEBUG |
315 | Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); |
316 | #endif |
317 | if (SU->TopReadyCycle < PredReadyCycle + MinLatency) |
318 | SU->TopReadyCycle = PredReadyCycle + MinLatency; |
319 | } |
320 | |
321 | if (!SU->isScheduled) |
322 | Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle); |
323 | } |
324 | |
325 | void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { |
326 | assert(SU->getInstr() && "Scheduled SUnit must have instr" ); |
327 | |
328 | for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; |
329 | ++I) { |
330 | unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; |
331 | unsigned MinLatency = I->getLatency(); |
332 | #ifndef NDEBUG |
333 | Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); |
334 | #endif |
335 | if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) |
336 | SU->BotReadyCycle = SuccReadyCycle + MinLatency; |
337 | } |
338 | |
339 | if (!SU->isScheduled) |
340 | Bot.releaseNode(SU, ReadyCycle: SU->BotReadyCycle); |
341 | } |
342 | |
// The boundary owns its resource model and hazard recognizer (both are
// replaced via delete/new in ConvergingVLIWScheduler::initialize).
ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
  delete ResourceModel;
  delete HazardRec;
}
347 | |
348 | /// Does this SU have a hazard within the current instruction group. |
349 | /// |
350 | /// The scheduler supports two modes of hazard recognition. The first is the |
351 | /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that |
352 | /// supports highly complicated in-order reservation tables |
353 | /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. |
354 | /// |
355 | /// The second is a streamlined mechanism that checks for hazards based on |
356 | /// simple counters that the scheduler itself maintains. It explicitly checks |
357 | /// for instruction dispatch limitations, including the number of micro-ops that |
358 | /// can dispatch per cycle. |
359 | /// |
360 | /// TODO: Also check whether the SU must start a new group. |
361 | bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { |
362 | if (HazardRec->isEnabled()) |
363 | return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; |
364 | |
365 | unsigned uops = SchedModel->getNumMicroOps(MI: SU->getInstr()); |
366 | if (IssueCount + uops > SchedModel->getIssueWidth()) |
367 | return true; |
368 | |
369 | return false; |
370 | } |
371 | |
372 | void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode( |
373 | SUnit *SU, unsigned ReadyCycle) { |
374 | if (ReadyCycle < MinReadyCycle) |
375 | MinReadyCycle = ReadyCycle; |
376 | |
377 | // Check for interlocks first. For the purpose of other heuristics, an |
378 | // instruction that cannot issue appears as if it's not in the ReadyQueue. |
379 | if (ReadyCycle > CurrCycle || checkHazard(SU)) |
380 | |
381 | Pending.push(SU); |
382 | else |
383 | Available.push(SU); |
384 | } |
385 | |
386 | /// Move the boundary of scheduled code by one cycle. |
387 | void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { |
388 | unsigned Width = SchedModel->getIssueWidth(); |
389 | IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; |
390 | |
391 | assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && |
392 | "MinReadyCycle uninitialized" ); |
393 | unsigned NextCycle = std::max(a: CurrCycle + 1, b: MinReadyCycle); |
394 | |
395 | if (!HazardRec->isEnabled()) { |
396 | // Bypass HazardRec virtual calls. |
397 | CurrCycle = NextCycle; |
398 | } else { |
399 | // Bypass getHazardType calls in case of long latency. |
400 | for (; CurrCycle != NextCycle; ++CurrCycle) { |
401 | if (isTop()) |
402 | HazardRec->AdvanceCycle(); |
403 | else |
404 | HazardRec->RecedeCycle(); |
405 | } |
406 | } |
407 | CheckPending = true; |
408 | |
409 | LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " |
410 | << CurrCycle << '\n'); |
411 | } |
412 | |
413 | /// Move the boundary of scheduled code by one SUnit. |
414 | void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { |
415 | bool startNewCycle = false; |
416 | |
417 | // Update the reservation table. |
418 | if (HazardRec->isEnabled()) { |
419 | if (!isTop() && SU->isCall) { |
420 | // Calls are scheduled with their preceding instructions. For bottom-up |
421 | // scheduling, clear the pipeline state before emitting. |
422 | HazardRec->Reset(); |
423 | } |
424 | HazardRec->EmitInstruction(SU); |
425 | } |
426 | |
427 | // Update DFA model. |
428 | startNewCycle = ResourceModel->reserveResources(SU, IsTop: isTop()); |
429 | |
430 | // Check the instruction group dispatch limit. |
431 | // TODO: Check if this SU must end a dispatch group. |
432 | IssueCount += SchedModel->getNumMicroOps(MI: SU->getInstr()); |
433 | if (startNewCycle) { |
434 | LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); |
435 | bumpCycle(); |
436 | } else |
437 | LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle " |
438 | << CurrCycle << '\n'); |
439 | } |
440 | |
441 | /// Release pending ready nodes in to the available queue. This makes them |
442 | /// visible to heuristics. |
443 | void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { |
444 | // If the available queue is empty, it is safe to reset MinReadyCycle. |
445 | if (Available.empty()) |
446 | MinReadyCycle = std::numeric_limits<unsigned>::max(); |
447 | |
448 | // Check to see if any of the pending instructions are ready to issue. If |
449 | // so, add them to the available queue. |
450 | for (unsigned i = 0, e = Pending.size(); i != e; ++i) { |
451 | SUnit *SU = *(Pending.begin() + i); |
452 | unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; |
453 | |
454 | if (ReadyCycle < MinReadyCycle) |
455 | MinReadyCycle = ReadyCycle; |
456 | |
457 | if (ReadyCycle > CurrCycle) |
458 | continue; |
459 | |
460 | if (checkHazard(SU)) |
461 | continue; |
462 | |
463 | Available.push(SU); |
464 | Pending.remove(I: Pending.begin() + i); |
465 | --i; |
466 | --e; |
467 | } |
468 | CheckPending = false; |
469 | } |
470 | |
471 | /// Remove SU from the ready set for this boundary. |
472 | void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { |
473 | if (Available.isInQueue(SU)) |
474 | Available.remove(I: Available.find(SU)); |
475 | else { |
476 | assert(Pending.isInQueue(SU) && "bad ready count" ); |
477 | Pending.remove(I: Pending.find(SU)); |
478 | } |
479 | } |
480 | |
/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  // Advance when nothing is available, or when the single available node
  // cannot issue yet (no DFA resources, or it still has unscheduled weak
  // dependencies) while other nodes are still pending.
  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    // A bounded number of cycle bumps must release something; otherwise the
    // hazard can never clear.
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    // Passing nullptr closes the current packet before bumping the cycle.
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}
508 | |
509 | #ifndef NDEBUG |
// Print a one-line trace for a candidate: label, queue name, optional
// pressure-set delta, cost, and the node itself. Debug builds only.
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << " ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}
522 | |
// Very detailed queue dump, to be used with higher verbosity levels.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
    const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
    ReadyQueue &Q) {
  // getMaxPressureDelta temporarily modifies the tracker, hence the cast.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    // SchedulingCost in verbose mode prints the per-heuristic breakdown.
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
  dbgs() << "\n";
}
544 | #endif |
545 | |
546 | /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor |
547 | /// of SU, return true (we may have duplicates) |
548 | static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { |
549 | if (SU->NumPredsLeft == 0) |
550 | return false; |
551 | |
552 | for (auto &Pred : SU->Preds) { |
553 | // We found an available, but not scheduled, predecessor. |
554 | if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) |
555 | return false; |
556 | } |
557 | |
558 | return true; |
559 | } |
560 | |
561 | /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor |
562 | /// of SU, return true (we may have duplicates) |
563 | static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { |
564 | if (SU->NumSuccsLeft == 0) |
565 | return false; |
566 | |
567 | for (auto &Succ : SU->Succs) { |
568 | // We found an available, but not scheduled, successor. |
569 | if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) |
570 | return false; |
571 | } |
572 | return true; |
573 | } |
574 | |
575 | /// Check if the instruction changes the register pressure of a register in the |
576 | /// high pressure set. The function returns a negative value if the pressure |
577 | /// decreases and a positive value is the pressure increases. If the instruction |
578 | /// doesn't use a high pressure register or doesn't change the register |
579 | /// pressure, then return 0. |
580 | int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { |
581 | PressureDiff &PD = DAG->getPressureDiff(SU); |
582 | for (const auto &P : PD) { |
583 | if (!P.isValid()) |
584 | continue; |
585 | // The pressure differences are computed bottom-up, so the comparison for |
586 | // an increase is positive in the bottom direction, but negative in the |
587 | // top-down direction. |
588 | if (HighPressureSets[P.getPSet()]) |
589 | return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); |
590 | } |
591 | return 0; |
592 | } |
593 | |
/// Single point to compute overall scheduling cost.
/// Accumulates bonuses/penalties into a single integer: higher is better.
/// TODO: More heuristics will be used soon.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  unsigned IsAvailableAmt = 0;
  // Critical path first: reward height (top-down) or depth (bottom-up) when
  // the zone is latency bound.
  if (Q.getID() == TopQID) {
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
    // If there are register pressure issues, then we remove the value added for
    // the instruction being available. The rationale is that we really don't
    // want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction in
  // the current packet, then it should not be scheduled yet. The case occurs
  // when the dependent instruction is scheduled in a new packet, so the
  // scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}
762 | |
763 | /// Pick the best candidate from the top queue. |
764 | /// |
765 | /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during |
766 | /// DAG building. To adjust for the current scheduling location we need to |
767 | /// maintain the number of vreg uses remaining to be top-scheduled. |
768 | ConvergingVLIWScheduler::CandResult |
769 | ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone, |
770 | const RegPressureTracker &RPTracker, |
771 | SchedCandidate &Candidate) { |
772 | ReadyQueue &Q = Zone.Available; |
773 | LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) |
774 | readyQueueVerboseDump(RPTracker, Candidate, Q); |
775 | else Q.dump();); |
776 | |
777 | // getMaxPressureDelta temporarily modifies the tracker. |
778 | RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); |
779 | |
780 | // BestSU remains NULL if no top candidates beat the best existing candidate. |
781 | CandResult FoundCandidate = NoCand; |
782 | for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { |
783 | RegPressureDelta RPDelta; |
784 | TempTracker.getMaxPressureDelta(MI: (*I)->getInstr(), Delta&: RPDelta, |
785 | CriticalPSets: DAG->getRegionCriticalPSets(), |
786 | MaxPressureLimit: DAG->getRegPressure().MaxSetPressure); |
787 | |
788 | int CurrentCost = SchedulingCost(Q, SU: *I, Candidate, Delta&: RPDelta, verbose: false); |
789 | |
790 | // Initialize the candidate if needed. |
791 | if (!Candidate.SU) { |
792 | LLVM_DEBUG(traceCandidate("DCAND" , Q, *I, CurrentCost)); |
793 | Candidate.SU = *I; |
794 | Candidate.RPDelta = RPDelta; |
795 | Candidate.SCost = CurrentCost; |
796 | FoundCandidate = NodeOrder; |
797 | continue; |
798 | } |
799 | |
800 | // Choose node order for negative cost candidates. There is no good |
801 | // candidate in this case. |
802 | if (CurrentCost < 0 && Candidate.SCost < 0) { |
803 | if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || |
804 | (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { |
805 | LLVM_DEBUG(traceCandidate("NCAND" , Q, *I, CurrentCost)); |
806 | Candidate.SU = *I; |
807 | Candidate.RPDelta = RPDelta; |
808 | Candidate.SCost = CurrentCost; |
809 | FoundCandidate = NodeOrder; |
810 | } |
811 | continue; |
812 | } |
813 | |
814 | // Best cost. |
815 | if (CurrentCost > Candidate.SCost) { |
816 | LLVM_DEBUG(traceCandidate("CCAND" , Q, *I, CurrentCost)); |
817 | Candidate.SU = *I; |
818 | Candidate.RPDelta = RPDelta; |
819 | Candidate.SCost = CurrentCost; |
820 | FoundCandidate = BestCost; |
821 | continue; |
822 | } |
823 | |
824 | // Choose an instruction that does not depend on an artificial edge. |
825 | unsigned CurrWeak = getWeakLeft(SU: *I, isTop: (Q.getID() == TopQID)); |
826 | unsigned CandWeak = getWeakLeft(SU: Candidate.SU, isTop: (Q.getID() == TopQID)); |
827 | if (CurrWeak != CandWeak) { |
828 | if (CurrWeak < CandWeak) { |
829 | LLVM_DEBUG(traceCandidate("WCAND" , Q, *I, CurrentCost)); |
830 | Candidate.SU = *I; |
831 | Candidate.RPDelta = RPDelta; |
832 | Candidate.SCost = CurrentCost; |
833 | FoundCandidate = Weak; |
834 | } |
835 | continue; |
836 | } |
837 | |
838 | if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(SU: *I)) { |
839 | unsigned CurrSize, CandSize; |
840 | if (Q.getID() == TopQID) { |
841 | CurrSize = (*I)->Succs.size(); |
842 | CandSize = Candidate.SU->Succs.size(); |
843 | } else { |
844 | CurrSize = (*I)->Preds.size(); |
845 | CandSize = Candidate.SU->Preds.size(); |
846 | } |
847 | if (CurrSize > CandSize) { |
848 | LLVM_DEBUG(traceCandidate("SPCAND" , Q, *I, CurrentCost)); |
849 | Candidate.SU = *I; |
850 | Candidate.RPDelta = RPDelta; |
851 | Candidate.SCost = CurrentCost; |
852 | FoundCandidate = BestCost; |
853 | } |
854 | // Keep the old candidate if it's a better candidate. That is, don't use |
855 | // the subsequent tie breaker. |
856 | if (CurrSize != CandSize) |
857 | continue; |
858 | } |
859 | |
860 | // Tie breaker. |
861 | // To avoid scheduling indeterminism, we need a tie breaker |
862 | // for the case when cost is identical for two nodes. |
863 | if (UseNewerCandidate && CurrentCost == Candidate.SCost) { |
864 | if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || |
865 | (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { |
866 | LLVM_DEBUG(traceCandidate("TCAND" , Q, *I, CurrentCost)); |
867 | Candidate.SU = *I; |
868 | Candidate.RPDelta = RPDelta; |
869 | Candidate.SCost = CurrentCost; |
870 | FoundCandidate = NodeOrder; |
871 | continue; |
872 | } |
873 | } |
874 | |
875 | // Fall through to original instruction order. |
876 | // Only consider node order if Candidate was chosen from this Q. |
877 | if (FoundCandidate == NoCand) |
878 | continue; |
879 | } |
880 | return FoundCandidate; |
881 | } |
882 | |
883 | /// Pick the best candidate node from either the top or bottom queue. |
884 | SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { |
885 | // Schedule as far as possible in the direction of no choice. This is most |
886 | // efficient, but also provides the best heuristics for CriticalPSets. |
887 | if (SUnit *SU = Bot.pickOnlyChoice()) { |
888 | LLVM_DEBUG(dbgs() << "Picked only Bottom\n" ); |
889 | IsTopNode = false; |
890 | return SU; |
891 | } |
892 | if (SUnit *SU = Top.pickOnlyChoice()) { |
893 | LLVM_DEBUG(dbgs() << "Picked only Top\n" ); |
894 | IsTopNode = true; |
895 | return SU; |
896 | } |
897 | SchedCandidate BotCand; |
898 | // Prefer bottom scheduling when heuristics are silent. |
899 | CandResult BotResult = |
900 | pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand); |
901 | assert(BotResult != NoCand && "failed to find the first candidate" ); |
902 | |
903 | // If either Q has a single candidate that provides the least increase in |
904 | // Excess pressure, we can immediately schedule from that Q. |
905 | // |
906 | // RegionCriticalPSets summarizes the pressure within the scheduled region and |
907 | // affects picking from either Q. If scheduling in one direction must |
908 | // increase pressure for one of the excess PSets, then schedule in that |
909 | // direction first to provide more freedom in the other direction. |
910 | if (BotResult == SingleExcess || BotResult == SingleCritical) { |
911 | LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n" ); |
912 | IsTopNode = false; |
913 | return BotCand.SU; |
914 | } |
915 | // Check if the top Q has a better candidate. |
916 | SchedCandidate TopCand; |
917 | CandResult TopResult = |
918 | pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand); |
919 | assert(TopResult != NoCand && "failed to find the first candidate" ); |
920 | |
921 | if (TopResult == SingleExcess || TopResult == SingleCritical) { |
922 | LLVM_DEBUG(dbgs() << "Prefered Top Node\n" ); |
923 | IsTopNode = true; |
924 | return TopCand.SU; |
925 | } |
926 | // If either Q has a single candidate that minimizes pressure above the |
927 | // original region's pressure pick it. |
928 | if (BotResult == SingleMax) { |
929 | LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n" ); |
930 | IsTopNode = false; |
931 | return BotCand.SU; |
932 | } |
933 | if (TopResult == SingleMax) { |
934 | LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n" ); |
935 | IsTopNode = true; |
936 | return TopCand.SU; |
937 | } |
938 | if (TopCand.SCost > BotCand.SCost) { |
939 | LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n" ); |
940 | IsTopNode = true; |
941 | return TopCand.SU; |
942 | } |
943 | // Otherwise prefer the bottom candidate in node order. |
944 | LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n" ); |
945 | IsTopNode = false; |
946 | return BotCand.SU; |
947 | } |
948 | |
949 | /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. |
950 | SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { |
951 | if (DAG->top() == DAG->bottom()) { |
952 | assert(Top.Available.empty() && Top.Pending.empty() && |
953 | Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage" ); |
954 | return nullptr; |
955 | } |
956 | SUnit *SU; |
957 | if (ForceTopDown) { |
958 | SU = Top.pickOnlyChoice(); |
959 | if (!SU) { |
960 | SchedCandidate TopCand; |
961 | CandResult TopResult = |
962 | pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand); |
963 | assert(TopResult != NoCand && "failed to find the first candidate" ); |
964 | (void)TopResult; |
965 | SU = TopCand.SU; |
966 | } |
967 | IsTopNode = true; |
968 | } else if (ForceBottomUp) { |
969 | SU = Bot.pickOnlyChoice(); |
970 | if (!SU) { |
971 | SchedCandidate BotCand; |
972 | CandResult BotResult = |
973 | pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand); |
974 | assert(BotResult != NoCand && "failed to find the first candidate" ); |
975 | (void)BotResult; |
976 | SU = BotCand.SU; |
977 | } |
978 | IsTopNode = false; |
979 | } else { |
980 | SU = pickNodeBidrectional(IsTopNode); |
981 | } |
982 | if (SU->isTopReady()) |
983 | Top.removeReady(SU); |
984 | if (SU->isBottomReady()) |
985 | Bot.removeReady(SU); |
986 | |
987 | LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom" ) |
988 | << " Scheduling instruction in cycle " |
989 | << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" |
990 | << reportPackets() << ")\n" ; |
991 | DAG->dumpNode(*SU)); |
992 | return SU; |
993 | } |
994 | |
995 | /// Update the scheduler's state after scheduling a node. This is the same node |
996 | /// that was just returned by pickNode(). However, VLIWMachineScheduler needs |
997 | /// to update it's state based on the current cycle before MachineSchedStrategy |
998 | /// does. |
999 | void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { |
1000 | if (IsTopNode) { |
1001 | Top.bumpNode(SU); |
1002 | SU->TopReadyCycle = Top.CurrCycle; |
1003 | } else { |
1004 | Bot.bumpNode(SU); |
1005 | SU->BotReadyCycle = Bot.CurrCycle; |
1006 | } |
1007 | } |
1008 | |