1 | //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // MachineScheduler schedules machine instructions after phi elimination. It |
10 | // preserves LiveIntervals so it can be invoked before register allocation. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/CodeGen/VLIWMachineScheduler.h" |
15 | #include "llvm/ADT/SmallVector.h" |
16 | #include "llvm/CodeGen/DFAPacketizer.h" |
17 | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | #include "llvm/CodeGen/MachineFunction.h" |
19 | #include "llvm/CodeGen/MachineInstr.h" |
20 | #include "llvm/CodeGen/MachineLoopInfo.h" |
21 | #include "llvm/CodeGen/RegisterClassInfo.h" |
22 | #include "llvm/CodeGen/RegisterPressure.h" |
23 | #include "llvm/CodeGen/ScheduleDAG.h" |
24 | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" |
25 | #include "llvm/CodeGen/TargetInstrInfo.h" |
26 | #include "llvm/CodeGen/TargetOpcodes.h" |
27 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
28 | #include "llvm/CodeGen/TargetSchedule.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/Support/CommandLine.h" |
31 | #include "llvm/Support/Debug.h" |
32 | #include "llvm/Support/raw_ostream.h" |
33 | #include <algorithm> |
34 | #include <cassert> |
35 | #include <iomanip> |
36 | #include <limits> |
37 | #include <memory> |
38 | #include <sstream> |
39 | |
40 | using namespace llvm; |
41 | |
42 | #define DEBUG_TYPE "machine-scheduler" |
43 | |
44 | static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure" , cl::Hidden, |
45 | cl::init(Val: false)); |
46 | |
47 | static cl::opt<bool> UseNewerCandidate("use-newer-candidate" , cl::Hidden, |
48 | cl::init(Val: true)); |
49 | |
50 | static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level" , |
51 | cl::Hidden, cl::init(Val: 1)); |
52 | |
53 | // Check if the scheduler should penalize instructions that are available to |
54 | // early due to a zero-latency dependence. |
55 | static cl::opt<bool> CheckEarlyAvail("check-early-avail" , cl::Hidden, |
56 | cl::init(Val: true)); |
57 | |
58 | // This value is used to determine if a register class is a high pressure set. |
59 | // We compute the maximum number of registers needed and divided by the total |
60 | // available. Then, we compare the result to this value. |
61 | static cl::opt<float> RPThreshold("vliw-misched-reg-pressure" , cl::Hidden, |
62 | cl::init(Val: 0.75f), |
63 | cl::desc("High register pressure threhold." )); |
64 | |
65 | VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI, |
66 | const TargetSchedModel *SM) |
67 | : TII(STI.getInstrInfo()), SchedModel(SM) { |
68 | ResourcesModel = createPacketizer(STI); |
69 | |
70 | // This hard requirement could be relaxed, |
71 | // but for now do not let it proceed. |
72 | assert(ResourcesModel && "Unimplemented CreateTargetScheduleState." ); |
73 | |
74 | Packet.reserve(N: SchedModel->getIssueWidth()); |
75 | Packet.clear(); |
76 | ResourcesModel->clearResources(); |
77 | } |
78 | |
/// Forget the packet under construction and release all DFA resources so
/// packet formation restarts from an empty state.
void VLIWResourceModel::reset() {
  Packet.clear();
  ResourcesModel->clearResources();
}
83 | |
// The model owns the DFA packetizer allocated in the constructor.
VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
85 | |
86 | /// Return true if there is a dependence between SUd and SUu. |
87 | bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) { |
88 | if (SUd->Succs.size() == 0) |
89 | return false; |
90 | |
91 | for (const auto &S : SUd->Succs) { |
92 | // Since we do not add pseudos to packets, might as well |
93 | // ignore order dependencies. |
94 | if (S.isCtrl()) |
95 | continue; |
96 | |
97 | if (S.getSUnit() == SUu && S.getLatency() > 0) |
98 | return true; |
99 | } |
100 | return false; |
101 | } |
102 | |
103 | /// Check if scheduling of this SU is possible |
104 | /// in the current packet. |
105 | /// It is _not_ precise (statefull), it is more like |
106 | /// another heuristic. Many corner cases are figured |
107 | /// empirically. |
108 | bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { |
109 | if (!SU || !SU->getInstr()) |
110 | return false; |
111 | |
112 | // First see if the pipeline could receive this instruction |
113 | // in the current cycle. |
114 | switch (SU->getInstr()->getOpcode()) { |
115 | default: |
116 | if (!ResourcesModel->canReserveResources(MI&: *SU->getInstr())) |
117 | return false; |
118 | break; |
119 | case TargetOpcode::EXTRACT_SUBREG: |
120 | case TargetOpcode::INSERT_SUBREG: |
121 | case TargetOpcode::SUBREG_TO_REG: |
122 | case TargetOpcode::REG_SEQUENCE: |
123 | case TargetOpcode::IMPLICIT_DEF: |
124 | case TargetOpcode::COPY: |
125 | case TargetOpcode::INLINEASM: |
126 | case TargetOpcode::INLINEASM_BR: |
127 | break; |
128 | } |
129 | |
130 | // Now see if there are no other dependencies to instructions already |
131 | // in the packet. |
132 | if (IsTop) { |
133 | for (const SUnit *U : Packet) |
134 | if (hasDependence(SUd: U, SUu: SU)) |
135 | return false; |
136 | } else { |
137 | for (const SUnit *U : Packet) |
138 | if (hasDependence(SUd: SU, SUu: U)) |
139 | return false; |
140 | } |
141 | return true; |
142 | } |
143 | |
/// Keep track of available resources.
///
/// Places \p SU into the current packet, first starting a new packet if the
/// instruction does not fit (resource conflict or the packet already holds
/// IssueWidth instructions). Returns true when a new cycle/packet was started.
bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  bool startNewCycle = false;
  // Artificially reset state: the scheduler passes a null SU to force the
  // start of a fresh packet (e.g. when it advances the cycle).
  if (!SU) {
    reset();
    TotalPackets++;
    return false;
  }
  // If this SU does not fit in the packet or the packet is now full
  // start a new one.
  if (!isResourceAvailable(SU, IsTop) ||
      Packet.size() >= SchedModel->getIssueWidth()) {
    reset();
    TotalPackets++;
    startNewCycle = true;
  }

  // Reserve DFA resources for real instructions; pseudos, labels, copies and
  // inline asm consume no functional units.
  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(*SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }
  Packet.push_back(SU);

#ifndef NDEBUG
  // Dump the packet being formed (debug builds only).
  LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
    LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
    LLVM_DEBUG(Packet[i]->getInstr()->dump());
  }
#endif

  return startNewCycle;
}
192 | |
193 | DFAPacketizer * |
194 | VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const { |
195 | return STI.getInstrInfo()->CreateTargetScheduleState(STI); |
196 | } |
197 | |
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  // Build the region's DAG together with register pressure information.
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postProcessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Debug-only: report the maximum height and depth seen in the DAG.
  LLVM_DEBUG({
    unsigned maxH = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getHeight() > maxH)
        maxH = SU.getHeight();
    dbgs() << "Max Height " << maxH << "\n";
  });
  LLVM_DEBUG({
    unsigned maxD = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getDepth() > maxD)
        maxD = SU.getDepth();
    dbgs() << "Max Depth " << maxD << "\n";
  });
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  // Main loop: repeatedly let the strategy pick a node from either end until
  // the top and bottom zones converge.
  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    if (!checkSchedLimit())
      break;

    // Move the instruction to its new position in the region.
    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    // Release any nodes made ready by scheduling SU.
    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  // Reinsert debug values that were set aside during DAG construction.
  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
269 | |
/// Set up the strategy for a new scheduling region: boundary state, hazard
/// recognizers, resource models, and the high-pressure set mask.
void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = static_cast<VLIWMachineScheduler *>(dag);
  SchedModel = DAG->getSchedModel();

  Top.init(DAG, SchedModel);
  Bot.init(DAG, SchedModel);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  // are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
  const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  // Free any recognizers/models left over from the previous region before
  // allocating fresh ones (initialize() runs once per region).
  delete Top.HazardRec;
  delete Bot.HazardRec;
  Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  delete Top.ResourceModel;
  delete Bot.ResourceModel;
  Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
  Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());

  // Mark each pressure set whose regional maximum exceeds RPThreshold of its
  // limit; SchedulingCost() consults this mask via pressureChange().
  const std::vector<unsigned> &MaxPressure =
      DAG->getRegPressure().MaxSetPressure;
  HighPressureSets.assign(MaxPressure.size(), false);
  for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
    unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
    HighPressureSets[i] =
        ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
  }
}
301 | |
/// Factory hook so target-specific subclasses can supply a specialized
/// resource model. Ownership passes to the caller (the boundary deletes it).
VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
    const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
  return new VLIWResourceModel(STI, SchedModel);
}
306 | |
307 | void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { |
308 | for (const SDep &PI : SU->Preds) { |
309 | unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; |
310 | unsigned MinLatency = PI.getLatency(); |
311 | #ifndef NDEBUG |
312 | Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); |
313 | #endif |
314 | if (SU->TopReadyCycle < PredReadyCycle + MinLatency) |
315 | SU->TopReadyCycle = PredReadyCycle + MinLatency; |
316 | } |
317 | |
318 | if (!SU->isScheduled) |
319 | Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle); |
320 | } |
321 | |
322 | void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { |
323 | assert(SU->getInstr() && "Scheduled SUnit must have instr" ); |
324 | |
325 | for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; |
326 | ++I) { |
327 | unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; |
328 | unsigned MinLatency = I->getLatency(); |
329 | #ifndef NDEBUG |
330 | Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); |
331 | #endif |
332 | if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) |
333 | SU->BotReadyCycle = SuccReadyCycle + MinLatency; |
334 | } |
335 | |
336 | if (!SU->isScheduled) |
337 | Bot.releaseNode(SU, ReadyCycle: SU->BotReadyCycle); |
338 | } |
339 | |
// Each boundary owns its resource model and hazard recognizer; both are
// (re)allocated in ConvergingVLIWScheduler::initialize().
ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
  delete ResourceModel;
  delete HazardRec;
}
344 | |
345 | /// Does this SU have a hazard within the current instruction group. |
346 | /// |
347 | /// The scheduler supports two modes of hazard recognition. The first is the |
348 | /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that |
349 | /// supports highly complicated in-order reservation tables |
350 | /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. |
351 | /// |
352 | /// The second is a streamlined mechanism that checks for hazards based on |
353 | /// simple counters that the scheduler itself maintains. It explicitly checks |
354 | /// for instruction dispatch limitations, including the number of micro-ops that |
355 | /// can dispatch per cycle. |
356 | /// |
357 | /// TODO: Also check whether the SU must start a new group. |
358 | bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { |
359 | if (HazardRec->isEnabled()) |
360 | return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; |
361 | |
362 | unsigned uops = SchedModel->getNumMicroOps(MI: SU->getInstr()); |
363 | if (IssueCount + uops > SchedModel->getIssueWidth()) |
364 | return true; |
365 | |
366 | return false; |
367 | } |
368 | |
369 | void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode( |
370 | SUnit *SU, unsigned ReadyCycle) { |
371 | if (ReadyCycle < MinReadyCycle) |
372 | MinReadyCycle = ReadyCycle; |
373 | |
374 | // Check for interlocks first. For the purpose of other heuristics, an |
375 | // instruction that cannot issue appears as if it's not in the ReadyQueue. |
376 | if (ReadyCycle > CurrCycle || checkHazard(SU)) |
377 | |
378 | Pending.push(SU); |
379 | else |
380 | Available.push(SU); |
381 | } |
382 | |
383 | /// Move the boundary of scheduled code by one cycle. |
384 | void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { |
385 | unsigned Width = SchedModel->getIssueWidth(); |
386 | IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; |
387 | |
388 | assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && |
389 | "MinReadyCycle uninitialized" ); |
390 | unsigned NextCycle = std::max(a: CurrCycle + 1, b: MinReadyCycle); |
391 | |
392 | if (!HazardRec->isEnabled()) { |
393 | // Bypass HazardRec virtual calls. |
394 | CurrCycle = NextCycle; |
395 | } else { |
396 | // Bypass getHazardType calls in case of long latency. |
397 | for (; CurrCycle != NextCycle; ++CurrCycle) { |
398 | if (isTop()) |
399 | HazardRec->AdvanceCycle(); |
400 | else |
401 | HazardRec->RecedeCycle(); |
402 | } |
403 | } |
404 | CheckPending = true; |
405 | |
406 | LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " |
407 | << CurrCycle << '\n'); |
408 | } |
409 | |
/// Move the boundary of scheduled code by one SUnit.
///
/// Updates the hazard recognizer, the DFA resource model, and the micro-op
/// count for the current group; advances the cycle when SU opened a new
/// packet.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update DFA model. reserveResources() returns true when SU did not fit
  // into the current packet and a new one was started.
  startNewCycle = ResourceModel->reserveResources(SU, isTop());

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
  if (startNewCycle) {
    LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  } else
    LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
                      << CurrCycle << '\n');
}
437 | |
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin() + i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    // Track the earliest ready cycle among nodes that stay pending.
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    // Not ready at the current cycle yet.
    if (ReadyCycle > CurrCycle)
      continue;

    // Ready, but would conflict with the current instruction group.
    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin() + i);
    // remove() shifts later elements down; re-examine the same index and
    // shrink the loop bound accordingly.
    --i;
    --e;
  }
  CheckPending = false;
}
467 | |
468 | /// Remove SU from the ready set for this boundary. |
469 | void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { |
470 | if (Available.isInQueue(SU)) |
471 | Available.remove(I: Available.find(SU)); |
472 | else { |
473 | assert(Pending.isInQueue(SU) && "bad ready count" ); |
474 | Pending.remove(I: Pending.find(SU)); |
475 | } |
476 | } |
477 | |
478 | /// If this queue only has one ready candidate, return it. As a side effect, |
479 | /// advance the cycle until at least one node is ready. If multiple instructions |
480 | /// are ready, return NULL. |
/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  // Burn a cycle when nothing is available, or when the sole available node
  // cannot be issued yet (no DFA resources, or weak edges to unscheduled
  // nodes) while others are still pending.
  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    // A bounded number of cycle advances must release some node; otherwise
    // the hazard could never clear.
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    // Keep the resource model in lockstep: force it to start a new packet.
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}
505 | |
506 | #ifndef NDEBUG |
/// Print one scheduling candidate with its cost and, when valid, the pressure
/// change it would cause. Debug builds only.
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << " ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}
519 | |
// Very detailed queue dump, to be used with higher verbosity levels.
// For each queued node, prints the node number, the verbose per-heuristic
// cost breakdown (via SchedulingCost with verbose=true), and the instruction.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
    const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
    ReadyQueue &Q) {
  // getMaxPressureDelta() temporarily modifies the tracker, hence the cast.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
  dbgs() << "\n";
}
541 | #endif |
542 | |
543 | /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor |
544 | /// of SU, return true (we may have duplicates) |
545 | static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { |
546 | if (SU->NumPredsLeft == 0) |
547 | return false; |
548 | |
549 | for (auto &Pred : SU->Preds) { |
550 | // We found an available, but not scheduled, predecessor. |
551 | if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) |
552 | return false; |
553 | } |
554 | |
555 | return true; |
556 | } |
557 | |
558 | /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor |
559 | /// of SU, return true (we may have duplicates) |
560 | static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { |
561 | if (SU->NumSuccsLeft == 0) |
562 | return false; |
563 | |
564 | for (auto &Succ : SU->Succs) { |
565 | // We found an available, but not scheduled, successor. |
566 | if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) |
567 | return false; |
568 | } |
569 | return true; |
570 | } |
571 | |
572 | /// Check if the instruction changes the register pressure of a register in the |
573 | /// high pressure set. The function returns a negative value if the pressure |
574 | /// decreases and a positive value is the pressure increases. If the instruction |
575 | /// doesn't use a high pressure register or doesn't change the register |
576 | /// pressure, then return 0. |
577 | int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { |
578 | PressureDiff &PD = DAG->getPressureDiff(SU); |
579 | for (const auto &P : PD) { |
580 | if (!P.isValid()) |
581 | continue; |
582 | // The pressure differences are computed bottom-up, so the comparison for |
583 | // an increase is positive in the bottom direction, but negative in the |
584 | // top-down direction. |
585 | if (HighPressureSets[P.getPSet()]) |
586 | return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); |
587 | } |
588 | return 0; |
589 | } |
590 | |
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
///
/// Despite the name, larger is better: the value acts as a priority. \p
/// verbose only affects debug printing (used by readyQueueVerboseDump).
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  unsigned IsAvailableAmt = 0;
  // Critical path first.
  if (Q.getID() == TopQID) {
    // Top-down: height approximates the remaining critical path below SU.
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    // Bottom-up: depth plays the symmetric role to height.
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
    // If there are register pressure issues, then we remove the value added for
    // the instruction being available. The rationale is that we really don't
    // want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction in
  // the current packet, then it should not be scheduled yet. The case occurs
  // when the dependent instruction is scheduled in a new packet, so the
  // scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}
759 | |
760 | /// Pick the best candidate from the top queue. |
761 | /// |
762 | /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during |
763 | /// DAG building. To adjust for the current scheduling location we need to |
764 | /// maintain the number of vreg uses remaining to be top-scheduled. |
765 | ConvergingVLIWScheduler::CandResult |
766 | ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone, |
767 | const RegPressureTracker &RPTracker, |
768 | SchedCandidate &Candidate) { |
769 | ReadyQueue &Q = Zone.Available; |
770 | LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) |
771 | readyQueueVerboseDump(RPTracker, Candidate, Q); |
772 | else Q.dump();); |
773 | |
774 | // getMaxPressureDelta temporarily modifies the tracker. |
775 | RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); |
776 | |
777 | // BestSU remains NULL if no top candidates beat the best existing candidate. |
778 | CandResult FoundCandidate = NoCand; |
779 | for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { |
780 | RegPressureDelta RPDelta; |
781 | TempTracker.getMaxPressureDelta(MI: (*I)->getInstr(), Delta&: RPDelta, |
782 | CriticalPSets: DAG->getRegionCriticalPSets(), |
783 | MaxPressureLimit: DAG->getRegPressure().MaxSetPressure); |
784 | |
785 | int CurrentCost = SchedulingCost(Q, SU: *I, Candidate, Delta&: RPDelta, verbose: false); |
786 | |
787 | // Initialize the candidate if needed. |
788 | if (!Candidate.SU) { |
789 | LLVM_DEBUG(traceCandidate("DCAND" , Q, *I, CurrentCost)); |
790 | Candidate.SU = *I; |
791 | Candidate.RPDelta = RPDelta; |
792 | Candidate.SCost = CurrentCost; |
793 | FoundCandidate = NodeOrder; |
794 | continue; |
795 | } |
796 | |
797 | // Choose node order for negative cost candidates. There is no good |
798 | // candidate in this case. |
799 | if (CurrentCost < 0 && Candidate.SCost < 0) { |
800 | if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || |
801 | (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { |
802 | LLVM_DEBUG(traceCandidate("NCAND" , Q, *I, CurrentCost)); |
803 | Candidate.SU = *I; |
804 | Candidate.RPDelta = RPDelta; |
805 | Candidate.SCost = CurrentCost; |
806 | FoundCandidate = NodeOrder; |
807 | } |
808 | continue; |
809 | } |
810 | |
811 | // Best cost. |
812 | if (CurrentCost > Candidate.SCost) { |
813 | LLVM_DEBUG(traceCandidate("CCAND" , Q, *I, CurrentCost)); |
814 | Candidate.SU = *I; |
815 | Candidate.RPDelta = RPDelta; |
816 | Candidate.SCost = CurrentCost; |
817 | FoundCandidate = BestCost; |
818 | continue; |
819 | } |
820 | |
821 | // Choose an instruction that does not depend on an artificial edge. |
822 | unsigned CurrWeak = getWeakLeft(SU: *I, isTop: (Q.getID() == TopQID)); |
823 | unsigned CandWeak = getWeakLeft(SU: Candidate.SU, isTop: (Q.getID() == TopQID)); |
824 | if (CurrWeak != CandWeak) { |
825 | if (CurrWeak < CandWeak) { |
826 | LLVM_DEBUG(traceCandidate("WCAND" , Q, *I, CurrentCost)); |
827 | Candidate.SU = *I; |
828 | Candidate.RPDelta = RPDelta; |
829 | Candidate.SCost = CurrentCost; |
830 | FoundCandidate = Weak; |
831 | } |
832 | continue; |
833 | } |
834 | |
835 | if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(SU: *I)) { |
836 | unsigned CurrSize, CandSize; |
837 | if (Q.getID() == TopQID) { |
838 | CurrSize = (*I)->Succs.size(); |
839 | CandSize = Candidate.SU->Succs.size(); |
840 | } else { |
841 | CurrSize = (*I)->Preds.size(); |
842 | CandSize = Candidate.SU->Preds.size(); |
843 | } |
844 | if (CurrSize > CandSize) { |
845 | LLVM_DEBUG(traceCandidate("SPCAND" , Q, *I, CurrentCost)); |
846 | Candidate.SU = *I; |
847 | Candidate.RPDelta = RPDelta; |
848 | Candidate.SCost = CurrentCost; |
849 | FoundCandidate = BestCost; |
850 | } |
851 | // Keep the old candidate if it's a better candidate. That is, don't use |
852 | // the subsequent tie breaker. |
853 | if (CurrSize != CandSize) |
854 | continue; |
855 | } |
856 | |
857 | // Tie breaker. |
858 | // To avoid scheduling indeterminism, we need a tie breaker |
859 | // for the case when cost is identical for two nodes. |
860 | if (UseNewerCandidate && CurrentCost == Candidate.SCost) { |
861 | if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || |
862 | (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { |
863 | LLVM_DEBUG(traceCandidate("TCAND" , Q, *I, CurrentCost)); |
864 | Candidate.SU = *I; |
865 | Candidate.RPDelta = RPDelta; |
866 | Candidate.SCost = CurrentCost; |
867 | FoundCandidate = NodeOrder; |
868 | continue; |
869 | } |
870 | } |
871 | |
872 | // Fall through to original instruction order. |
873 | // Only consider node order if Candidate was chosen from this Q. |
874 | if (FoundCandidate == NoCand) |
875 | continue; |
876 | } |
877 | return FoundCandidate; |
878 | } |
879 | |
880 | /// Pick the best candidate node from either the top or bottom queue. |
881 | SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { |
882 | // Schedule as far as possible in the direction of no choice. This is most |
883 | // efficient, but also provides the best heuristics for CriticalPSets. |
884 | if (SUnit *SU = Bot.pickOnlyChoice()) { |
885 | LLVM_DEBUG(dbgs() << "Picked only Bottom\n" ); |
886 | IsTopNode = false; |
887 | return SU; |
888 | } |
889 | if (SUnit *SU = Top.pickOnlyChoice()) { |
890 | LLVM_DEBUG(dbgs() << "Picked only Top\n" ); |
891 | IsTopNode = true; |
892 | return SU; |
893 | } |
894 | SchedCandidate BotCand; |
895 | // Prefer bottom scheduling when heuristics are silent. |
896 | CandResult BotResult = |
897 | pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand); |
898 | assert(BotResult != NoCand && "failed to find the first candidate" ); |
899 | |
900 | // If either Q has a single candidate that provides the least increase in |
901 | // Excess pressure, we can immediately schedule from that Q. |
902 | // |
903 | // RegionCriticalPSets summarizes the pressure within the scheduled region and |
904 | // affects picking from either Q. If scheduling in one direction must |
905 | // increase pressure for one of the excess PSets, then schedule in that |
906 | // direction first to provide more freedom in the other direction. |
907 | if (BotResult == SingleExcess || BotResult == SingleCritical) { |
908 | LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n" ); |
909 | IsTopNode = false; |
910 | return BotCand.SU; |
911 | } |
912 | // Check if the top Q has a better candidate. |
913 | SchedCandidate TopCand; |
914 | CandResult TopResult = |
915 | pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand); |
916 | assert(TopResult != NoCand && "failed to find the first candidate" ); |
917 | |
918 | if (TopResult == SingleExcess || TopResult == SingleCritical) { |
919 | LLVM_DEBUG(dbgs() << "Prefered Top Node\n" ); |
920 | IsTopNode = true; |
921 | return TopCand.SU; |
922 | } |
923 | // If either Q has a single candidate that minimizes pressure above the |
924 | // original region's pressure pick it. |
925 | if (BotResult == SingleMax) { |
926 | LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n" ); |
927 | IsTopNode = false; |
928 | return BotCand.SU; |
929 | } |
930 | if (TopResult == SingleMax) { |
931 | LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n" ); |
932 | IsTopNode = true; |
933 | return TopCand.SU; |
934 | } |
935 | if (TopCand.SCost > BotCand.SCost) { |
936 | LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n" ); |
937 | IsTopNode = true; |
938 | return TopCand.SU; |
939 | } |
940 | // Otherwise prefer the bottom candidate in node order. |
941 | LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n" ); |
942 | IsTopNode = false; |
943 | return BotCand.SU; |
944 | } |
945 | |
946 | /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. |
947 | SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { |
948 | if (DAG->top() == DAG->bottom()) { |
949 | assert(Top.Available.empty() && Top.Pending.empty() && |
950 | Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage" ); |
951 | return nullptr; |
952 | } |
953 | SUnit *SU; |
954 | if (PreRADirection == MISched::TopDown) { |
955 | SU = Top.pickOnlyChoice(); |
956 | if (!SU) { |
957 | SchedCandidate TopCand; |
958 | CandResult TopResult = |
959 | pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand); |
960 | assert(TopResult != NoCand && "failed to find the first candidate" ); |
961 | (void)TopResult; |
962 | SU = TopCand.SU; |
963 | } |
964 | IsTopNode = true; |
965 | } else if (PreRADirection == MISched::BottomUp) { |
966 | SU = Bot.pickOnlyChoice(); |
967 | if (!SU) { |
968 | SchedCandidate BotCand; |
969 | CandResult BotResult = |
970 | pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand); |
971 | assert(BotResult != NoCand && "failed to find the first candidate" ); |
972 | (void)BotResult; |
973 | SU = BotCand.SU; |
974 | } |
975 | IsTopNode = false; |
976 | } else { |
977 | SU = pickNodeBidrectional(IsTopNode); |
978 | } |
979 | if (SU->isTopReady()) |
980 | Top.removeReady(SU); |
981 | if (SU->isBottomReady()) |
982 | Bot.removeReady(SU); |
983 | |
984 | LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom" ) |
985 | << " Scheduling instruction in cycle " |
986 | << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" |
987 | << reportPackets() << ")\n" ; |
988 | DAG->dumpNode(*SU)); |
989 | return SU; |
990 | } |
991 | |
992 | /// Update the scheduler's state after scheduling a node. This is the same node |
993 | /// that was just returned by pickNode(). However, VLIWMachineScheduler needs |
994 | /// to update it's state based on the current cycle before MachineSchedStrategy |
995 | /// does. |
996 | void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { |
997 | if (IsTopNode) { |
998 | Top.bumpNode(SU); |
999 | SU->TopReadyCycle = Top.CurrCycle; |
1000 | } else { |
1001 | Bot.bumpNode(SU); |
1002 | SU->BotReadyCycle = Bot.CurrCycle; |
1003 | } |
1004 | } |
1005 | |