//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ResourcePriorityQueue class, which is a
// SchedulingPriorityQueue that prioritizes instructions using DFA state to
// reduce the length of the critical path through the basic block
// on VLIW platforms.
// The scheduler is basically a top-down adaptable list scheduler with DFA
// resource tracking added to the cost function.
// The DFA is queried as a state machine to model "packets/bundles" during
// scheduling. Currently packets/bundles are discarded at the end of
// scheduling, affecting only the order of instructions.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#define DEBUG_TYPE "scheduler"

static cl::opt<bool>
    DisableDFASched("disable-dfa-sched", cl::Hidden,
                    cl::desc("Disable use of DFA during scheduling"));

static cl::opt<int> RegPressureThreshold(
    "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5),
    cl::desc("Track reg pressure and switch priority to in-depth"));

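/// Initialize the queue from the current function: query the subtarget for
/// its DFA-based resource model and record the register pressure limit of
/// every register class.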
ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
    : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  TRI = STI.getRegisterInfo();
  TLI = IS->TLI;
  TII = STI.getInstrInfo();
  ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
  // This hard requirement could be relaxed, but for now
  // do not let it proceed.
  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");

  unsigned NumRC = TRI->getNumRegClasses();
  RegLimit.resize(NumRC);
  RegPressure.resize(NumRC);
  llvm::fill(RegLimit, 0);
  llvm::fill(RegPressure, 0);
  for (const TargetRegisterClass *RC : TRI->regclasses())
    RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);

  ParallelLiveRanges = 0;
  HorizontalVerticalBalance = 0;
}

ResourcePriorityQueue::~ResourcePriorityQueue() = default;

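/// Count the data predecessors of SU that define a value in register class
/// RCId. CopyFromReg predecessors are counted unconditionally, since their
/// value probably lives outside the current BB.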
unsigned
ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
  unsigned NumberDeps = 0;
  for (SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;

    SUnit *PredSU = Pred.getSUnit();
    const SDNode *ScegN = PredSU->getNode();

    if (!ScegN)
      continue;

    // If value is passed to CopyToReg, it is probably
    // live outside BB.
    switch (ScegN->getOpcode()) {
    default: break;
    case ISD::TokenFactor: break;
    case ISD::CopyFromReg: NumberDeps++; break;
    case ISD::CopyToReg: break;
    case ISD::INLINEASM: break;
    case ISD::INLINEASM_BR: break;
    }
    if (!ScegN->isMachineOpcode())
      continue;

    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
      MVT VT = ScegN->getSimpleValueType(i);
      if (TLI->isTypeLegal(VT)
          && (TLI->getRegClassFor(VT)->getID() == RCId)) {
        NumberDeps++;
        break;
      }
    }
  }
  return NumberDeps;
}

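/// Count the data successors of SU that consume a value in register class
/// RCId. CopyToReg successors are counted unconditionally, since the value
/// they copy probably lives outside the current BB.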
unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
                                                    unsigned RCId) {
  unsigned NumberDeps = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      continue;

    SUnit *SuccSU = Succ.getSUnit();
    const SDNode *ScegN = SuccSU->getNode();
    if (!ScegN)
      continue;

    // If value is passed to CopyToReg, it is probably
    // live outside BB.
    switch (ScegN->getOpcode()) {
    default: break;
    case ISD::TokenFactor: break;
    case ISD::CopyFromReg: break;
    case ISD::CopyToReg: NumberDeps++; break;
    case ISD::INLINEASM: break;
    case ISD::INLINEASM_BR: break;
    }
    if (!ScegN->isMachineOpcode())
      continue;

    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
      const SDValue &Op = ScegN->getOperand(i);
      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
      if (TLI->isTypeLegal(VT)
          && (TLI->getRegClassFor(VT)->getID() == RCId)) {
        NumberDeps++;
        break;
      }
    }
  }
  return NumberDeps;
}

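/// Count the control (order) successors of SU.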
static unsigned numberCtrlDepsInSU(SUnit *SU) {
  unsigned NumberDeps = 0;
  for (const SDep &Succ : SU->Succs)
    if (Succ.isCtrl())
      NumberDeps++;

  return NumberDeps;
}

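/// Count the control (order) predecessors of SU.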
static unsigned numberCtrlPredInSU(SUnit *SU) {
  unsigned NumberDeps = 0;
  for (SDep &Pred : SU->Preds)
    if (Pred.isCtrl())
      NumberDeps++;

  return NumberDeps;
}

///
/// Initialize nodes.
///
void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
  SUnits = &sunits;
  NumNodesSolelyBlocking.resize(SUnits->size(), 0);

  for (SUnit &SU : *SUnits) {
    initNumRegDefsLeft(&SU);
    SU.NodeQueueId = 0;
  }
}

/// This heuristic is used if DFA scheduling is not desired
/// for some VLIW platform.
bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
  // The isScheduleHigh flag allows nodes with wraparound dependencies that
  // cannot easily be modeled as edges with latencies to be scheduled as
  // soon as possible in a top-down schedule.
  if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
    return false;

  if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
    return true;

  unsigned LHSNum = LHS->NodeNum;
  unsigned RHSNum = RHS->NodeNum;

  // The most important heuristic is scheduling the critical path.
  unsigned LHSLatency = PQ->getLatency(LHSNum);
  unsigned RHSLatency = PQ->getLatency(RHSNum);
  if (LHSLatency < RHSLatency) return true;
  if (LHSLatency > RHSLatency) return false;

  // After that, if two nodes have identical latencies, look to see if one will
  // unblock more other nodes than the other.
  unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
  unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
  if (LHSBlocked < RHSBlocked) return true;
  if (LHSBlocked > RHSBlocked) return false;

  // Finally, just to provide a stable ordering, use the node number as a
  // deciding factor.
  return LHSNum < RHSNum;
}


/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
  SUnit *OnlyAvailablePred = nullptr;
  for (const SDep &Pred : SU->Preds) {
    SUnit &PredSU = *Pred.getSUnit();
    if (!PredSU.isScheduled) {
      // We found an available, but not scheduled, predecessor. If it's the
      // only one we have found, keep track of it... otherwise give up.
      if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
        return nullptr;
      OnlyAvailablePred = &PredSU;
    }
  }
  return OnlyAvailablePred;
}

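/// Add SU to the queue and record for how many of its successors SU is the
/// single remaining unscheduled predecessor.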
void ResourcePriorityQueue::push(SUnit *SU) {
  // Look at all of the successors of this node. Count the number of nodes
  // that this node is the sole unscheduled predecessor for.
  unsigned NumNodesBlocking = 0;
  for (const SDep &Succ : SU->Succs)
    if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
      ++NumNodesBlocking;

  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
  Queue.push_back(SU);
}

/// Check if scheduling of this SU is possible
/// in the current packet.
bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
  if (!SU || !SU->getNode())
    return false;

  // If this is a compound instruction,
  // it is likely to be a call. Do not delay it.
  if (SU->getNode()->getGluedNode())
    return true;

  // First see if the pipeline could receive this instruction
  // in the current cycle.
  if (SU->getNode()->isMachineOpcode())
    switch (SU->getNode()->getMachineOpcode()) {
    default:
      if (!ResourcesModel->canReserveResources(&TII->get(
              SU->getNode()->getMachineOpcode())))
        return false;
      break;
    case TargetOpcode::EXTRACT_SUBREG:
    case TargetOpcode::INSERT_SUBREG:
    case TargetOpcode::SUBREG_TO_REG:
    case TargetOpcode::REG_SEQUENCE:
    case TargetOpcode::IMPLICIT_DEF:
      break;
    }

  // Now see if there are no other dependencies
  // to instructions already in the packet.
  for (const SUnit *S : Packet)
    for (const SDep &Succ : S->Succs) {
      // Since we do not add pseudos to packets, might as well
      // ignore order deps.
      if (Succ.isCtrl())
        continue;

      if (Succ.getSUnit() == SU)
        return false;
    }

  return true;
}

/// Keep track of available resources.
void ResourcePriorityQueue::reserveResources(SUnit *SU) {
  // If this SU does not fit in the packet
  // start a new one.
  if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
    ResourcesModel->clearResources();
    Packet.clear();
  }

  if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
    switch (SU->getNode()->getMachineOpcode()) {
    default:
      ResourcesModel->reserveResources(&TII->get(
          SU->getNode()->getMachineOpcode()));
      break;
    case TargetOpcode::EXTRACT_SUBREG:
    case TargetOpcode::INSERT_SUBREG:
    case TargetOpcode::SUBREG_TO_REG:
    case TargetOpcode::REG_SEQUENCE:
    case TargetOpcode::IMPLICIT_DEF:
      break;
    }
    Packet.push_back(SU);
  }
  // Forcefully end packet for PseudoOps.
  else {
    ResourcesModel->clearResources();
    Packet.clear();
  }

  // If packet is now full, reset the state so in the next cycle
  // we start fresh.
  if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
    ResourcesModel->clearResources();
    Packet.clear();
  }
}

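/// Estimate the raw def/use balance for register class RCId if SU were
/// scheduled: values generated for successors increase pressure, operands
/// consumed from predecessors decrease it. Register file size is ignored.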
int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
  int RegBalance = 0;

  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
    return RegBalance;

  // Gen estimate.
  for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
    MVT VT = SU->getNode()->getSimpleValueType(i);
    if (TLI->isTypeLegal(VT)
        && TLI->getRegClassFor(VT)
        && TLI->getRegClassFor(VT)->getID() == RCId)
      RegBalance += numberRCValSuccInSU(SU, RCId);
  }
  // Kill estimate.
  for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
    const SDValue &Op = SU->getNode()->getOperand(i);
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (isa<ConstantSDNode>(Op.getNode()))
      continue;

    if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
        && TLI->getRegClassFor(VT)->getID() == RCId)
      RegBalance -= numberRCValPredInSU(SU, RCId);
  }
  return RegBalance;
}

/// Estimates change in reg pressure from this SU.
/// This is done by trivially tracking defined
/// and used vregs in dependent instructions.
/// The RawPressure flag makes this function ignore
/// existing reg file sizes, and report the raw def/use
/// balance.
int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
  int RegBalance = 0;

  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
    return RegBalance;

  if (RawPressure) {
    for (const TargetRegisterClass *RC : TRI->regclasses())
      RegBalance += rawRegPressureDelta(SU, RC->getID());
  }
  else {
    for (const TargetRegisterClass *RC : TRI->regclasses()) {
      if ((RegPressure[RC->getID()] +
           rawRegPressureDelta(SU, RC->getID()) > 0) &&
          (RegPressure[RC->getID()] +
           rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
        RegBalance += rawRegPressureDelta(SU, RC->getID());
    }
  }

  return RegBalance;
}

// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityTwo = 50;
static const unsigned PriorityThree = 15;
static const unsigned PriorityFour = 5;
static const unsigned ScaleOne = 20;
static const unsigned ScaleTwo = 10;
static const unsigned ScaleThree = 5;
static const unsigned FactorOne = 2;

/// Returns a single number reflecting the benefit of scheduling SU
/// in the current cycle.
int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (SU->isScheduled)
    return ResCount;

  // Forced priority is high.
  if (SU->isScheduleHigh)
    ResCount += PriorityOne;

  // Adaptable scheduling:
  // a small, but very parallel
  // region, where reg pressure is an issue.
  if (HorizontalVerticalBalance > RegPressureThreshold) {
    // Critical path first.
    ResCount += (SU->getHeight() * ScaleTwo);
    // If resources are available for it, multiply the
    // chance of scheduling.
    if (isResourceAvailable(SU))
      ResCount <<= FactorOne;

    // Consider change to reg pressure from scheduling
    // this SU.
    ResCount -= (regPressureDelta(SU, true) * ScaleOne);
  }
  // Default heuristic, greedy and
  // critical path driven.
  else {
    // Critical path first.
    ResCount += (SU->getHeight() * ScaleTwo);
    // Now see how many instructions are blocked by this SU.
    ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
    // If resources are available for it, multiply the
    // chance of scheduling.
    if (isResourceAvailable(SU))
      ResCount <<= FactorOne;

    ResCount -= (regPressureDelta(SU) * ScaleTwo);
  }

  // These are platform-specific things.
  // They will need to go into the back end
  // and be accessed from here via a hook.
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
    if (N->isMachineOpcode()) {
      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
      if (TID.isCall())
        ResCount += (PriorityTwo + (ScaleThree * N->getNumValues()));
    }
    else
      switch (N->getOpcode()) {
      default: break;
      case ISD::TokenFactor:
      case ISD::CopyFromReg:
      case ISD::CopyToReg:
        ResCount += PriorityFour;
        break;

      case ISD::INLINEASM:
      case ISD::INLINEASM_BR:
        ResCount += PriorityThree;
        break;
      }
  }
  return ResCount;
}


/// Main resource tracking point.
void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
  // Use NULL entry as an event marker to reset
  // the DFA state.
  if (!SU) {
    ResourcesModel->clearResources();
    Packet.clear();
    return;
  }

  const SDNode *ScegN = SU->getNode();
  // Update reg pressure tracking.
  // First update current node.
  if (ScegN->isMachineOpcode()) {
    // Estimate generated regs.
    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
      MVT VT = ScegN->getSimpleValueType(i);

      if (TLI->isTypeLegal(VT)) {
        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
        if (RC)
          RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
      }
    }
    // Estimate killed regs.
    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
      const SDValue &Op = ScegN->getOperand(i);
      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());

      if (TLI->isTypeLegal(VT)) {
        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
        if (RC) {
          if (RegPressure[RC->getID()] >
              (numberRCValPredInSU(SU, RC->getID())))
            RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
          else RegPressure[RC->getID()] = 0;
        }
      }
    }
    for (SDep &Pred : SU->Preds) {
      if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
        continue;
      --Pred.getSUnit()->NumRegDefsLeft;
    }
  }

  // Reserve resources for this SU.
  reserveResources(SU);

  // Adjust number of parallel live ranges.
  // Heuristic is simple - a node with no data successors reduces
  // the number of live ranges. All others increase it.
  unsigned NumberNonControlDeps = 0;

  for (const SDep &Succ : SU->Succs) {
    adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
    if (!Succ.isCtrl())
      NumberNonControlDeps++;
  }

  if (!NumberNonControlDeps) {
    if (ParallelLiveRanges >= SU->NumPreds)
      ParallelLiveRanges -= SU->NumPreds;
    else
      ParallelLiveRanges = 0;
  }
  else
    ParallelLiveRanges += SU->NumRegDefsLeft;

  // Track parallel live chains.
  HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
  HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
}

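/// Initialize SU->NumRegDefsLeft with the number of register results the
/// node (and any nodes glued to it) will produce.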
void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
  unsigned NodeNumDefs = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode()) {
      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
      // No register need be allocated for this.
      if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
        NodeNumDefs = 0;
        break;
      }
      NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
    }
    else
      switch(N->getOpcode()) {
      default: break;
      case ISD::CopyFromReg:
        NodeNumDefs++;
        break;
      case ISD::INLINEASM:
      case ISD::INLINEASM_BR:
        NodeNumDefs++;
        break;
      }

  SU->NumRegDefsLeft = NodeNumDefs;
}

/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
/// scheduled. If SU is not itself available, then there is at least one
/// predecessor node that has not been scheduled yet. If SU has exactly ONE
/// unscheduled predecessor, we want to increase its priority: it getting
/// scheduled will make this node available, so it is better than some other
/// node of the same priority that will not make a node available.
void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
  if (SU->isAvailable) return;  // All preds scheduled.

  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
  if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
    return;

  // Okay, we found a single predecessor that is available, but not scheduled.
  // Since it is available, it must be in the priority queue. First remove it.
  remove(OnlyAvailablePred);

  // Reinsert the node into the priority queue, which recomputes its
  // NumNodesSolelyBlocking value.
  push(OnlyAvailablePred);
}


/// Main access point - returns the next instruction
/// to be placed in the scheduling sequence.
SUnit *ResourcePriorityQueue::pop() {
  if (empty())
    return nullptr;

  std::vector<SUnit *>::iterator Best = Queue.begin();
  if (!DisableDFASched) {
    int BestCost = SUSchedulingCost(*Best);
    for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
      if (SUSchedulingCost(*I) > BestCost) {
        BestCost = SUSchedulingCost(*I);
        Best = I;
      }
    }
  }
  // Use default TD scheduling mechanism.
  else {
    for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
      if (Picker(*Best, *I))
        Best = I;
  }

  SUnit *V = *Best;
  if (Best != std::prev(Queue.end()))
    std::swap(*Best, Queue.back());

  Queue.pop_back();

  return V;
}

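/// Remove SU from the queue, swapping it with the last element so the
/// erase is O(1).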
void ResourcePriorityQueue::remove(SUnit *SU) {
  assert(!Queue.empty() && "Queue is empty!");
  std::vector<SUnit *>::iterator I = find(Queue, SU);
  if (I != std::prev(Queue.end()))
    std::swap(*I, Queue.back());

  Queue.pop_back();
}