1//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the ResourcePriorityQueue class, which is a
10// SchedulingPriorityQueue that prioritizes instructions using DFA state to
11// reduce the length of the critical path through the basic block
12// on VLIW platforms.
13// The scheduler is basically a top-down adaptable list scheduler with DFA
14// resource tracking added to the cost function.
15// DFA is queried as a state machine to model "packets/bundles" during
16// schedule. Currently packets/bundles are discarded at the end of
17// scheduling, affecting only order of instructions.
18//
19//===----------------------------------------------------------------------===//
20
21#include "llvm/CodeGen/ResourcePriorityQueue.h"
22#include "llvm/CodeGen/DFAPacketizer.h"
23#include "llvm/CodeGen/SelectionDAGISel.h"
24#include "llvm/CodeGen/SelectionDAGNodes.h"
25#include "llvm/CodeGen/TargetInstrInfo.h"
26#include "llvm/CodeGen/TargetLowering.h"
27#include "llvm/CodeGen/TargetRegisterInfo.h"
28#include "llvm/CodeGen/TargetSubtargetInfo.h"
29#include "llvm/Support/CommandLine.h"
30
31using namespace llvm;
32
33#define DEBUG_TYPE "scheduler"
34
35static cl::opt<bool>
36 DisableDFASched("disable-dfa-sched", cl::Hidden,
37 cl::desc("Disable use of DFA during scheduling"));
38
39static cl::opt<int> RegPressureThreshold(
40 "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(Val: 5),
41 cl::desc("Track reg pressure and switch priority to in-depth"));
42
43ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
44 : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
45 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
46 TRI = STI.getRegisterInfo();
47 TLI = IS->TLI;
48 TII = STI.getInstrInfo();
49 ResourcesModel.reset(p: TII->CreateTargetScheduleState(STI));
50 // This hard requirement could be relaxed, but for now
51 // do not let it proceed.
52 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
53
54 unsigned NumRC = TRI->getNumRegClasses();
55 RegLimit.resize(new_size: NumRC);
56 RegPressure.resize(new_size: NumRC);
57 std::fill(RegLimit.begin(), RegLimit.end(), 0);
58 std::fill(RegPressure.begin(), RegPressure.end(), 0);
59 for (const TargetRegisterClass *RC : TRI->regclasses())
60 RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, MF&: *IS->MF);
61
62 ParallelLiveRanges = 0;
63 HorizontalVerticalBalance = 0;
64}
65
66unsigned
67ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
68 unsigned NumberDeps = 0;
69 for (SDep &Pred : SU->Preds) {
70 if (Pred.isCtrl())
71 continue;
72
73 SUnit *PredSU = Pred.getSUnit();
74 const SDNode *ScegN = PredSU->getNode();
75
76 if (!ScegN)
77 continue;
78
79 // If value is passed to CopyToReg, it is probably
80 // live outside BB.
81 switch (ScegN->getOpcode()) {
82 default: break;
83 case ISD::TokenFactor: break;
84 case ISD::CopyFromReg: NumberDeps++; break;
85 case ISD::CopyToReg: break;
86 case ISD::INLINEASM: break;
87 case ISD::INLINEASM_BR: break;
88 }
89 if (!ScegN->isMachineOpcode())
90 continue;
91
92 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
93 MVT VT = ScegN->getSimpleValueType(ResNo: i);
94 if (TLI->isTypeLegal(VT)
95 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
96 NumberDeps++;
97 break;
98 }
99 }
100 }
101 return NumberDeps;
102}
103
104unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
105 unsigned RCId) {
106 unsigned NumberDeps = 0;
107 for (const SDep &Succ : SU->Succs) {
108 if (Succ.isCtrl())
109 continue;
110
111 SUnit *SuccSU = Succ.getSUnit();
112 const SDNode *ScegN = SuccSU->getNode();
113 if (!ScegN)
114 continue;
115
116 // If value is passed to CopyToReg, it is probably
117 // live outside BB.
118 switch (ScegN->getOpcode()) {
119 default: break;
120 case ISD::TokenFactor: break;
121 case ISD::CopyFromReg: break;
122 case ISD::CopyToReg: NumberDeps++; break;
123 case ISD::INLINEASM: break;
124 case ISD::INLINEASM_BR: break;
125 }
126 if (!ScegN->isMachineOpcode())
127 continue;
128
129 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
130 const SDValue &Op = ScegN->getOperand(Num: i);
131 MVT VT = Op.getNode()->getSimpleValueType(ResNo: Op.getResNo());
132 if (TLI->isTypeLegal(VT)
133 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
134 NumberDeps++;
135 break;
136 }
137 }
138 }
139 return NumberDeps;
140}
141
142static unsigned numberCtrlDepsInSU(SUnit *SU) {
143 unsigned NumberDeps = 0;
144 for (const SDep &Succ : SU->Succs)
145 if (Succ.isCtrl())
146 NumberDeps++;
147
148 return NumberDeps;
149}
150
151static unsigned numberCtrlPredInSU(SUnit *SU) {
152 unsigned NumberDeps = 0;
153 for (SDep &Pred : SU->Preds)
154 if (Pred.isCtrl())
155 NumberDeps++;
156
157 return NumberDeps;
158}
159
160///
161/// Initialize nodes.
162///
163void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
164 SUnits = &sunits;
165 NumNodesSolelyBlocking.resize(new_size: SUnits->size(), x: 0);
166
167 for (SUnit &SU : *SUnits) {
168 initNumRegDefsLeft(SU: &SU);
169 SU.NodeQueueId = 0;
170 }
171}
172
173/// This heuristic is used if DFA scheduling is not desired
174/// for some VLIW platform.
175bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
176 // The isScheduleHigh flag allows nodes with wraparound dependencies that
177 // cannot easily be modeled as edges with latencies to be scheduled as
178 // soon as possible in a top-down schedule.
179 if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
180 return false;
181
182 if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
183 return true;
184
185 unsigned LHSNum = LHS->NodeNum;
186 unsigned RHSNum = RHS->NodeNum;
187
188 // The most important heuristic is scheduling the critical path.
189 unsigned LHSLatency = PQ->getLatency(NodeNum: LHSNum);
190 unsigned RHSLatency = PQ->getLatency(NodeNum: RHSNum);
191 if (LHSLatency < RHSLatency) return true;
192 if (LHSLatency > RHSLatency) return false;
193
194 // After that, if two nodes have identical latencies, look to see if one will
195 // unblock more other nodes than the other.
196 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(NodeNum: LHSNum);
197 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(NodeNum: RHSNum);
198 if (LHSBlocked < RHSBlocked) return true;
199 if (LHSBlocked > RHSBlocked) return false;
200
201 // Finally, just to provide a stable ordering, use the node number as a
202 // deciding factor.
203 return LHSNum < RHSNum;
204}
205
206
207/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
208/// of SU, return it, otherwise return null.
209SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
210 SUnit *OnlyAvailablePred = nullptr;
211 for (const SDep &Pred : SU->Preds) {
212 SUnit &PredSU = *Pred.getSUnit();
213 if (!PredSU.isScheduled) {
214 // We found an available, but not scheduled, predecessor. If it's the
215 // only one we have found, keep track of it... otherwise give up.
216 if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
217 return nullptr;
218 OnlyAvailablePred = &PredSU;
219 }
220 }
221 return OnlyAvailablePred;
222}
223
224void ResourcePriorityQueue::push(SUnit *SU) {
225 // Look at all of the successors of this node. Count the number of nodes that
226 // this node is the sole unscheduled node for.
227 unsigned NumNodesBlocking = 0;
228 for (const SDep &Succ : SU->Succs)
229 if (getSingleUnscheduledPred(SU: Succ.getSUnit()) == SU)
230 ++NumNodesBlocking;
231
232 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
233 Queue.push_back(x: SU);
234}
235
236/// Check if scheduling of this SU is possible
237/// in the current packet.
238bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
239 if (!SU || !SU->getNode())
240 return false;
241
242 // If this is a compound instruction,
243 // it is likely to be a call. Do not delay it.
244 if (SU->getNode()->getGluedNode())
245 return true;
246
247 // First see if the pipeline could receive this instruction
248 // in the current cycle.
249 if (SU->getNode()->isMachineOpcode())
250 switch (SU->getNode()->getMachineOpcode()) {
251 default:
252 if (!ResourcesModel->canReserveResources(MID: &TII->get(
253 Opcode: SU->getNode()->getMachineOpcode())))
254 return false;
255 break;
256 case TargetOpcode::EXTRACT_SUBREG:
257 case TargetOpcode::INSERT_SUBREG:
258 case TargetOpcode::SUBREG_TO_REG:
259 case TargetOpcode::REG_SEQUENCE:
260 case TargetOpcode::IMPLICIT_DEF:
261 break;
262 }
263
264 // Now see if there are no other dependencies
265 // to instructions already in the packet.
266 for (const SUnit *S : Packet)
267 for (const SDep &Succ : S->Succs) {
268 // Since we do not add pseudos to packets, might as well
269 // ignore order deps.
270 if (Succ.isCtrl())
271 continue;
272
273 if (Succ.getSUnit() == SU)
274 return false;
275 }
276
277 return true;
278}
279
280/// Keep track of available resources.
281void ResourcePriorityQueue::reserveResources(SUnit *SU) {
282 // If this SU does not fit in the packet
283 // start a new one.
284 if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
285 ResourcesModel->clearResources();
286 Packet.clear();
287 }
288
289 if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
290 switch (SU->getNode()->getMachineOpcode()) {
291 default:
292 ResourcesModel->reserveResources(MID: &TII->get(
293 Opcode: SU->getNode()->getMachineOpcode()));
294 break;
295 case TargetOpcode::EXTRACT_SUBREG:
296 case TargetOpcode::INSERT_SUBREG:
297 case TargetOpcode::SUBREG_TO_REG:
298 case TargetOpcode::REG_SEQUENCE:
299 case TargetOpcode::IMPLICIT_DEF:
300 break;
301 }
302 Packet.push_back(x: SU);
303 }
304 // Forcefully end packet for PseudoOps.
305 else {
306 ResourcesModel->clearResources();
307 Packet.clear();
308 }
309
310 // If packet is now full, reset the state so in the next cycle
311 // we start fresh.
312 if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
313 ResourcesModel->clearResources();
314 Packet.clear();
315 }
316}
317
318int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
319 int RegBalance = 0;
320
321 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
322 return RegBalance;
323
324 // Gen estimate.
325 for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
326 MVT VT = SU->getNode()->getSimpleValueType(ResNo: i);
327 if (TLI->isTypeLegal(VT)
328 && TLI->getRegClassFor(VT)
329 && TLI->getRegClassFor(VT)->getID() == RCId)
330 RegBalance += numberRCValSuccInSU(SU, RCId);
331 }
332 // Kill estimate.
333 for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
334 const SDValue &Op = SU->getNode()->getOperand(Num: i);
335 MVT VT = Op.getNode()->getSimpleValueType(ResNo: Op.getResNo());
336 if (isa<ConstantSDNode>(Val: Op.getNode()))
337 continue;
338
339 if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
340 && TLI->getRegClassFor(VT)->getID() == RCId)
341 RegBalance -= numberRCValPredInSU(SU, RCId);
342 }
343 return RegBalance;
344}
345
346/// Estimates change in reg pressure from this SU.
347/// It is achieved by trivial tracking of defined
348/// and used vregs in dependent instructions.
349/// The RawPressure flag makes this function to ignore
350/// existing reg file sizes, and report raw def/use
351/// balance.
352int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
353 int RegBalance = 0;
354
355 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
356 return RegBalance;
357
358 if (RawPressure) {
359 for (const TargetRegisterClass *RC : TRI->regclasses())
360 RegBalance += rawRegPressureDelta(SU, RCId: RC->getID());
361 }
362 else {
363 for (const TargetRegisterClass *RC : TRI->regclasses()) {
364 if ((RegPressure[RC->getID()] +
365 rawRegPressureDelta(SU, RCId: RC->getID()) > 0) &&
366 (RegPressure[RC->getID()] +
367 rawRegPressureDelta(SU, RCId: RC->getID()) >= RegLimit[RC->getID()]))
368 RegBalance += rawRegPressureDelta(SU, RCId: RC->getID());
369 }
370 }
371
372 return RegBalance;
373}
374
375// Constants used to denote relative importance of
376// heuristic components for cost computation.
377static const unsigned PriorityOne = 200;
378static const unsigned PriorityTwo = 50;
379static const unsigned PriorityThree = 15;
380static const unsigned PriorityFour = 5;
381static const unsigned ScaleOne = 20;
382static const unsigned ScaleTwo = 10;
383static const unsigned ScaleThree = 5;
384static const unsigned FactorOne = 2;
385
386/// Returns single number reflecting benefit of scheduling SU
387/// in the current cycle.
388int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
389 // Initial trivial priority.
390 int ResCount = 1;
391
392 // Do not waste time on a node that is already scheduled.
393 if (SU->isScheduled)
394 return ResCount;
395
396 // Forced priority is high.
397 if (SU->isScheduleHigh)
398 ResCount += PriorityOne;
399
400 // Adaptable scheduling
401 // A small, but very parallel
402 // region, where reg pressure is an issue.
403 if (HorizontalVerticalBalance > RegPressureThreshold) {
404 // Critical path first
405 ResCount += (SU->getHeight() * ScaleTwo);
406 // If resources are available for it, multiply the
407 // chance of scheduling.
408 if (isResourceAvailable(SU))
409 ResCount <<= FactorOne;
410
411 // Consider change to reg pressure from scheduling
412 // this SU.
413 ResCount -= (regPressureDelta(SU,RawPressure: true) * ScaleOne);
414 }
415 // Default heuristic, greeady and
416 // critical path driven.
417 else {
418 // Critical path first.
419 ResCount += (SU->getHeight() * ScaleTwo);
420 // Now see how many instructions is blocked by this SU.
421 ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
422 // If resources are available for it, multiply the
423 // chance of scheduling.
424 if (isResourceAvailable(SU))
425 ResCount <<= FactorOne;
426
427 ResCount -= (regPressureDelta(SU) * ScaleTwo);
428 }
429
430 // These are platform-specific things.
431 // Will need to go into the back end
432 // and accessed from here via a hook.
433 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
434 if (N->isMachineOpcode()) {
435 const MCInstrDesc &TID = TII->get(Opcode: N->getMachineOpcode());
436 if (TID.isCall())
437 ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
438 }
439 else
440 switch (N->getOpcode()) {
441 default: break;
442 case ISD::TokenFactor:
443 case ISD::CopyFromReg:
444 case ISD::CopyToReg:
445 ResCount += PriorityFour;
446 break;
447
448 case ISD::INLINEASM:
449 case ISD::INLINEASM_BR:
450 ResCount += PriorityThree;
451 break;
452 }
453 }
454 return ResCount;
455}
456
457
458/// Main resource tracking point.
459void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
460 // Use NULL entry as an event marker to reset
461 // the DFA state.
462 if (!SU) {
463 ResourcesModel->clearResources();
464 Packet.clear();
465 return;
466 }
467
468 const SDNode *ScegN = SU->getNode();
469 // Update reg pressure tracking.
470 // First update current node.
471 if (ScegN->isMachineOpcode()) {
472 // Estimate generated regs.
473 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
474 MVT VT = ScegN->getSimpleValueType(ResNo: i);
475
476 if (TLI->isTypeLegal(VT)) {
477 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
478 if (RC)
479 RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RCId: RC->getID());
480 }
481 }
482 // Estimate killed regs.
483 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
484 const SDValue &Op = ScegN->getOperand(Num: i);
485 MVT VT = Op.getNode()->getSimpleValueType(ResNo: Op.getResNo());
486
487 if (TLI->isTypeLegal(VT)) {
488 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
489 if (RC) {
490 if (RegPressure[RC->getID()] >
491 (numberRCValPredInSU(SU, RCId: RC->getID())))
492 RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RCId: RC->getID());
493 else RegPressure[RC->getID()] = 0;
494 }
495 }
496 }
497 for (SDep &Pred : SU->Preds) {
498 if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
499 continue;
500 --Pred.getSUnit()->NumRegDefsLeft;
501 }
502 }
503
504 // Reserve resources for this SU.
505 reserveResources(SU);
506
507 // Adjust number of parallel live ranges.
508 // Heuristic is simple - node with no data successors reduces
509 // number of live ranges. All others, increase it.
510 unsigned NumberNonControlDeps = 0;
511
512 for (const SDep &Succ : SU->Succs) {
513 adjustPriorityOfUnscheduledPreds(SU: Succ.getSUnit());
514 if (!Succ.isCtrl())
515 NumberNonControlDeps++;
516 }
517
518 if (!NumberNonControlDeps) {
519 if (ParallelLiveRanges >= SU->NumPreds)
520 ParallelLiveRanges -= SU->NumPreds;
521 else
522 ParallelLiveRanges = 0;
523
524 }
525 else
526 ParallelLiveRanges += SU->NumRegDefsLeft;
527
528 // Track parallel live chains.
529 HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
530 HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
531}
532
533void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
534 unsigned NodeNumDefs = 0;
535 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
536 if (N->isMachineOpcode()) {
537 const MCInstrDesc &TID = TII->get(Opcode: N->getMachineOpcode());
538 // No register need be allocated for this.
539 if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
540 NodeNumDefs = 0;
541 break;
542 }
543 NodeNumDefs = std::min(a: N->getNumValues(), b: TID.getNumDefs());
544 }
545 else
546 switch(N->getOpcode()) {
547 default: break;
548 case ISD::CopyFromReg:
549 NodeNumDefs++;
550 break;
551 case ISD::INLINEASM:
552 case ISD::INLINEASM_BR:
553 NodeNumDefs++;
554 break;
555 }
556
557 SU->NumRegDefsLeft = NodeNumDefs;
558}
559
560/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
561/// scheduled. If SU is not itself available, then there is at least one
562/// predecessor node that has not been scheduled yet. If SU has exactly ONE
563/// unscheduled predecessor, we want to increase its priority: it getting
564/// scheduled will make this node available, so it is better than some other
565/// node of the same priority that will not make a node available.
566void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
567 if (SU->isAvailable) return; // All preds scheduled.
568
569 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
570 if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
571 return;
572
573 // Okay, we found a single predecessor that is available, but not scheduled.
574 // Since it is available, it must be in the priority queue. First remove it.
575 remove(SU: OnlyAvailablePred);
576
577 // Reinsert the node into the priority queue, which recomputes its
578 // NumNodesSolelyBlocking value.
579 push(SU: OnlyAvailablePred);
580}
581
582
583/// Main access point - returns next instructions
584/// to be placed in scheduling sequence.
585SUnit *ResourcePriorityQueue::pop() {
586 if (empty())
587 return nullptr;
588
589 std::vector<SUnit *>::iterator Best = Queue.begin();
590 if (!DisableDFASched) {
591 int BestCost = SUSchedulingCost(SU: *Best);
592 for (auto I = std::next(x: Queue.begin()), E = Queue.end(); I != E; ++I) {
593
594 if (SUSchedulingCost(SU: *I) > BestCost) {
595 BestCost = SUSchedulingCost(SU: *I);
596 Best = I;
597 }
598 }
599 }
600 // Use default TD scheduling mechanism.
601 else {
602 for (auto I = std::next(x: Queue.begin()), E = Queue.end(); I != E; ++I)
603 if (Picker(*Best, *I))
604 Best = I;
605 }
606
607 SUnit *V = *Best;
608 if (Best != std::prev(x: Queue.end()))
609 std::swap(a&: *Best, b&: Queue.back());
610
611 Queue.pop_back();
612
613 return V;
614}
615
616
617void ResourcePriorityQueue::remove(SUnit *SU) {
618 assert(!Queue.empty() && "Queue is empty!");
619 std::vector<SUnit *>::iterator I = find(Range&: Queue, Val: SU);
620 if (I != std::prev(x: Queue.end()))
621 std::swap(a&: *I, b&: Queue.back());
622
623 Queue.pop_back();
624}
625