ScheduleDAGRRList.cpp source code [llvm_projects/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp]

1	//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This implements bottom-up and top-down register pressure reduction list
10	// schedulers, using standard algorithms. The basic approach uses a priority
11	// queue of available nodes to schedule. One at a time, nodes are taken from
12	// the priority queue (thus in priority order), checked for legality to
13	// schedule, and emitted if legal.
14	//
15	//===----------------------------------------------------------------------===//
16
17	#include "ScheduleDAGSDNodes.h"
18	#include "llvm/ADT/ArrayRef.h"
19	#include "llvm/ADT/DenseMap.h"
20	#include "llvm/ADT/STLExtras.h"
21	#include "llvm/ADT/SmallSet.h"
22	#include "llvm/ADT/SmallVector.h"
23	#include "llvm/ADT/Statistic.h"
24	#include "llvm/CodeGen/ISDOpcodes.h"
25	#include "llvm/CodeGen/MachineFunction.h"
26	#include "llvm/CodeGen/MachineOperand.h"
27	#include "llvm/CodeGen/Register.h"
28	#include "llvm/CodeGen/ScheduleDAG.h"
29	#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
30	#include "llvm/CodeGen/SchedulerRegistry.h"
31	#include "llvm/CodeGen/SelectionDAGISel.h"
32	#include "llvm/CodeGen/SelectionDAGNodes.h"
33	#include "llvm/CodeGen/TargetInstrInfo.h"
34	#include "llvm/CodeGen/TargetLowering.h"
35	#include "llvm/CodeGen/TargetOpcodes.h"
36	#include "llvm/CodeGen/TargetRegisterInfo.h"
37	#include "llvm/CodeGen/TargetSubtargetInfo.h"
38	#include "llvm/CodeGenTypes/MachineValueType.h"
39	#include "llvm/Config/llvm-config.h"
40	#include "llvm/IR/InlineAsm.h"
41	#include "llvm/MC/MCInstrDesc.h"
42	#include "llvm/MC/MCRegisterInfo.h"
43	#include "llvm/Support/Casting.h"
44	#include "llvm/Support/CodeGen.h"
45	#include "llvm/Support/CommandLine.h"
46	#include "llvm/Support/Compiler.h"
47	#include "llvm/Support/Debug.h"
48	#include "llvm/Support/ErrorHandling.h"
49	#include "llvm/Support/raw_ostream.h"
50	#include <algorithm>
51	#include <cassert>
52	#include <cstdint>
53	#include <cstdlib>
54	#include <iterator>
55	#include <limits>
56	#include <memory>
57	#include <utility>
58	#include <vector>
59
60	using namespace llvm;
61
62	#define DEBUG_TYPE "pre-RA-sched"
63
64	STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65	STATISTIC(NumUnfolds, "Number of nodes unfolded");
66	STATISTIC(NumDups, "Number of duplicated nodes");
67	STATISTIC(NumPRCopies, "Number of physical register copies");
68
69	static RegisterScheduler
70	burrListDAGScheduler("list-burr",
71	"Bottom-up register reduction list scheduling",
72	createBURRListDAGScheduler);
73
74	static RegisterScheduler
75	sourceListDAGScheduler("source",
76	"Similar to list-burr but schedules in source "
77	"order when possible",
78	createSourceListDAGScheduler);
79
80	static RegisterScheduler
81	hybridListDAGScheduler("list-hybrid",
82	"Bottom-up register pressure aware list scheduling "
83	"which tries to balance latency and register pressure",
84	createHybridListDAGScheduler);
85
86	static RegisterScheduler
87	ILPListDAGScheduler("list-ilp",
88	"Bottom-up register pressure aware list scheduling "
89	"which tries to balance ILP and register pressure",
90	createILPListDAGScheduler);
91
92	static cl::opt<bool> DisableSchedCycles(
93	"disable-sched-cycles", cl::Hidden, cl::init(Val: false),
94	cl::desc ("Disable cycle-level precision during preRA scheduling"));
95
96	// Temporary sched=list-ilp flags until the heuristics are robust.
97	// Some options are also available under sched=list-hybrid.
98	static cl::opt<bool> DisableSchedRegPressure(
99	"disable-sched-reg-pressure", cl::Hidden, cl::init(Val: false),
100	cl::desc ("Disable regpressure priority in sched=list-ilp"));
101	static cl::opt<bool> DisableSchedLiveUses(
102	"disable-sched-live-uses", cl::Hidden, cl::init(Val: true),
103	cl::desc ("Disable live use priority in sched=list-ilp"));
104	static cl::opt<bool> DisableSchedVRegCycle(
105	"disable-sched-vrcycle", cl::Hidden, cl::init(Val: false),
106	cl::desc ("Disable virtual register cycle interference checks"));
107	static cl::opt<bool> DisableSchedPhysRegJoin(
108	"disable-sched-physreg-join", cl::Hidden, cl::init(Val: false),
109	cl::desc ("Disable physreg def-use affinity"));
110	static cl::opt<bool> DisableSchedStalls(
111	"disable-sched-stalls", cl::Hidden, cl::init(Val: true),
112	cl::desc ("Disable no-stall priority in sched=list-ilp"));
113	static cl::opt<bool> DisableSchedCriticalPath(
114	"disable-sched-critical-path", cl::Hidden, cl::init(Val: false),
115	cl::desc ("Disable critical path priority in sched=list-ilp"));
116	static cl::opt<bool> DisableSchedHeight(
117	"disable-sched-height", cl::Hidden, cl::init(Val: false),
118	cl::desc ("Disable scheduled-height priority in sched=list-ilp"));
119	static cl::opt<bool> Disable2AddrHack(
120	"disable-2addr-hack", cl::Hidden, cl::init(Val: true),
121	cl::desc ("Disable scheduler's two-address hack"));
122
123	static cl::opt<int> MaxReorderWindow(
124	"max-sched-reorder", cl::Hidden, cl::init(Val: `6`),
125	cl::desc ("Number of instructions to allow ahead of the critical path "
126	"in sched=list-ilp"));
127
128	static cl::opt<unsigned> AvgIPC(
129	"sched-avg-ipc", cl::Hidden, cl::init(Val: `1`),
130	cl::desc ("Average inst/cycle whan no target itinerary exists."));
131
132	namespace {
133
134	//===----------------------------------------------------------------------===//
135	/// ScheduleDAGRRList - The actual register reduction list scheduler
136	/// implementation. This supports both top-down and bottom-up scheduling.
137	///
138	class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139	private:
140	/// NeedLatency - True if the scheduler will make use of latency information.
141	bool NeedLatency;
142
143	/// AvailableQueue - The priority queue to use for the available SUnits.
144	SchedulingPriorityQueue *AvailableQueue;
145
146	/// PendingQueue - This contains all of the instructions whose operands have
147	/// been issued, but their results are not ready yet (due to the latency of
148	/// the operation). Once the operands becomes available, the instruction is
149	/// added to the AvailableQueue.
150	std::vector<SUnit *> PendingQueue;
151
152	/// HazardRec - The hazard recognizer to use.
153	ScheduleHazardRecognizer *HazardRec;
154
155	/// CurCycle - The current scheduler state corresponds to this cycle.
156	unsigned CurCycle = `0`;
157
158	/// MinAvailableCycle - Cycle of the soonest available instruction.
159	unsigned MinAvailableCycle = ~`0u`;
160
161	/// IssueCount - Count instructions issued in this cycle
162	/// Currently valid only for bottom-up scheduling.
163	unsigned IssueCount = `0u`;
164
165	/// LiveRegDefs - A set of physical registers and their definition
166	/// that are "live". These nodes must be scheduled before any other nodes that
167	/// modifies the registers can be scheduled.
168	unsigned NumLiveRegs = `0u`;
169	std::unique_ptr<SUnit*[]> LiveRegDefs;
170	std::unique_ptr<SUnit*[]> LiveRegGens;
171
172	// Collect interferences between physical register use/defs.
173	// Each interference is an SUnit and set of physical registers.
174	SmallVector<SUnit*, `4`> Interferences;
175
176	using LRegsMapT = DenseMap<SUnit , SmallVector<unsigned*, `4`>>;
177
178	LRegsMapT LRegsMap;
179
180	/// Topo - A topological ordering for SUnits which permits fast IsReachable
181	/// and similar queries.
182	ScheduleDAGTopologicalSort Topo;
183
184	// Hack to keep track of the inverse of FindCallSeqStart without more crazy
185	// DAG crawling.
186	DenseMap<SUnit, SUnit> CallSeqEndForStart;
187
188	public:
189	ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190	SchedulingPriorityQueue *availqueue,
191	CodeGenOptLevel OptLevel)
192	: ScheduleDAGSDNodes (mf), NeedLatency(needlatency),
193	AvailableQueue(availqueue), Topo (SUnits, nullptr) {
194	const TargetSubtargetInfo &STI = mf.getSubtarget();
195	if (DisableSchedCycles \|\| !NeedLatency)
196	HazardRec = new ScheduleHazardRecognizer ();
197	else
198	HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(STI: &STI, DAG: this);
199	}
200
201	~ScheduleDAGRRList() override {
202	delete HazardRec;
203	delete AvailableQueue;
204	}
205
206	void Schedule() override;
207
208	ScheduleHazardRecognizer getHazardRec() { return* HazardRec; }
209
210	/// IsReachable - Checks if SU is reachable from TargetSU.
211	bool IsReachable(const SUnit SU, const* SUnit *TargetSU) {
212	return Topo.IsReachable(SU, TargetSU);
213	}
214
215	/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
216	/// create a cycle.
217	bool WillCreateCycle(SUnit SU, SUnit TargetSU) {
218	return Topo.WillCreateCycle(TargetSU: SU, SU: TargetSU);
219	}
220
221	/// AddPredQueued - Queues and update to add a predecessor edge to SUnit SU.
222	/// This returns true if this is a new predecessor.
223	/// Does NOT* update the topological ordering! It just queues an update.*
224	void AddPredQueued(SUnit SU, const* SDep &D) {
225	Topo.AddPredQueued(Y: SU, X: D.getSUnit());
226	SU->addPred(D);
227	}
228
229	/// AddPred - adds a predecessor edge to SUnit SU.
230	/// This returns true if this is a new predecessor.
231	/// Updates the topological ordering if required.
232	void AddPred(SUnit SU, const* SDep &D) {
233	Topo.AddPred(Y: SU, X: D.getSUnit());
234	SU->addPred(D);
235	}
236
237	/// RemovePred - removes a predecessor edge from SUnit SU.
238	/// This returns true if an edge was removed.
239	/// Updates the topological ordering if required.
240	void RemovePred(SUnit SU, const* SDep &D) {
241	Topo.RemovePred(M: SU, N: D.getSUnit());
242	SU->removePred(D);
243	}
244
245	private:
246	bool isReady(SUnit *SU) {
247	return DisableSchedCycles \|\| !AvailableQueue->hasReadyFilter() \|\|
248	AvailableQueue->isReady(SU);
249	}
250
251	void ReleasePred(SUnit SU, const* SDep *PredEdge);
252	void ReleasePredecessors(SUnit *SU);
253	void ReleasePending();
254	void AdvanceToCycle(unsigned NextCycle);
255	void AdvancePastStalls(SUnit *SU);
256	void EmitNode(SUnit *SU);
257	void ScheduleNodeBottomUp(SUnit*);
258	void CapturePred(SDep *PredEdge);
259	void UnscheduleNodeBottomUp(SUnit*);
260	void RestoreHazardCheckerBottomUp();
261	void BacktrackBottomUp(SUnit, SUnit);
262	SUnit TryUnfoldSU(SUnit );
263	SUnit CopyAndMoveSuccessors(SUnit);
264	void InsertCopiesAndMoveSuccs(SUnit, unsigned*,
265	const TargetRegisterClass*,
266	const TargetRegisterClass*,
267	SmallVectorImpl<SUnit*>&);
268	bool DelayForLiveRegsBottomUp(SUnit, SmallVectorImpl<unsigned*>&);
269
270	void releaseInterferences(unsigned Reg = `0`);
271
272	SUnit *PickNodeToScheduleBottomUp();
273	void ListScheduleBottomUp();
274
275	/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
276	SUnit CreateNewSUnit(SDNode N) {
277	unsigned NumSUnits = SUnits.size();
278	SUnit *NewNode = newSUnit(N);
279	// Update the topological ordering.
280	if (NewNode->NodeNum >= NumSUnits)
281	Topo.AddSUnitWithoutPredecessors(SU: NewNode);
282	return NewNode;
283	}
284
285	/// CreateClone - Creates a new SUnit from an existing one.
286	SUnit CreateClone(SUnit N) {
287	unsigned NumSUnits = SUnits.size();
288	SUnit *NewNode = Clone(Old: N);
289	// Update the topological ordering.
290	if (NewNode->NodeNum >= NumSUnits)
291	Topo.AddSUnitWithoutPredecessors(SU: NewNode);
292	return NewNode;
293	}
294
295	/// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
296	/// need actual latency information but the hybrid scheduler does.
297	bool forceUnitLatencies() const override {
298	return !NeedLatency;
299	}
300	};
301
302	} // end anonymous namespace
303
304	static constexpr unsigned RegSequenceCost = `1`;
305
306	/// GetCostForDef - Looks up the register class and cost for a given definition.
307	/// Typically this just means looking up the representative register class,
308	/// but for untyped values (MVT::Untyped) it means inspecting the node's
309	/// opcode to determine what register class is being generated.
310	static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
311	const TargetLowering *TLI,
312	const TargetInstrInfo *TII,
313	const TargetRegisterInfo *TRI,
314	unsigned &RegClass, unsigned &Cost,
315	const MachineFunction &MF) {
316	MVT VT = RegDefPos.GetValue();
317
318	// Special handling for untyped values. These values can only come from
319	// the expansion of custom DAG-to-DAG patterns.
320	if (VT == MVT::Untyped) {
321	const SDNode *Node = RegDefPos.GetNode();
322
323	// Special handling for CopyFromReg of untyped values.
324	if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
325	Register Reg = cast<RegisterSDNode>(Val: Node->getOperand(Num: `1`))->getReg();
326	const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
327	RegClass = RC->getID();
328	Cost = `1`;
329	return;
330	}
331
332	unsigned Opcode = Node->getMachineOpcode();
333	if (Opcode == TargetOpcode::REG_SEQUENCE) {
334	unsigned DstRCIdx = Node->getConstantOperandVal(Num: `0`);
335	const TargetRegisterClass *RC = TRI->getRegClass(i: DstRCIdx);
336	RegClass = RC->getID();
337	Cost = RegSequenceCost;
338	return;
339	}
340
341	unsigned Idx = RegDefPos.GetIdx();
342	const MCInstrDesc &Desc = TII->get(Opcode);
343	const TargetRegisterClass *RC = TII->getRegClass(MCID: Desc, OpNum: Idx, TRI, MF);
344	assert(RC && "Not a valid register class");
345	RegClass = RC->getID();
346	// FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
347	// better way to determine it.
348	Cost = `1`;
349	} else {
350	RegClass = TLI->getRepRegClassFor(VT)->getID();
351	Cost = TLI->getRepRegClassCostFor(VT);
352	}
353	}
354
355	/// Schedule - Schedule the DAG using list scheduling.
356	void ScheduleDAGRRList::Schedule() {
357	LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
358	<< " '" << BB->getName() << "' **********\n");
359
360	CurCycle = `0`;
361	IssueCount = `0`;
362	MinAvailableCycle =
363	DisableSchedCycles ? `0` : std::numeric_limits<unsigned>::max();
364	NumLiveRegs = `0`;
365	// Allocate slots for each physical register, plus one for a special register
366	// to track the virtual resource of a calling sequence.
367	LiveRegDefs.reset(p: new SUnit*[TRI->getNumRegs() + `1`]());
368	LiveRegGens.reset(p: new SUnit*[TRI->getNumRegs() + `1`]());
369	CallSeqEndForStart.clear();
370	assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
371
372	// Build the scheduling graph.
373	BuildSchedGraph(AA: nullptr);
374
375	LLVM_DEBUG(dump());
376	Topo.MarkDirty();
377
378	AvailableQueue->initNodes(SUnits);
379
380	HazardRec->Reset();
381
382	// Execute the actual scheduling loop.
383	ListScheduleBottomUp();
384
385	AvailableQueue->releaseState();
386
387	LLVM_DEBUG({
388	dbgs() << "* Final schedule *\n";
389	dumpSchedule();
390	dbgs() << `'\n'`;
391	});
392	}
393
394	//===----------------------------------------------------------------------===//
395	// Bottom-Up Scheduling
396	//===----------------------------------------------------------------------===//
397
398	/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
399	/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
400	void ScheduleDAGRRList::ReleasePred(SUnit SU, const* SDep *PredEdge) {
401	SUnit *PredSU = PredEdge->getSUnit();
402
403	#ifndef NDEBUG
404	if (PredSU->NumSuccsLeft == `0`) {
405	dbgs() << "* Scheduling failed! *\n";
406	dumpNode(*PredSU);
407	dbgs() << " has been released too many times!\n";
408	llvm_unreachable(nullptr);
409	}
410	#endif
411	--PredSU->NumSuccsLeft;
412
413	if (!forceUnitLatencies()) {
414	// Updating predecessor's height. This is now the cycle when the
415	// predecessor can be scheduled without causing a pipeline stall.
416	PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
417	}
418
419	// If all the node's successors are scheduled, this node is ready
420	// to be scheduled. Ignore the special EntrySU node.
421	if (PredSU->NumSuccsLeft == `0` && PredSU != &EntrySU) {
422	PredSU->isAvailable = true;
423
424	unsigned Height = PredSU->getHeight();
425	if (Height < MinAvailableCycle)
426	MinAvailableCycle = Height;
427
428	if (isReady(SU: PredSU)) {
429	AvailableQueue->push(U: PredSU);
430	}
431	// CapturePred and others may have left the node in the pending queue, avoid
432	// adding it twice.
433	else if (!PredSU->isPending) {
434	PredSU->isPending = true;
435	PendingQueue.push_back(x: PredSU);
436	}
437	}
438	}
439
440	/// IsChainDependent - Test if Outer is reachable from Inner through
441	/// chain dependencies.
442	static bool IsChainDependent(SDNode Outer, SDNode Inner,
443	unsigned NestLevel,
444	const TargetInstrInfo *TII) {
445	SDNode *N = Outer;
446	while (true) {
447	if (N == Inner)
448	return true;
449	// For a TokenFactor, examine each operand. There may be multiple ways
450	// to get to the CALLSEQ_BEGIN, but we need to find the path with the
451	// most nesting in order to ensure that we find the corresponding match.
452	if (N->getOpcode() == ISD::TokenFactor) {
453	for (const SDValue &Op : N->op_values())
454	if (IsChainDependent(Outer: Op.getNode(), Inner, NestLevel, TII))
455	return true;
456	return false;
457	}
458	// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
459	if (N->isMachineOpcode()) {
460	if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
461	++NestLevel;
462	} else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
463	if (NestLevel == `0`)
464	return false;
465	--NestLevel;
466	}
467	}
468	// Otherwise, find the chain and continue climbing.
469	for (const SDValue &Op : N->op_values())
470	if (Op.getValueType() == MVT::Other) {
471	N = Op.getNode();
472	goto found_chain_operand;
473	}
474	return false;
475	found_chain_operand:;
476	if (N->getOpcode() == ISD::EntryToken)
477	return false;
478	}
479	}
480
481	/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
482	/// the corresponding (lowered) CALLSEQ_BEGIN node.
483	///
484	/// NestLevel and MaxNested are used in recursion to indcate the current level
485	/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
486	/// level seen so far.
487	///
488	/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
489	/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
490	static SDNode *
491	FindCallSeqStart(SDNode N, unsigned* &NestLevel, unsigned &MaxNest,
492	const TargetInstrInfo *TII) {
493	while (true) {
494	// For a TokenFactor, examine each operand. There may be multiple ways
495	// to get to the CALLSEQ_BEGIN, but we need to find the path with the
496	// most nesting in order to ensure that we find the corresponding match.
497	if (N->getOpcode() == ISD::TokenFactor) {
498	SDNode Best = nullptr*;
499	unsigned BestMaxNest = MaxNest;
500	for (const SDValue &Op : N->op_values()) {
501	unsigned MyNestLevel = NestLevel;
502	unsigned MyMaxNest = MaxNest;
503	if (SDNode *New = FindCallSeqStart(N: Op.getNode(),
504	NestLevel&: MyNestLevel, MaxNest&: MyMaxNest, TII))
505	if (!Best \|\| (MyMaxNest > BestMaxNest)) {
506	Best = New;
507	BestMaxNest = MyMaxNest;
508	}
509	}
510	assert(Best);
511	MaxNest = BestMaxNest;
512	return Best;
513	}
514	// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
515	if (N->isMachineOpcode()) {
516	if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
517	++NestLevel;
518	MaxNest = std::max(a: MaxNest, b: NestLevel);
519	} else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
520	assert(NestLevel != `0`);
521	--NestLevel;
522	if (NestLevel == `0`)
523	return N;
524	}
525	}
526	// Otherwise, find the chain and continue climbing.
527	for (const SDValue &Op : N->op_values())
528	if (Op.getValueType() == MVT::Other) {
529	N = Op.getNode();
530	goto found_chain_operand;
531	}
532	return nullptr;
533	found_chain_operand:;
534	if (N->getOpcode() == ISD::EntryToken)
535	return nullptr;
536	}
537	}
538
539	/// Call ReleasePred for each predecessor, then update register live def/gen.
540	/// Always update LiveRegDefs for a register dependence even if the current SU
541	/// also defines the register. This effectively create one large live range
542	/// across a sequence of two-address node. This is important because the
543	/// entire chain must be scheduled together. Example:
544	///
545	/// flags = (3) add
546	/// flags = (2) addc flags
547	/// flags = (1) addc flags
548	///
549	/// results in
550	///
551	/// LiveRegDefs[flags] = 3
552	/// LiveRegGens[flags] = 1
553	///
554	/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
555	/// interference on flags.
556	void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
557	// Bottom up: release predecessors
558	for (SDep &Pred : SU->Preds) {
559	ReleasePred(SU, PredEdge: &Pred);
560	if (Pred.isAssignedRegDep()) {
561	// This is a physical register dependency and it's impossible or
562	// expensive to copy the register. Make sure nothing that can
563	// clobber the register is scheduled between the predecessor and
564	// this node.
565	SUnit RegDef = LiveRegDefs [Pred.getReg()]; (void*)RegDef;
566	assert((!RegDef \|\| RegDef == SU \|\| RegDef == Pred.getSUnit()) &&
567	"interference on register dependence");
568	LiveRegDefs [Pred.getReg()] = Pred.getSUnit();
569	if (!LiveRegGens [Pred.getReg()]) {
570	++NumLiveRegs;
571	LiveRegGens [Pred.getReg()] = SU;
572	}
573	}
574	}
575
576	// If we're scheduling a lowered CALLSEQ_END, find the corresponding
577	// CALLSEQ_BEGIN. Inject an artificial physical register dependence between
578	// these nodes, to prevent other calls from being interscheduled with them.
579	unsigned CallResource = TRI->getNumRegs();
580	if (!LiveRegDefs [CallResource])
581	for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
582	if (Node->isMachineOpcode() &&
583	Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
584	unsigned NestLevel = `0`;
585	unsigned MaxNest = `0`;
586	SDNode *N = FindCallSeqStart(N: Node, NestLevel, MaxNest, TII);
587	assert(N && "Must find call sequence start");
588
589	SUnit *Def = &SUnits [N->getNodeId()];
590	CallSeqEndForStart [Def] = SU;
591
592	++NumLiveRegs;
593	LiveRegDefs [CallResource] = Def;
594	LiveRegGens [CallResource] = SU;
595	break;
596	}
597	}
598
599	/// Check to see if any of the pending instructions are ready to issue. If
600	/// so, add them to the available queue.
601	void ScheduleDAGRRList::ReleasePending() {
602	if (DisableSchedCycles) {
603	assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
604	return;
605	}
606
607	// If the available queue is empty, it is safe to reset MinAvailableCycle.
608	if (AvailableQueue->empty())
609	MinAvailableCycle = std::numeric_limits<unsigned>::max();
610
611	// Check to see if any of the pending instructions are ready to issue. If
612	// so, add them to the available queue.
613	for (unsigned i = `0`, e = PendingQueue.size(); i != e; ++i) {
614	unsigned ReadyCycle = PendingQueue [i]->getHeight();
615	if (ReadyCycle < MinAvailableCycle)
616	MinAvailableCycle = ReadyCycle;
617
618	if (PendingQueue [i]->isAvailable) {
619	if (!isReady(SU: PendingQueue [i]))
620	continue;
621	AvailableQueue->push(U: PendingQueue [i]);
622	}
623	PendingQueue [i]->isPending = false;
624	PendingQueue [i] = PendingQueue.back();
625	PendingQueue.pop_back();
626	--i; --e;
627	}
628	}
629
630	/// Move the scheduler state forward by the specified number of Cycles.
631	void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
632	if (NextCycle <= CurCycle)
633	return;
634
635	IssueCount = `0`;
636	AvailableQueue->setCurCycle(NextCycle);
637	if (!HazardRec->isEnabled()) {
638	// Bypass lots of virtual calls in case of long latency.
639	CurCycle = NextCycle;
640	}
641	else {
642	for (; CurCycle != NextCycle; ++CurCycle) {
643	HazardRec->RecedeCycle();
644	}
645	}
646	// FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
647	// available Q to release pending nodes at least once before popping.
648	ReleasePending();
649	}
650
651	/// Move the scheduler state forward until the specified node's dependents are
652	/// ready and can be scheduled with no resource conflicts.
653	void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
654	if (DisableSchedCycles)
655	return;
656
657	// FIXME: Nodes such as CopyFromReg probably should not advance the current
658	// cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
659	// has predecessors the cycle will be advanced when they are scheduled.
660	// But given the crude nature of modeling latency though such nodes, we
661	// currently need to treat these nodes like real instructions.
662	// if (!SU->getNode() \|\| !SU->getNode()->isMachineOpcode()) return;
663
664	unsigned ReadyCycle = SU->getHeight();
665
666	// Bump CurCycle to account for latency. We assume the latency of other
667	// available instructions may be hidden by the stall (not a full pipe stall).
668	// This updates the hazard recognizer's cycle before reserving resources for
669	// this instruction.
670	AdvanceToCycle(NextCycle: ReadyCycle);
671
672	// Calls are scheduled in their preceding cycle, so don't conflict with
673	// hazards from instructions after the call. EmitNode will reset the
674	// scoreboard state before emitting the call.
675	if (SU->isCall)
676	return;
677
678	// FIXME: For resource conflicts in very long non-pipelined stages, we
679	// should probably skip ahead here to avoid useless scoreboard checks.
680	int Stalls = `0`;
681	while (true) {
682	ScheduleHazardRecognizer::HazardType HT =
683	HazardRec->getHazardType(SU, Stalls: -Stalls);
684
685	if (HT == ScheduleHazardRecognizer::NoHazard)
686	break;
687
688	++Stalls;
689	}
690	AdvanceToCycle(NextCycle: CurCycle + Stalls);
691	}
692
693	/// Record this SUnit in the HazardRecognizer.
694	/// Does not update CurCycle.
695	void ScheduleDAGRRList::EmitNode(SUnit *SU) {
696	if (!HazardRec->isEnabled())
697	return;
698
699	// Check for phys reg copy.
700	if (!SU->getNode())
701	return;
702
703	switch (SU->getNode()->getOpcode()) {
704	default:
705	assert(SU->getNode()->isMachineOpcode() &&
706	"This target-independent node should not be scheduled.");
707	break;
708	case ISD::MERGE_VALUES:
709	case ISD::TokenFactor:
710	case ISD::LIFETIME_START:
711	case ISD::LIFETIME_END:
712	case ISD::CopyToReg:
713	case ISD::CopyFromReg:
714	case ISD::EH_LABEL:
715	// Noops don't affect the scoreboard state. Copies are likely to be
716	// removed.
717	return;
718	case ISD::INLINEASM:
719	case ISD::INLINEASM_BR:
720	// For inline asm, clear the pipeline state.
721	HazardRec->Reset();
722	return;
723	}
724	if (SU->isCall) {
725	// Calls are scheduled with their preceding instructions. For bottom-up
726	// scheduling, clear the pipeline state before emitting.
727	HazardRec->Reset();
728	}
729
730	HazardRec->EmitInstruction(SU);
731	}
732
733	static void resetVRegCycle(SUnit *SU);
734
735	/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
736	/// count of its predecessors. If a predecessor pending count is zero, add it to
737	/// the Available queue.
738	void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
739	LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
740	LLVM_DEBUG(dumpNode(*SU));
741
742	#ifndef NDEBUG
743	if (CurCycle < SU->getHeight())
744	LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
745	<< "] pipeline stall!\n");
746	#endif
747
748	// FIXME: Do not modify node height. It may interfere with
749	// backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
750	// node its ready cycle can aid heuristics, and after scheduling it can
751	// indicate the scheduled cycle.
752	SU->setHeightToAtLeast(CurCycle);
753
754	// Reserve resources for the scheduled instruction.
755	EmitNode(SU);
756
757	Sequence.push_back(x: SU);
758
759	AvailableQueue->scheduledNode(SU);
760
761	// If HazardRec is disabled, and each inst counts as one cycle, then
762	// advance CurCycle before ReleasePredecessors to avoid useless pushes to
763	// PendingQueue for schedulers that implement HasReadyFilter.
764	if (!HazardRec->isEnabled() && AvgIPC < `2`)
765	AdvanceToCycle(NextCycle: CurCycle + `1`);
766
767	// Update liveness of predecessors before successors to avoid treating a
768	// two-address node as a live range def.
769	ReleasePredecessors(SU);
770
771	// Release all the implicit physical register defs that are live.
772	for (SDep &Succ : SU->Succs) {
773	// LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
774	if (Succ.isAssignedRegDep() && LiveRegDefs [Succ.getReg()] == SU) {
775	assert(NumLiveRegs > `0` && "NumLiveRegs is already zero!");
776	--NumLiveRegs;
777	LiveRegDefs [Succ.getReg()] = nullptr;
778	LiveRegGens [Succ.getReg()] = nullptr;
779	releaseInterferences(Reg: Succ.getReg());
780	}
781	}
782	// Release the special call resource dependence, if this is the beginning
783	// of a call.
784	unsigned CallResource = TRI->getNumRegs();
785	if (LiveRegDefs [CallResource] == SU)
786	for (const SDNode *SUNode = SU->getNode(); SUNode;
787	SUNode = SUNode->getGluedNode()) {
788	if (SUNode->isMachineOpcode() &&
789	SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
790	assert(NumLiveRegs > `0` && "NumLiveRegs is already zero!");
791	--NumLiveRegs;
792	LiveRegDefs [CallResource] = nullptr;
793	LiveRegGens [CallResource] = nullptr;
794	releaseInterferences(Reg: CallResource);
795	}
796	}
797
798	resetVRegCycle(SU);
799
800	SU->isScheduled = true;
801
802	// Conditions under which the scheduler should eagerly advance the cycle:
803	// (1) No available instructions
804	// (2) All pipelines full, so available instructions must have hazards.
805	//
806	// If HazardRec is disabled, the cycle was pre-advanced before calling
807	// ReleasePredecessors. In that case, IssueCount should remain 0.
808	//
809	// Check AvailableQueue after ReleasePredecessors in case of zero latency.
810	if (HazardRec->isEnabled() \|\| AvgIPC > `1`) {
811	if (SU->getNode() && SU->getNode()->isMachineOpcode())
812	++IssueCount;
813	if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
814	\|\| (!HazardRec->isEnabled() && IssueCount == AvgIPC))
815	AdvanceToCycle(NextCycle: CurCycle + `1`);
816	}
817	}
818
819	/// CapturePred - This does the opposite of ReleasePred. Since SU is being
820	/// unscheduled, increase the succ left count of its predecessors. Remove
821	/// them from AvailableQueue if necessary.
822	void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
823	SUnit *PredSU = PredEdge->getSUnit();
824	if (PredSU->isAvailable) {
825	PredSU->isAvailable = false;
826	if (!PredSU->isPending)
827	AvailableQueue->remove(SU: PredSU);
828	}
829
830	assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
831	"NumSuccsLeft will overflow!");
832	++PredSU->NumSuccsLeft;
833	}
834
835	/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
836	/// its predecessor states to reflect the change.
837	void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
838	LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
839	LLVM_DEBUG(dumpNode(*SU));
840
841	for (SDep &Pred : SU->Preds) {
842	CapturePred(PredEdge: &Pred);
843	if (Pred.isAssignedRegDep() && SU == LiveRegGens [Pred.getReg()]){
844	assert(NumLiveRegs > `0` && "NumLiveRegs is already zero!");
845	assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
846	"Physical register dependency violated?");
847	--NumLiveRegs;
848	LiveRegDefs [Pred.getReg()] = nullptr;
849	LiveRegGens [Pred.getReg()] = nullptr;
850	releaseInterferences(Reg: Pred.getReg());
851	}
852	}
853
854	// Reclaim the special call resource dependence, if this is the beginning
855	// of a call.
856	unsigned CallResource = TRI->getNumRegs();
857	for (const SDNode *SUNode = SU->getNode(); SUNode;
858	SUNode = SUNode->getGluedNode()) {
859	if (SUNode->isMachineOpcode() &&
860	SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
861	SUnit *SeqEnd = CallSeqEndForStart [SU];
862	assert(SeqEnd && "Call sequence start/end must be known");
863	assert(!LiveRegDefs[CallResource]);
864	assert(!LiveRegGens[CallResource]);
865	++NumLiveRegs;
866	LiveRegDefs [CallResource] = SU;
867	LiveRegGens [CallResource] = SeqEnd;
868	}
869	}
870
871	// Release the special call resource dependence, if this is the end
872	// of a call.
873	if (LiveRegGens [CallResource] == SU)
874	for (const SDNode *SUNode = SU->getNode(); SUNode;
875	SUNode = SUNode->getGluedNode()) {
876	if (SUNode->isMachineOpcode() &&
877	SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
878	assert(NumLiveRegs > `0` && "NumLiveRegs is already zero!");
879	assert(LiveRegDefs[CallResource]);
880	assert(LiveRegGens[CallResource]);
881	--NumLiveRegs;
882	LiveRegDefs [CallResource] = nullptr;
883	LiveRegGens [CallResource] = nullptr;
884	releaseInterferences(Reg: CallResource);
885	}
886	}
887
888	for (auto &Succ : SU->Succs) {
889	if (Succ.isAssignedRegDep()) {
890	auto Reg = Succ.getReg();
891	if (!LiveRegDefs [Reg])
892	++NumLiveRegs;
893	// This becomes the nearest def. Note that an earlier def may still be
894	// pending if this is a two-address node.
895	LiveRegDefs [Reg] = SU;
896
897	// Update LiveRegGen only if was empty before this unscheduling.
898	// This is to avoid incorrect updating LiveRegGen set in previous run.
899	if (!LiveRegGens [Reg]) {
900	// Find the successor with the lowest height.
901	LiveRegGens [Reg] = Succ.getSUnit();
902	for (auto &Succ2 : SU->Succs) {
903	if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
904	Succ2.getSUnit()->getHeight() < LiveRegGens [Reg]->getHeight())
905	LiveRegGens [Reg] = Succ2.getSUnit();
906	}
907	}
908	}
909	}
910	if (SU->getHeight() < MinAvailableCycle)
911	MinAvailableCycle = SU->getHeight();
912
913	SU->setHeightDirty();
914	SU->isScheduled = false;
915	SU->isAvailable = true;
916	if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
917	// Don't make available until backtracking is complete.
918	SU->isPending = true;
919	PendingQueue.push_back(x: SU);
920	}
921	else {
922	AvailableQueue->push(U: SU);
923	}
924	AvailableQueue->unscheduledNode(SU);
925	}
926
927	/// After backtracking, the hazard checker needs to be restored to a state
928	/// corresponding the current cycle.
929	void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
930	HazardRec->Reset();
931
932	unsigned LookAhead = std::min(a: (unsigned)Sequence.size(),
933	b: HazardRec->getMaxLookAhead());
934	if (LookAhead == `0`)
935	return;
936
937	std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
938	unsigned HazardCycle = (*I)->getHeight();
939	for (auto E = Sequence.end(); I != E; ++I) {
940	SUnit SU = I;
941	for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
942	HazardRec->RecedeCycle();
943	}
944	EmitNode(SU);
945	}
946	}
947
948	/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
949	/// BTCycle in order to schedule a specific node.
950	void ScheduleDAGRRList::BacktrackBottomUp(SUnit SU, SUnit BtSU) {
951	SUnit *OldSU = Sequence.back();
952	while (true) {
953	Sequence.pop_back();
954	// FIXME: use ready cycle instead of height
955	CurCycle = OldSU->getHeight();
956	UnscheduleNodeBottomUp(SU: OldSU);
957	AvailableQueue->setCurCycle(CurCycle);
958	if (OldSU == BtSU)
959	break;
960	OldSU = Sequence.back();
961	}
962
963	assert(!SU->isSucc(OldSU) && "Something is wrong!");
964
965	RestoreHazardCheckerBottomUp();
966
967	ReleasePending();
968
969	++NumBacktracks;
970	}
971
972	static bool isOperandOf(const SUnit SU, SDNode N) {
973	for (const SDNode *SUNode = SU->getNode(); SUNode;
974	SUNode = SUNode->getGluedNode()) {
975	if (SUNode->isOperandOf(N))
976	return true;
977	}
978	return false;
979	}
980
981	/// TryUnfold - Attempt to unfold
982	SUnit ScheduleDAGRRList::TryUnfoldSU(SUnit SU) {
983	SDNode *N = SU->getNode();
984	// Use while over if to ease fall through.
985	SmallVector<SDNode *, `2`> NewNodes;
986	if (!TII->unfoldMemoryOperand(DAG&: *DAG, N, NewNodes))
987	return nullptr;
988
989	assert(NewNodes.size() == `2` && "Expected a load folding node!");
990
991	N = NewNodes [`1`];
992	SDNode *LoadNode = NewNodes [`0`];
993	unsigned NumVals = N->getNumValues();
994	unsigned OldNumVals = SU->getNode()->getNumValues();
995
996	// LoadNode may already exist. This can happen when there is another
997	// load from the same location and producing the same type of value
998	// but it has different alignment or volatileness.
999	bool isNewLoad = true;
1000	SUnit *LoadSU;
1001	if (LoadNode->getNodeId() != -`1`) {
1002	LoadSU = &SUnits [LoadNode->getNodeId()];
1003	// If LoadSU has already been scheduled, we should clone it but
1004	// this would negate the benefit to unfolding so just return SU.
1005	if (LoadSU->isScheduled)
1006	return SU;
1007	isNewLoad = false;
1008	} else {
1009	LoadSU = CreateNewSUnit(N: LoadNode);
1010	LoadNode->setNodeId(LoadSU->NodeNum);
1011
1012	InitNumRegDefsLeft(SU: LoadSU);
1013	computeLatency(SU: LoadSU);
1014	}
1015
1016	bool isNewN = true;
1017	SUnit *NewSU;
1018	// This can only happen when isNewLoad is false.
1019	if (N->getNodeId() != -`1`) {
1020	NewSU = &SUnits [N->getNodeId()];
1021	// If NewSU has already been scheduled, we need to clone it, but this
1022	// negates the benefit to unfolding so just return SU.
1023	if (NewSU->isScheduled) {
1024	return SU;
1025	}
1026	isNewN = false;
1027	} else {
1028	NewSU = CreateNewSUnit(N);
1029	N->setNodeId(NewSU->NodeNum);
1030
1031	const MCInstrDesc &MCID = TII->get(Opcode: N->getMachineOpcode());
1032	for (unsigned i = `0`; i != MCID.getNumOperands(); ++i) {
1033	if (MCID.getOperandConstraint(OpNum: i, Constraint: MCOI::TIED_TO) != -`1`) {
1034	NewSU->isTwoAddress = true;
1035	break;
1036	}
1037	}
1038	if (MCID.isCommutable())
1039	NewSU->isCommutable = true;
1040
1041	InitNumRegDefsLeft(SU: NewSU);
1042	computeLatency(SU: NewSU);
1043	}
1044
1045	LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1046
1047	// Now that we are committed to unfolding replace DAG Uses.
1048	for (unsigned i = `0`; i != NumVals; ++i)
1049	DAG->ReplaceAllUsesOfValueWith(From: SDValue (SU->getNode(), i), To: SDValue (N, i));
1050	DAG->ReplaceAllUsesOfValueWith(From: SDValue (SU->getNode(), OldNumVals - `1`),
1051	To: SDValue (LoadNode, `1`));
1052
1053	// Record all the edges to and from the old SU, by category.
1054	SmallVector<SDep, `4`> ChainPreds;
1055	SmallVector<SDep, `4`> ChainSuccs;
1056	SmallVector<SDep, `4`> LoadPreds;
1057	SmallVector<SDep, `4`> NodePreds;
1058	SmallVector<SDep, `4`> NodeSuccs;
1059	for (SDep &Pred : SU->Preds) {
1060	if (Pred.isCtrl())
1061	ChainPreds.push_back(Elt: Pred);
1062	else if (isOperandOf(SU: Pred.getSUnit(), N: LoadNode))
1063	LoadPreds.push_back(Elt: Pred);
1064	else
1065	NodePreds.push_back(Elt: Pred);
1066	}
1067	for (SDep &Succ : SU->Succs) {
1068	if (Succ.isCtrl())
1069	ChainSuccs.push_back(Elt: Succ);
1070	else
1071	NodeSuccs.push_back(Elt: Succ);
1072	}
1073
1074	// Now assign edges to the newly-created nodes.
1075	for (const SDep &Pred : ChainPreds) {
1076	RemovePred(SU, D: Pred);
1077	if (isNewLoad)
1078	AddPredQueued(SU: LoadSU, D: Pred);
1079	}
1080	for (const SDep &Pred : LoadPreds) {
1081	RemovePred(SU, D: Pred);
1082	if (isNewLoad)
1083	AddPredQueued(SU: LoadSU, D: Pred);
1084	}
1085	for (const SDep &Pred : NodePreds) {
1086	RemovePred(SU, D: Pred);
1087	AddPredQueued(SU: NewSU, D: Pred);
1088	}
1089	for (SDep &D : NodeSuccs) {
1090	SUnit *SuccDep = D.getSUnit();
1091	D.setSUnit(SU);
1092	RemovePred(SU: SuccDep, D);
1093	D.setSUnit(NewSU);
1094	AddPredQueued(SU: SuccDep, D);
1095	// Balance register pressure.
1096	if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1097	!D.isCtrl() && NewSU->NumRegDefsLeft > `0`)
1098	--NewSU->NumRegDefsLeft;
1099	}
1100	for (SDep &D : ChainSuccs) {
1101	SUnit *SuccDep = D.getSUnit();
1102	D.setSUnit(SU);
1103	RemovePred(SU: SuccDep, D);
1104	if (isNewLoad) {
1105	D.setSUnit(LoadSU);
1106	AddPredQueued(SU: SuccDep, D);
1107	}
1108	}
1109
1110	// Add a data dependency to reflect that NewSU reads the value defined
1111	// by LoadSU.
1112	SDep D(LoadSU, SDep::Data, `0`);
1113	D.setLatency(LoadSU->Latency);
1114	AddPredQueued(SU: NewSU, D);
1115
1116	if (isNewLoad)
1117	AvailableQueue->addNode(SU: LoadSU);
1118	if (isNewN)
1119	AvailableQueue->addNode(SU: NewSU);
1120
1121	++NumUnfolds;
1122
1123	if (NewSU->NumSuccsLeft == `0`)
1124	NewSU->isAvailable = true;
1125
1126	return NewSU;
1127	}
1128
1129	/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1130	/// successors to the newly created node.
1131	SUnit ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit SU) {
1132	SDNode *N = SU->getNode();
1133	if (!N)
1134	return nullptr;
1135
1136	LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1137	LLVM_DEBUG(dumpNode(*SU));
1138
1139	if (N->getGluedNode() &&
1140	!TII->canCopyGluedNodeDuringSchedule(N)) {
1141	LLVM_DEBUG(
1142	dbgs()
1143	<< "Giving up because it has incoming glue and the target does not "
1144	"want to copy it\n");
1145	return nullptr;
1146	}
1147
1148	SUnit *NewSU;
1149	bool TryUnfold = false;
1150	for (unsigned i = `0`, e = N->getNumValues(); i != e; ++i) {
1151	MVT VT = N->getSimpleValueType(ResNo: i);
1152	if (VT == MVT::Glue) {
1153	LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1154	return nullptr;
1155	} else if (VT == MVT::Other)
1156	TryUnfold = true;
1157	}
1158	for (const SDValue &Op : N->op_values()) {
1159	MVT VT = Op.getNode()->getSimpleValueType(ResNo: Op.getResNo());
1160	if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1161	LLVM_DEBUG(
1162	dbgs() << "Giving up because it one of the operands is glue and "
1163	"the target does not want to copy it\n");
1164	return nullptr;
1165	}
1166	}
1167
1168	// If possible unfold instruction.
1169	if (TryUnfold) {
1170	SUnit *UnfoldSU = TryUnfoldSU(SU);
1171	if (!UnfoldSU)
1172	return nullptr;
1173	SU = UnfoldSU;
1174	N = SU->getNode();
1175	// If this can be scheduled don't bother duplicating and just return
1176	if (SU->NumSuccsLeft == `0`)
1177	return SU;
1178	}
1179
1180	LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1181	NewSU = CreateClone(N: SU);
1182
1183	// New SUnit has the exact same predecessors.
1184	for (SDep &Pred : SU->Preds)
1185	if (!Pred.isArtificial())
1186	AddPredQueued(SU: NewSU, D: Pred);
1187
1188	// Make sure the clone comes after the original. (InstrEmitter assumes
1189	// this ordering.)
1190	AddPredQueued(SU: NewSU, D: SDep (SU, SDep::Artificial));
1191
1192	// Only copy scheduled successors. Cut them from old node's successor
1193	// list and move them over.
1194	SmallVector<std::pair<SUnit *, SDep>, `4`> DelDeps;
1195	for (SDep &Succ : SU->Succs) {
1196	if (Succ.isArtificial())
1197	continue;
1198	SUnit *SuccSU = Succ.getSUnit();
1199	if (SuccSU->isScheduled) {
1200	SDep D = Succ;
1201	D.setSUnit(NewSU);
1202	AddPredQueued(SU: SuccSU, D);
1203	D.setSUnit(SU);
1204	DelDeps.emplace_back(Args&: SuccSU, Args&: D);
1205	}
1206	}
1207	for (const auto &[DelSU, DelD] : DelDeps)
1208	RemovePred(SU: DelSU, D: DelD);
1209
1210	AvailableQueue->updateNode(SU);
1211	AvailableQueue->addNode(SU: NewSU);
1212
1213	++NumDups;
1214	return NewSU;
1215	}
1216
1217	/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1218	/// scheduled successors of the given SUnit to the last copy.
1219	void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit SU, unsigned* Reg,
1220	const TargetRegisterClass *DestRC,
1221	const TargetRegisterClass *SrcRC,
1222	SmallVectorImpl<SUnit*> &Copies) {
1223	SUnit CopyFromSU = CreateNewSUnit(N: nullptr*);
1224	CopyFromSU->CopySrcRC = SrcRC;
1225	CopyFromSU->CopyDstRC = DestRC;
1226
1227	SUnit CopyToSU = CreateNewSUnit(N: nullptr*);
1228	CopyToSU->CopySrcRC = DestRC;
1229	CopyToSU->CopyDstRC = SrcRC;
1230
1231	// Only copy scheduled successors. Cut them from old node's successor
1232	// list and move them over.
1233	SmallVector<std::pair<SUnit *, SDep>, `4`> DelDeps;
1234	for (SDep &Succ : SU->Succs) {
1235	if (Succ.isArtificial())
1236	continue;
1237	SUnit *SuccSU = Succ.getSUnit();
1238	if (SuccSU->isScheduled) {
1239	SDep D = Succ;
1240	D.setSUnit(CopyToSU);
1241	AddPredQueued(SU: SuccSU, D);
1242	DelDeps.emplace_back(Args&: SuccSU, Args&: Succ);
1243	}
1244	else {
1245	// Avoid scheduling the def-side copy before other successors. Otherwise,
1246	// we could introduce another physreg interference on the copy and
1247	// continue inserting copies indefinitely.
1248	AddPredQueued(SU: SuccSU, D: SDep (CopyFromSU, SDep::Artificial));
1249	}
1250	}
1251	for (const auto &[DelSU, DelD] : DelDeps)
1252	RemovePred(SU: DelSU, D: DelD);
1253
1254	SDep FromDep(SU, SDep::Data, Reg);
1255	FromDep.setLatency(SU->Latency);
1256	AddPredQueued(SU: CopyFromSU, D: FromDep);
1257	SDep ToDep(CopyFromSU, SDep::Data, `0`);
1258	ToDep.setLatency(CopyFromSU->Latency);
1259	AddPredQueued(SU: CopyToSU, D: ToDep);
1260
1261	AvailableQueue->updateNode(SU);
1262	AvailableQueue->addNode(SU: CopyFromSU);
1263	AvailableQueue->addNode(SU: CopyToSU);
1264	Copies.push_back(Elt: CopyFromSU);
1265	Copies.push_back(Elt: CopyToSU);
1266
1267	++NumPRCopies;
1268	}
1269
1270	/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1271	/// definition of the specified node.
1272	/// FIXME: Move to SelectionDAG?
1273	static MVT getPhysicalRegisterVT(SDNode N, unsigned* Reg,
1274	const TargetInstrInfo *TII) {
1275	unsigned NumRes;
1276	if (N->getOpcode() == ISD::CopyFromReg) {
1277	// CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1278	NumRes = `1`;
1279	} else {
1280	const MCInstrDesc &MCID = TII->get(Opcode: N->getMachineOpcode());
1281	assert(!MCID.implicit_defs().empty() &&
1282	"Physical reg def must be in implicit def list!");
1283	NumRes = MCID.getNumDefs();
1284	for (MCPhysReg ImpDef : MCID.implicit_defs()) {
1285	if (Reg == ImpDef)
1286	break;
1287	++NumRes;
1288	}
1289	}
1290	return N->getSimpleValueType(ResNo: NumRes);
1291	}
1292
1293	/// CheckForLiveRegDef - Return true and update live register vector if the
1294	/// specified register def of the specified SUnit clobbers any "live" registers.
1295	static void CheckForLiveRegDef(SUnit SU, unsigned* Reg, SUnit **LiveRegDefs,
1296	SmallSet<unsigned, `4`> &RegAdded,
1297	SmallVectorImpl<unsigned> &LRegs,
1298	const TargetRegisterInfo *TRI,
1299	const SDNode Node = nullptr*) {
1300	for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1301
1302	// Check if Ref is live.
1303	if (!LiveRegDefs[AliasI]) continue*;
1304
1305	// Allow multiple uses of the same def.
1306	if (LiveRegDefs[AliasI] == SU) continue*;
1307
1308	// Allow multiple uses of same def
1309	if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
1310	continue;
1311
1312	// Add Reg to the set of interfering live regs.
1313	if (RegAdded.insert(V: *AliasI).second) {
1314	LRegs.push_back(Elt: *AliasI);
1315	}
1316	}
1317	}
1318
1319	/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1320	/// by RegMask, and add them to LRegs.
1321	static void CheckForLiveRegDefMasked(SUnit SU, const* uint32_t *RegMask,
1322	ArrayRef<SUnit*> LiveRegDefs,
1323	SmallSet<unsigned, `4`> &RegAdded,
1324	SmallVectorImpl<unsigned> &LRegs) {
1325	// Look at all live registers. Skip Reg0 and the special CallResource.
1326	for (unsigned i = `1`, e = LiveRegDefs.size()-`1`; i != e; ++i) {
1327	if (!LiveRegDefs [i]) continue;
1328	if (LiveRegDefs [i] == SU) continue;
1329	if (!MachineOperand::clobbersPhysReg(RegMask, PhysReg: i)) continue;
1330	if (RegAdded.insert(V: i).second)
1331	LRegs.push_back(Elt: i);
1332	}
1333	}
1334
1335	/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1336	static const uint32_t getNodeRegMask(const* SDNode *N) {
1337	for (const SDValue &Op : N->op_values())
1338	if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Val: Op.getNode()))
1339	return RegOp->getRegMask();
1340	return nullptr;
1341	}
1342
1343	/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1344	/// scheduling of the given node to satisfy live physical register dependencies.
1345	/// If the specific node is the last one that's available to schedule, do
1346	/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1347	bool ScheduleDAGRRList::
1348	DelayForLiveRegsBottomUp(SUnit SU, SmallVectorImpl<unsigned*> &LRegs) {
1349	if (NumLiveRegs == `0`)
1350	return false;
1351
1352	SmallSet<unsigned, `4`> RegAdded;
1353	// If this node would clobber any "live" register, then it's not ready.
1354	//
1355	// If SU is the currently live definition of the same register that it uses,
1356	// then we are free to schedule it.
1357	for (SDep &Pred : SU->Preds) {
1358	if (Pred.isAssignedRegDep() && LiveRegDefs [Pred.getReg()] != SU)
1359	CheckForLiveRegDef(SU: Pred.getSUnit(), Reg: Pred.getReg(), LiveRegDefs: LiveRegDefs.get(),
1360	RegAdded, LRegs, TRI);
1361	}
1362
1363	for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1364	if (Node->getOpcode() == ISD::INLINEASM \|\|
1365	Node->getOpcode() == ISD::INLINEASM_BR) {
1366	// Inline asm can clobber physical defs.
1367	unsigned NumOps = Node->getNumOperands();
1368	if (Node->getOperand(Num: NumOps-`1`).getValueType() == MVT::Glue)
1369	--NumOps; // Ignore the glue operand.
1370
1371	for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1372	unsigned Flags = Node->getConstantOperandVal(Num: i);
1373	const InlineAsm::Flag F(Flags);
1374	unsigned NumVals = F.getNumOperandRegisters();
1375
1376	++i; // Skip the ID value.
1377	if (F.isRegDefKind() \|\| F.isRegDefEarlyClobberKind() \|\|
1378	F.isClobberKind()) {
1379	// Check for def of register or earlyclobber register.
1380	for (; NumVals; --NumVals, ++i) {
1381	Register Reg = cast<RegisterSDNode>(Val: Node->getOperand(Num: i))->getReg();
1382	if (Reg.isPhysical())
1383	CheckForLiveRegDef(SU, Reg, LiveRegDefs: LiveRegDefs.get(), RegAdded, LRegs, TRI);
1384	}
1385	} else
1386	i += NumVals;
1387	}
1388	continue;
1389	}
1390
1391	if (Node->getOpcode() == ISD::CopyToReg) {
1392	Register Reg = cast<RegisterSDNode>(Val: Node->getOperand(Num: `1`))->getReg();
1393	if (Reg.isPhysical()) {
1394	SDNode *SrcNode = Node->getOperand(Num: `2`).getNode();
1395	CheckForLiveRegDef(SU, Reg, LiveRegDefs: LiveRegDefs.get(), RegAdded, LRegs, TRI,
1396	Node: SrcNode);
1397	}
1398	}
1399
1400	if (!Node->isMachineOpcode())
1401	continue;
1402	// If we're in the middle of scheduling a call, don't begin scheduling
1403	// another call. Also, don't allow any physical registers to be live across
1404	// the call.
1405	if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1406	// Check the special calling-sequence resource.
1407	unsigned CallResource = TRI->getNumRegs();
1408	if (LiveRegDefs [CallResource]) {
1409	SDNode *Gen = LiveRegGens [CallResource]->getNode();
1410	while (SDNode *Glued = Gen->getGluedNode())
1411	Gen = Glued;
1412	if (!IsChainDependent(Outer: Gen, Inner: Node, NestLevel: `0`, TII) &&
1413	RegAdded.insert(V: CallResource).second)
1414	LRegs.push_back(Elt: CallResource);
1415	}
1416	}
1417	if (const uint32_t *RegMask = getNodeRegMask(N: Node))
1418	CheckForLiveRegDefMasked(SU, RegMask,
1419	LiveRegDefs: ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1420	RegAdded, LRegs);
1421
1422	const MCInstrDesc &MCID = TII->get(Opcode: Node->getMachineOpcode());
1423	if (MCID.hasOptionalDef()) {
1424	// Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1425	// This operand can be either a def of CPSR, if the S bit is set; or a use
1426	// of %noreg. When the OptionalDef is set to a valid register, we need to
1427	// handle it in the same way as an ImplicitDef.
1428	for (unsigned i = `0`; i < MCID.getNumDefs(); ++i)
1429	if (MCID.operands()[i].isOptionalDef()) {
1430	const SDValue &OptionalDef = Node->getOperand(Num: i - Node->getNumValues());
1431	Register Reg = cast<RegisterSDNode>(Val: OptionalDef)->getReg();
1432	CheckForLiveRegDef(SU, Reg, LiveRegDefs: LiveRegDefs.get(), RegAdded, LRegs, TRI);
1433	}
1434	}
1435	for (MCPhysReg Reg : MCID.implicit_defs())
1436	CheckForLiveRegDef(SU, Reg, LiveRegDefs: LiveRegDefs.get(), RegAdded, LRegs, TRI);
1437	}
1438
1439	return !LRegs.empty();
1440	}
1441
1442	void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1443	// Add the nodes that aren't ready back onto the available list.
1444	for (unsigned i = Interferences.size(); i > `0`; --i) {
1445	SUnit *SU = Interferences [i-`1`];
1446	LRegsMapT::iterator LRegsPos = LRegsMap.find(Val: SU);
1447	if (Reg) {
1448	SmallVectorImpl<unsigned> &LRegs = LRegsPos ->second;
1449	if (!is_contained(Range&: LRegs, Element: Reg))
1450	continue;
1451	}
1452	SU->isPending = false;
1453	// The interfering node may no longer be available due to backtracking.
1454	// Furthermore, it may have been made available again, in which case it is
1455	// now already in the AvailableQueue.
1456	if (SU->isAvailable && !SU->NodeQueueId) {
1457	LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << `'\n'`);
1458	AvailableQueue->push(U: SU);
1459	}
1460	if (i < Interferences.size())
1461	Interferences [i-`1`] = Interferences.back();
1462	Interferences.pop_back();
1463	LRegsMap.erase(I: LRegsPos);
1464	}
1465	}
1466
1467	/// Return a node that can be scheduled in this cycle. Requirements:
1468	/// (1) Ready: latency has been satisfied
1469	/// (2) No Hazards: resources are available
1470	/// (3) No Interferences: may unschedule to break register interferences.
1471	SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1472	SUnit CurSU = AvailableQueue->empty() ? nullptr* : AvailableQueue->pop();
1473	auto FindAvailableNode = [&]() {
1474	while (CurSU) {
1475	SmallVector<unsigned, `4`> LRegs;
1476	if (!DelayForLiveRegsBottomUp(SU: CurSU, LRegs))
1477	break;
1478	LLVM_DEBUG(dbgs() << " Interfering reg ";
1479	if (LRegs[`0`] == TRI->getNumRegs()) dbgs() << "CallResource";
1480	else dbgs() << printReg(LRegs[`0`], TRI);
1481	dbgs() << " SU #" << CurSU->NodeNum << `'\n'`);
1482	auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(Key: CurSU, Args&: LRegs);
1483	if (LRegsInserted) {
1484	CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1485	Interferences.push_back(Elt: CurSU);
1486	}
1487	else {
1488	assert(CurSU->isPending && "Interferences are pending");
1489	// Update the interference with current live regs.
1490	LRegsIter ->second = LRegs;
1491	}
1492	CurSU = AvailableQueue->pop();
1493	}
1494	};
1495	FindAvailableNode ();
1496	if (CurSU)
1497	return CurSU;
1498
1499	// We query the topological order in the loop body, so make sure outstanding
1500	// updates are applied before entering it (we only enter the loop if there
1501	// are some interferences). If we make changes to the ordering, we exit
1502	// the loop.
1503
1504	// All candidates are delayed due to live physical reg dependencies.
1505	// Try backtracking, code duplication, or inserting cross class copies
1506	// to resolve it.
1507	for (SUnit *TrySU : Interferences) {
1508	SmallVectorImpl<unsigned> &LRegs = LRegsMap [TrySU];
1509
1510	// Try unscheduling up to the point where it's safe to schedule
1511	// this node.
1512	SUnit BtSU = nullptr*;
1513	unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1514	for (unsigned Reg : LRegs) {
1515	if (LiveRegGens [Reg]->getHeight() < LiveCycle) {
1516	BtSU = LiveRegGens [Reg];
1517	LiveCycle = BtSU->getHeight();
1518	}
1519	}
1520	if (!WillCreateCycle(SU: TrySU, TargetSU: BtSU)) {
1521	// BacktrackBottomUp mutates Interferences!
1522	BacktrackBottomUp(SU: TrySU, BtSU);
1523
1524	// Force the current node to be scheduled before the node that
1525	// requires the physical reg dep.
1526	if (BtSU->isAvailable) {
1527	BtSU->isAvailable = false;
1528	if (!BtSU->isPending)
1529	AvailableQueue->remove(SU: BtSU);
1530	}
1531	LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1532	<< ") to SU(" << TrySU->NodeNum << ")\n");
1533	AddPredQueued(SU: TrySU, D: SDep (BtSU, SDep::Artificial));
1534
1535	// If one or more successors has been unscheduled, then the current
1536	// node is no longer available.
1537	if (!TrySU->isAvailable \|\| !TrySU->NodeQueueId) {
1538	LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1539	CurSU = AvailableQueue->pop();
1540	} else {
1541	LLVM_DEBUG(dbgs() << "TrySU available\n");
1542	// Available and in AvailableQueue
1543	AvailableQueue->remove(SU: TrySU);
1544	CurSU = TrySU;
1545	}
1546	FindAvailableNode ();
1547	// Interferences has been mutated. We must break.
1548	break;
1549	}
1550	}
1551
1552	if (!CurSU) {
1553	// Can't backtrack. If it's too expensive to copy the value, then try
1554	// duplicate the nodes that produces these "too expensive to copy"
1555	// values to break the dependency. In case even that doesn't work,
1556	// insert cross class copies.
1557	// If it's not too expensive, i.e. cost != -1, issue copies.
1558	SUnit *TrySU = Interferences [`0`];
1559	SmallVectorImpl<unsigned> &LRegs = LRegsMap [TrySU];
1560	assert(LRegs.size() == `1` && "Can't handle this yet!");
1561	unsigned Reg = LRegs [`0`];
1562	SUnit *LRDef = LiveRegDefs [Reg];
1563	MVT VT = getPhysicalRegisterVT(N: LRDef->getNode(), Reg, TII);
1564	const TargetRegisterClass *RC =
1565	TRI->getMinimalPhysRegClass(Reg, VT);
1566	const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1567
1568	// If cross copy register class is the same as RC, then it must be possible
1569	// copy the value directly. Do not try duplicate the def.
1570	// If cross copy register class is not the same as RC, then it's possible to
1571	// copy the value but it require cross register class copies and it is
1572	// expensive.
1573	// If cross copy register class is null, then it's not possible to copy
1574	// the value at all.
1575	SUnit NewDef = nullptr*;
1576	if (DestRC != RC) {
1577	NewDef = CopyAndMoveSuccessors(SU: LRDef);
1578	if (!DestRC && !NewDef)
1579	report_fatal_error(reason: "Can't handle live physical register dependency!");
1580	}
1581	if (!NewDef) {
1582	// Issue copies, these can be expensive cross register class copies.
1583	SmallVector<SUnit*, `2`> Copies;
1584	InsertCopiesAndMoveSuccs(SU: LRDef, Reg, DestRC, SrcRC: RC, Copies);
1585	LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1586	<< " to SU #" << Copies.front()->NodeNum << "\n");
1587	AddPredQueued(SU: TrySU, D: SDep (Copies.front(), SDep::Artificial));
1588	NewDef = Copies.back();
1589	}
1590
1591	LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1592	<< " to SU #" << TrySU->NodeNum << "\n");
1593	LiveRegDefs [Reg] = NewDef;
1594	AddPredQueued(SU: NewDef, D: SDep (TrySU, SDep::Artificial));
1595	TrySU->isAvailable = false;
1596	CurSU = NewDef;
1597	}
1598	assert(CurSU && "Unable to resolve live physical register dependencies!");
1599	return CurSU;
1600	}
1601
1602	/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1603	/// schedulers.
1604	void ScheduleDAGRRList::ListScheduleBottomUp() {
1605	// Release any predecessors of the special Exit node.
1606	ReleasePredecessors(SU: &ExitSU);
1607
1608	// Add root to Available queue.
1609	if (!SUnits.empty()) {
1610	SUnit *RootSU = &SUnits [DAG->getRoot().getNode()->getNodeId()];
1611	assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1612	RootSU->isAvailable = true;
1613	AvailableQueue->push(U: RootSU);
1614	}
1615
1616	// While Available queue is not empty, grab the node with the highest
1617	// priority. If it is not ready put it back. Schedule the node.
1618	Sequence.reserve(n: SUnits.size());
1619	while (!AvailableQueue->empty() \|\| !Interferences.empty()) {
1620	LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1621	AvailableQueue->dump(this));
1622
1623	// Pick the best node to schedule taking all constraints into
1624	// consideration.
1625	SUnit *SU = PickNodeToScheduleBottomUp();
1626
1627	AdvancePastStalls(SU);
1628
1629	ScheduleNodeBottomUp(SU);
1630
1631	while (AvailableQueue->empty() && !PendingQueue.empty()) {
1632	// Advance the cycle to free resources. Skip ahead to the next ready SU.
1633	assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1634	"MinAvailableCycle uninitialized");
1635	AdvanceToCycle(NextCycle: std::max(a: CurCycle + `1`, b: MinAvailableCycle));
1636	}
1637	}
1638
1639	// Reverse the order if it is bottom up.
1640	std::reverse(first: Sequence.begin(), last: Sequence.end());
1641
1642	#ifndef NDEBUG
1643	VerifyScheduledSequence(/isBottomUp=/true);
1644	#endif
1645	}
1646
1647	namespace {
1648
1649	class RegReductionPQBase;
1650
1651	struct queue_sort {
1652	bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1653	};
1654
1655	#ifndef NDEBUG
1656	template<class SF>
1657	struct reverse_sort : public queue_sort {
1658	SF &SortFunc;
1659
1660	reverse_sort(SF &sf) : SortFunc(sf) {}
1661
1662	bool operator()(SUnit* left, SUnit* right) const {
1663	// reverse left/right rather than simply !SortFunc(left, right)
1664	// to expose different paths in the comparison logic.
1665	return SortFunc(right, left);
1666	}
1667	};
1668	#endif // NDEBUG
1669
1670	/// bu_ls_rr_sort - Priority function for bottom up register pressure
1671	// reduction scheduler.
1672	struct bu_ls_rr_sort : public queue_sort {
1673	enum {
1674	IsBottomUp = true,
1675	HasReadyFilter = false
1676	};
1677
1678	RegReductionPQBase *SPQ;
1679
1680	bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1681
1682	bool operator()(SUnit* left, SUnit* right) const;
1683	};
1684
1685	// src_ls_rr_sort - Priority function for source order scheduler.
1686	struct src_ls_rr_sort : public queue_sort {
1687	enum {
1688	IsBottomUp = true,
1689	HasReadyFilter = false
1690	};
1691
1692	RegReductionPQBase *SPQ;
1693
1694	src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1695
1696	bool operator()(SUnit* left, SUnit* right) const;
1697	};
1698
1699	// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1700	struct hybrid_ls_rr_sort : public queue_sort {
1701	enum {
1702	IsBottomUp = true,
1703	HasReadyFilter = false
1704	};
1705
1706	RegReductionPQBase *SPQ;
1707
1708	hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1709
1710	bool isReady(SUnit SU, unsigned* CurCycle) const;
1711
1712	bool operator()(SUnit* left, SUnit* right) const;
1713	};
1714
1715	// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1716	// scheduler.
1717	struct ilp_ls_rr_sort : public queue_sort {
1718	enum {
1719	IsBottomUp = true,
1720	HasReadyFilter = false
1721	};
1722
1723	RegReductionPQBase *SPQ;
1724
1725	ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1726
1727	bool isReady(SUnit SU, unsigned* CurCycle) const;
1728
1729	bool operator()(SUnit* left, SUnit* right) const;
1730	};
1731
1732	class RegReductionPQBase : public SchedulingPriorityQueue {
1733	protected:
1734	std::vector<SUnit *> Queue;
1735	unsigned CurQueueId = `0`;
1736	bool TracksRegPressure;
1737	bool SrcOrder;
1738
1739	// SUnits - The SUnits for the current graph.
1740	std::vector<SUnit> SUnits = nullptr*;
1741
1742	MachineFunction &MF;
1743	const TargetInstrInfo TII = nullptr*;
1744	const TargetRegisterInfo TRI = nullptr*;
1745	const TargetLowering TLI = nullptr*;
1746	ScheduleDAGRRList scheduleDAG = nullptr*;
1747
1748	// SethiUllmanNumbers - The SethiUllman number for each node.
1749	std::vector<unsigned> SethiUllmanNumbers;
1750
1751	/// RegPressure - Tracking current reg pressure per register class.
1752	std::vector<unsigned> RegPressure;
1753
1754	/// RegLimit - Tracking the number of allocatable registers per register
1755	/// class.
1756	std::vector<unsigned> RegLimit;
1757
1758	public:
1759	RegReductionPQBase(MachineFunction &mf,
1760	bool hasReadyFilter,
1761	bool tracksrp,
1762	bool srcorder,
1763	const TargetInstrInfo *tii,
1764	const TargetRegisterInfo *tri,
1765	const TargetLowering *tli)
1766	: SchedulingPriorityQueue (hasReadyFilter), TracksRegPressure(tracksrp),
1767	SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1768	if (TracksRegPressure) {
1769	unsigned NumRC = TRI->getNumRegClasses();
1770	RegLimit.resize(new_size: NumRC);
1771	RegPressure.resize(new_size: NumRC);
1772	std::fill(RegLimit.begin(), RegLimit.end(), `0`);
1773	std::fill(RegPressure.begin(), RegPressure.end(), `0`);
1774	for (const TargetRegisterClass *RC : TRI->regclasses())
1775	RegLimit [RC->getID()] = tri->getRegPressureLimit(RC, MF);
1776	}
1777	}
1778
1779	void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1780	scheduleDAG = scheduleDag;
1781	}
1782
1783	ScheduleHazardRecognizer* getHazardRec() {
1784	return scheduleDAG->getHazardRec();
1785	}
1786
1787	void initNodes(std::vector<SUnit> &sunits) override;
1788
1789	void addNode(const SUnit *SU) override;
1790
1791	void updateNode(const SUnit *SU) override;
1792
1793	void releaseState() override {
1794	SUnits = nullptr;
1795	SethiUllmanNumbers.clear();
1796	std::fill(RegPressure.begin(), RegPressure.end(), `0`);
1797	}
1798
1799	unsigned getNodePriority(const SUnit SU) const*;
1800
1801	unsigned getNodeOrdering(const SUnit SU) const* {
1802	if (!SU->getNode()) return `0`;
1803
1804	return SU->getNode()->getIROrder();
1805	}
1806
1807	bool empty() const override { return Queue.empty(); }
1808
1809	void push(SUnit *U) override {
1810	assert(!U->NodeQueueId && "Node in the queue already");
1811	U->NodeQueueId = ++CurQueueId;
1812	Queue.push_back(x: U);
1813	}
1814
1815	void remove(SUnit *SU) override {
1816	assert(!Queue.empty() && "Queue is empty!");
1817	assert(SU->NodeQueueId != `0` && "Not in queue!");
1818	std::vector<SUnit *>::iterator I = llvm::find(Range&: Queue, Val: SU);
1819	if (I != std::prev(x: Queue.end()))
1820	std::swap(a&: *I, b&: Queue.back());
1821	Queue.pop_back();
1822	SU->NodeQueueId = `0`;
1823	}
1824
1825	bool tracksRegPressure() const override { return TracksRegPressure; }
1826
1827	void dumpRegPressure() const;
1828
1829	bool HighRegPressure(const SUnit SU) const*;
1830
1831	bool MayReduceRegPressure(SUnit SU) const*;
1832
1833	int RegPressureDiff(SUnit SU, unsigned* &LiveUses) const;
1834
1835	void scheduledNode(SUnit *SU) override;
1836
1837	void unscheduledNode(SUnit *SU) override;
1838
1839	protected:
1840	bool canClobber(const SUnit SU, const* SUnit *Op);
1841	void AddPseudoTwoAddrDeps();
1842	void PrescheduleNodesWithMultipleUses();
1843	void CalculateSethiUllmanNumbers();
1844	};
1845
1846	template<class SF>
1847	static SUnit popFromQueueImpl(std::vector<SUnit > &Q, SF &Picker) {
1848	unsigned BestIdx = `0`;
1849	// Only compute the cost for the first 1000 items in the queue, to avoid
1850	// excessive compile-times for very large queues.
1851	for (unsigned I = `1`, E = std::min(a: Q.size(), b: (decltype(Q.size()))`1000`); I != E;
1852	I++)
1853	if (Picker(Q [BestIdx], Q [I]))
1854	BestIdx = I;
1855	SUnit *V = Q [BestIdx];
1856	if (BestIdx + `1` != Q.size())
1857	std::swap(a&: Q [BestIdx], b&: Q.back());
1858	Q.pop_back();
1859	return V;
1860	}
1861
1862	template<class SF>
1863	SUnit popFromQueue(std::vector<SUnit > &Q, SF &Picker, ScheduleDAG *DAG) {
1864	#ifndef NDEBUG
1865	if (DAG->StressSched) {
1866	reverse_sort<SF> RPicker(Picker);
1867	return popFromQueueImpl(Q, RPicker);
1868	}
1869	#endif
1870	(void)DAG;
1871	return popFromQueueImpl(Q, Picker);
1872	}
1873
1874	//===----------------------------------------------------------------------===//
1875	// RegReductionPriorityQueue Definition
1876	//===----------------------------------------------------------------------===//
1877	//
1878	// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1879	// to reduce register pressure.
1880	//
1881	template<class SF>
1882	class RegReductionPriorityQueue : public RegReductionPQBase {
1883	SF Picker;
1884
1885	public:
1886	RegReductionPriorityQueue(MachineFunction &mf,
1887	bool tracksrp,
1888	bool srcorder,
1889	const TargetInstrInfo *tii,
1890	const TargetRegisterInfo *tri,
1891	const TargetLowering *tli)
1892	: RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1893	tii, tri, tli),
1894	Picker(this) {}
1895
1896	bool isBottomUp() const override { return SF::IsBottomUp; }
1897
1898	bool isReady(SUnit U) const* override {
1899	return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1900	}
1901
1902	SUnit *pop() override {
1903	if (Queue.empty()) return nullptr;
1904
1905	SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1906	V->NodeQueueId = `0`;
1907	return V;
1908	}
1909
1910	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1911	LLVM_DUMP_METHOD void dump(ScheduleDAG DAG) const* override {
1912	// Emulate pop() without clobbering NodeQueueIds.
1913	std::vector<SUnit *> DumpQueue = Queue;
1914	SF DumpPicker = Picker;
1915	while (!DumpQueue.empty()) {
1916	SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1917	dbgs() << "Height " << SU->getHeight() << ": ";
1918	DAG->dumpNode(*SU);
1919	}
1920	}
1921	#endif
1922	};
1923
1924	using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1925	using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1926	using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1927	using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1928
1929	} // end anonymous namespace
1930
1931	//===----------------------------------------------------------------------===//
1932	// Static Node Priority for Register Pressure Reduction
1933	//===----------------------------------------------------------------------===//
1934
1935	// Check for special nodes that bypass scheduling heuristics.
1936	// Currently this pushes TokenFactor nodes down, but may be used for other
1937	// pseudo-ops as well.
1938	//
1939	// Return -1 to schedule right above left, 1 for left above right.
1940	// Return 0 if no bias exists.
1941	static int checkSpecialNodes(const SUnit left, const* SUnit *right) {
1942	bool LSchedLow = left->isScheduleLow;
1943	bool RSchedLow = right->isScheduleLow;
1944	if (LSchedLow != RSchedLow)
1945	return LSchedLow < RSchedLow ? `1` : -`1`;
1946	return `0`;
1947	}
1948
1949	/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1950	/// Smaller number is the higher priority.
1951	static unsigned
1952	CalcNodeSethiUllmanNumber(const SUnit SU, std::vector<unsigned*> &SUNumbers) {
1953	if (SUNumbers [SU->NodeNum] != `0`)
1954	return SUNumbers [SU->NodeNum];
1955
1956	// Use WorkList to avoid stack overflow on excessively large IRs.
1957	struct WorkState {
1958	WorkState(const SUnit *SU) : SU(SU) {}
1959	const SUnit *SU;
1960	unsigned PredsProcessed = `0`;
1961	};
1962
1963	SmallVector<WorkState, `16`> WorkList;
1964	WorkList.push_back(Elt: SU);
1965	while (!WorkList.empty()) {
1966	auto &Temp = WorkList.back();
1967	auto *TempSU = Temp.SU;
1968	bool AllPredsKnown = true;
1969	// Try to find a non-evaluated pred and push it into the processing stack.
1970	for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1971	auto &Pred = TempSU->Preds [P];
1972	if (Pred.isCtrl()) continue; // ignore chain preds
1973	SUnit *PredSU = Pred.getSUnit();
1974	if (SUNumbers [PredSU->NodeNum] == `0`) {
1975	#ifndef NDEBUG
1976	// In debug mode, check that we don't have such element in the stack.
1977	for (auto It : WorkList)
1978	assert(It.SU != PredSU && "Trying to push an element twice?");
1979	#endif
1980	// Next time start processing this one starting from the next pred.
1981	Temp.PredsProcessed = P + `1`;
1982	WorkList.push_back(Elt: PredSU);
1983	AllPredsKnown = false;
1984	break;
1985	}
1986	}
1987
1988	if (!AllPredsKnown)
1989	continue;
1990
1991	// Once all preds are known, we can calculate the answer for this one.
1992	unsigned SethiUllmanNumber = `0`;
1993	unsigned Extra = `0`;
1994	for (const SDep &Pred : TempSU->Preds) {
1995	if (Pred.isCtrl()) continue; // ignore chain preds
1996	SUnit *PredSU = Pred.getSUnit();
1997	unsigned PredSethiUllman = SUNumbers [PredSU->NodeNum];
1998	assert(PredSethiUllman > `0` && "We should have evaluated this pred!");
1999	if (PredSethiUllman > SethiUllmanNumber) {
2000	SethiUllmanNumber = PredSethiUllman;
2001	Extra = `0`;
2002	} else if (PredSethiUllman == SethiUllmanNumber)
2003	++Extra;
2004	}
2005
2006	SethiUllmanNumber += Extra;
2007	if (SethiUllmanNumber == `0`)
2008	SethiUllmanNumber = `1`;
2009	SUNumbers [TempSU->NodeNum] = SethiUllmanNumber;
2010	WorkList.pop_back();
2011	}
2012
2013	assert(SUNumbers[SU->NodeNum] > `0` && "SethiUllman should never be zero!");
2014	return SUNumbers [SU->NodeNum];
2015	}
2016
2017	/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2018	/// scheduling units.
2019	void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2020	SethiUllmanNumbers.assign(n: SUnits->size(), val: `0`);
2021
2022	for (const SUnit &SU : *SUnits)
2023	CalcNodeSethiUllmanNumber(SU: &SU, SUNumbers&: SethiUllmanNumbers);
2024	}
2025
2026	void RegReductionPQBase::addNode(const SUnit *SU) {
2027	unsigned SUSize = SethiUllmanNumbers.size();
2028	if (SUnits->size() > SUSize)
2029	SethiUllmanNumbers.resize(new_size: SUSize*`2`, x: `0`);
2030	CalcNodeSethiUllmanNumber(SU, SUNumbers&: SethiUllmanNumbers);
2031	}
2032
2033	void RegReductionPQBase::updateNode(const SUnit *SU) {
2034	SethiUllmanNumbers [SU->NodeNum] = `0`;
2035	CalcNodeSethiUllmanNumber(SU, SUNumbers&: SethiUllmanNumbers);
2036	}
2037
2038	// Lower priority means schedule further down. For bottom-up scheduling, lower
2039	// priority SUs are scheduled before higher priority SUs.
2040	unsigned RegReductionPQBase::getNodePriority(const SUnit SU) const* {
2041	assert(SU->NodeNum < SethiUllmanNumbers.size());
2042	unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : `0`;
2043	if (Opc == ISD::TokenFactor \|\| Opc == ISD::CopyToReg)
2044	// CopyToReg should be close to its uses to facilitate coalescing and
2045	// avoid spilling.
2046	return `0`;
2047	if (Opc == TargetOpcode::EXTRACT_SUBREG \|\|
2048	Opc == TargetOpcode::SUBREG_TO_REG \|\|
2049	Opc == TargetOpcode::INSERT_SUBREG)
2050	// EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2051	// close to their uses to facilitate coalescing.
2052	return `0`;
2053	if (SU->NumSuccs == `0` && SU->NumPreds != `0`)
2054	// If SU does not have a register use, i.e. it doesn't produce a value
2055	// that would be consumed (e.g. store), then it terminates a chain of
2056	// computation. Give it a large SethiUllman number so it will be
2057	// scheduled right before its predecessors that it doesn't lengthen
2058	// their live ranges.
2059	return `0xffff`;
2060	if (SU->NumPreds == `0` && SU->NumSuccs != `0`)
2061	// If SU does not have a register def, schedule it close to its uses
2062	// because it does not lengthen any live ranges.
2063	return `0`;
2064	#if 1
2065	return SethiUllmanNumbers [SU->NodeNum];
2066	#else
2067	unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2068	if (SU->isCallOp) {
2069	// FIXME: This assumes all of the defs are used as call operands.
2070	int NP = (int)Priority - SU->getNode()->getNumValues();
2071	return (NP > `0`) ? NP : `0`;
2072	}
2073	return Priority;
2074	#endif
2075	}
2076
2077	//===----------------------------------------------------------------------===//
2078	// Register Pressure Tracking
2079	//===----------------------------------------------------------------------===//
2080
2081	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
2082	LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2083	for (const TargetRegisterClass *RC : TRI->regclasses()) {
2084	unsigned Id = RC->getID();
2085	unsigned RP = RegPressure[Id];
2086	if (!RP) continue;
2087	LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2088	<< RegLimit[Id] << `'\n'`);
2089	}
2090	}
2091	#endif
2092
2093	bool RegReductionPQBase::HighRegPressure(const SUnit SU) const* {
2094	if (!TLI)
2095	return false;
2096
2097	for (const SDep &Pred : SU->Preds) {
2098	if (Pred.isCtrl())
2099	continue;
2100	SUnit *PredSU = Pred.getSUnit();
2101	// NumRegDefsLeft is zero when enough uses of this node have been scheduled
2102	// to cover the number of registers defined (they are all live).
2103	if (PredSU->NumRegDefsLeft == `0`) {
2104	continue;
2105	}
2106	for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2107	RegDefPos.IsValid(); RegDefPos.Advance()) {
2108	unsigned RCId, Cost;
2109	GetCostForDef(RegDefPos, TLI, TII, TRI, RegClass&: RCId, Cost, MF);
2110
2111	if ((RegPressure [RCId] + Cost) >= RegLimit [RCId])
2112	return true;
2113	}
2114	}
2115	return false;
2116	}
2117
2118	bool RegReductionPQBase::MayReduceRegPressure(SUnit SU) const* {
2119	const SDNode *N = SU->getNode();
2120
2121	if (!N->isMachineOpcode() \|\| !SU->NumSuccs)
2122	return false;
2123
2124	unsigned NumDefs = TII->get(Opcode: N->getMachineOpcode()).getNumDefs();
2125	for (unsigned i = `0`; i != NumDefs; ++i) {
2126	MVT VT = N->getSimpleValueType(ResNo: i);
2127	if (!N->hasAnyUseOfValue(Value: i))
2128	continue;
2129	unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2130	if (RegPressure [RCId] >= RegLimit [RCId])
2131	return true;
2132	}
2133	return false;
2134	}
2135
2136	// Compute the register pressure contribution by this instruction by count up
2137	// for uses that are not live and down for defs. Only count register classes
2138	// that are already under high pressure. As a side effect, compute the number of
2139	// uses of registers that are already live.
2140	//
2141	// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2142	// so could probably be factored.
2143	int RegReductionPQBase::RegPressureDiff(SUnit SU, unsigned* &LiveUses) const {
2144	LiveUses = `0`;
2145	int PDiff = `0`;
2146	for (const SDep &Pred : SU->Preds) {
2147	if (Pred.isCtrl())
2148	continue;
2149	SUnit *PredSU = Pred.getSUnit();
2150	// NumRegDefsLeft is zero when enough uses of this node have been scheduled
2151	// to cover the number of registers defined (they are all live).
2152	if (PredSU->NumRegDefsLeft == `0`) {
2153	if (PredSU->getNode()->isMachineOpcode())
2154	++LiveUses;
2155	continue;
2156	}
2157	for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2158	RegDefPos.IsValid(); RegDefPos.Advance()) {
2159	MVT VT = RegDefPos.GetValue();
2160	unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2161	if (RegPressure [RCId] >= RegLimit [RCId])
2162	++PDiff;
2163	}
2164	}
2165	const SDNode *N = SU->getNode();
2166
2167	if (!N \|\| !N->isMachineOpcode() \|\| !SU->NumSuccs)
2168	return PDiff;
2169
2170	unsigned NumDefs = TII->get(Opcode: N->getMachineOpcode()).getNumDefs();
2171	for (unsigned i = `0`; i != NumDefs; ++i) {
2172	MVT VT = N->getSimpleValueType(ResNo: i);
2173	if (!N->hasAnyUseOfValue(Value: i))
2174	continue;
2175	unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2176	if (RegPressure [RCId] >= RegLimit [RCId])
2177	--PDiff;
2178	}
2179	return PDiff;
2180	}
2181
2182	void RegReductionPQBase::scheduledNode(SUnit *SU) {
2183	if (!TracksRegPressure)
2184	return;
2185
2186	if (!SU->getNode())
2187	return;
2188
2189	for (const SDep &Pred : SU->Preds) {
2190	if (Pred.isCtrl())
2191	continue;
2192	SUnit *PredSU = Pred.getSUnit();
2193	// NumRegDefsLeft is zero when enough uses of this node have been scheduled
2194	// to cover the number of registers defined (they are all live).
2195	if (PredSU->NumRegDefsLeft == `0`) {
2196	continue;
2197	}
2198	// FIXME: The ScheduleDAG currently loses information about which of a
2199	// node's values is consumed by each dependence. Consequently, if the node
2200	// defines multiple register classes, we don't know which to pressurize
2201	// here. Instead the following loop consumes the register defs in an
2202	// arbitrary order. At least it handles the common case of clustered loads
2203	// to the same class. For precise liveness, each SDep needs to indicate the
2204	// result number. But that tightly couples the ScheduleDAG with the
2205	// SelectionDAG making updates tricky. A simpler hack would be to attach a
2206	// value type or register class to SDep.
2207	//
2208	// The most important aspect of register tracking is balancing the increase
2209	// here with the reduction further below. Note that this SU may use multiple
2210	// defs in PredSU. The can't be determined here, but we've already
2211	// compensated by reducing NumRegDefsLeft in PredSU during
2212	// ScheduleDAGSDNodes::AddSchedEdges.
2213	--PredSU->NumRegDefsLeft;
2214	unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2215	for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2216	RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2217	if (SkipRegDefs)
2218	continue;
2219
2220	unsigned RCId, Cost;
2221	GetCostForDef(RegDefPos, TLI, TII, TRI, RegClass&: RCId, Cost, MF);
2222	RegPressure [RCId] += Cost;
2223	break;
2224	}
2225	}
2226
2227	// We should have this assert, but there may be dead SDNodes that never
2228	// materialize as SUnits, so they don't appear to generate liveness.
2229	//assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2230	int SkipRegDefs = (int)SU->NumRegDefsLeft;
2231	for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2232	RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2233	if (SkipRegDefs > `0`)
2234	continue;
2235	unsigned RCId, Cost;
2236	GetCostForDef(RegDefPos, TLI, TII, TRI, RegClass&: RCId, Cost, MF);
2237	if (RegPressure [RCId] < Cost) {
2238	// Register pressure tracking is imprecise. This can happen. But we try
2239	// hard not to let it happen because it likely results in poor scheduling.
2240	LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2241	<< ") has too many regdefs\n");
2242	RegPressure [RCId] = `0`;
2243	}
2244	else {
2245	RegPressure [RCId] -= Cost;
2246	}
2247	}
2248	LLVM_DEBUG(dumpRegPressure());
2249	}
2250
2251	void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2252	if (!TracksRegPressure)
2253	return;
2254
2255	const SDNode *N = SU->getNode();
2256	if (!N) return;
2257
2258	if (!N->isMachineOpcode()) {
2259	if (N->getOpcode() != ISD::CopyToReg)
2260	return;
2261	} else {
2262	unsigned Opc = N->getMachineOpcode();
2263	if (Opc == TargetOpcode::EXTRACT_SUBREG \|\|
2264	Opc == TargetOpcode::INSERT_SUBREG \|\|
2265	Opc == TargetOpcode::SUBREG_TO_REG \|\|
2266	Opc == TargetOpcode::REG_SEQUENCE \|\|
2267	Opc == TargetOpcode::IMPLICIT_DEF)
2268	return;
2269	}
2270
2271	for (const SDep &Pred : SU->Preds) {
2272	if (Pred.isCtrl())
2273	continue;
2274	SUnit *PredSU = Pred.getSUnit();
2275	// NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2276	// counts data deps.
2277	if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2278	continue;
2279	const SDNode *PN = PredSU->getNode();
2280	if (!PN->isMachineOpcode()) {
2281	if (PN->getOpcode() == ISD::CopyFromReg) {
2282	MVT VT = PN->getSimpleValueType(ResNo: `0`);
2283	unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2284	RegPressure [RCId] += TLI->getRepRegClassCostFor(VT);
2285	}
2286	continue;
2287	}
2288	unsigned POpc = PN->getMachineOpcode();
2289	if (POpc == TargetOpcode::IMPLICIT_DEF)
2290	continue;
2291	if (POpc == TargetOpcode::EXTRACT_SUBREG \|\|
2292	POpc == TargetOpcode::INSERT_SUBREG \|\|
2293	POpc == TargetOpcode::SUBREG_TO_REG) {
2294	MVT VT = PN->getSimpleValueType(ResNo: `0`);
2295	unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2296	RegPressure [RCId] += TLI->getRepRegClassCostFor(VT);
2297	continue;
2298	}
2299	if (POpc == TargetOpcode::REG_SEQUENCE) {
2300	unsigned DstRCIdx = PN->getConstantOperandVal(Num: `0`);
2301	const TargetRegisterClass *RC = TRI->getRegClass(i: DstRCIdx);
2302	unsigned RCId = RC->getID();
2303	// REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
2304	// here. Instead use the same constant as in GetCostForDef.
2305	RegPressure [RCId] += RegSequenceCost;
2306	continue;
2307	}
2308	unsigned NumDefs = TII->get(Opcode: PN->getMachineOpcode()).getNumDefs();
2309	for (unsigned i = `0`; i != NumDefs; ++i) {
2310	MVT VT = PN->getSimpleValueType(ResNo: i);
2311	if (!PN->hasAnyUseOfValue(Value: i))
2312	continue;
2313	unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2314	if (RegPressure [RCId] < TLI->getRepRegClassCostFor(VT))
2315	// Register pressure tracking is imprecise. This can happen.
2316	RegPressure [RCId] = `0`;
2317	else
2318	RegPressure [RCId] -= TLI->getRepRegClassCostFor(VT);
2319	}
2320	}
2321
2322	// Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2323	// may transfer data dependencies to CopyToReg.
2324	if (SU->NumSuccs && N->isMachineOpcode()) {
2325	unsigned NumDefs = TII->get(Opcode: N->getMachineOpcode()).getNumDefs();
2326	for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2327	MVT VT = N->getSimpleValueType(ResNo: i);
2328	if (VT == MVT::Glue \|\| VT == MVT::Other)
2329	continue;
2330	if (!N->hasAnyUseOfValue(Value: i))
2331	continue;
2332	unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2333	RegPressure [RCId] += TLI->getRepRegClassCostFor(VT);
2334	}
2335	}
2336
2337	LLVM_DEBUG(dumpRegPressure());
2338	}
2339
2340	//===----------------------------------------------------------------------===//
2341	// Dynamic Node Priority for Register Pressure Reduction
2342	//===----------------------------------------------------------------------===//
2343
2344	/// closestSucc - Returns the scheduled cycle of the successor which is
2345	/// closest to the current cycle.
2346	static unsigned closestSucc(const SUnit *SU) {
2347	unsigned MaxHeight = `0`;
2348	for (const SDep &Succ : SU->Succs) {
2349	if (Succ.isCtrl()) continue; // ignore chain succs
2350	unsigned Height = Succ.getSUnit()->getHeight();
2351	// If there are bunch of CopyToRegs stacked up, they should be considered
2352	// to be at the same position.
2353	if (Succ.getSUnit()->getNode() &&
2354	Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2355	Height = closestSucc(SU: Succ.getSUnit())+`1`;
2356	if (Height > MaxHeight)
2357	MaxHeight = Height;
2358	}
2359	return MaxHeight;
2360	}
2361
2362	/// calcMaxScratches - Returns an cost estimate of the worse case requirement
2363	/// for scratch registers, i.e. number of data dependencies.
2364	static unsigned calcMaxScratches(const SUnit *SU) {
2365	unsigned Scratches = `0`;
2366	for (const SDep &Pred : SU->Preds) {
2367	if (Pred.isCtrl()) continue; // ignore chain preds
2368	Scratches++;
2369	}
2370	return Scratches;
2371	}
2372
2373	/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2374	/// CopyFromReg from a virtual register.
2375	static bool hasOnlyLiveInOpers(const SUnit *SU) {
2376	bool RetVal = false;
2377	for (const SDep &Pred : SU->Preds) {
2378	if (Pred.isCtrl()) continue;
2379	const SUnit *PredSU = Pred.getSUnit();
2380	if (PredSU->getNode() &&
2381	PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2382	Register Reg =
2383	cast<RegisterSDNode>(Val: PredSU->getNode()->getOperand(Num: `1`))->getReg();
2384	if (Reg.isVirtual()) {
2385	RetVal = true;
2386	continue;
2387	}
2388	}
2389	return false;
2390	}
2391	return RetVal;
2392	}
2393
2394	/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2395	/// CopyToReg to a virtual register. This SU def is probably a liveout and
2396	/// it has no other use. It should be scheduled closer to the terminator.
2397	static bool hasOnlyLiveOutUses(const SUnit *SU) {
2398	bool RetVal = false;
2399	for (const SDep &Succ : SU->Succs) {
2400	if (Succ.isCtrl()) continue;
2401	const SUnit *SuccSU = Succ.getSUnit();
2402	if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2403	Register Reg =
2404	cast<RegisterSDNode>(Val: SuccSU->getNode()->getOperand(Num: `1`))->getReg();
2405	if (Reg.isVirtual()) {
2406	RetVal = true;
2407	continue;
2408	}
2409	}
2410	return false;
2411	}
2412	return RetVal;
2413	}
2414
2415	// Set isVRegCycle for a node with only live in opers and live out uses. Also
2416	// set isVRegCycle for its CopyFromReg operands.
2417	//
2418	// This is only relevant for single-block loops, in which case the VRegCycle
2419	// node is likely an induction variable in which the operand and target virtual
2420	// registers should be coalesced (e.g. pre/post increment values). Setting the
2421	// isVRegCycle flag helps the scheduler prioritize other uses of the same
2422	// CopyFromReg so that this node becomes the virtual register "kill". This
2423	// avoids interference between the values live in and out of the block and
2424	// eliminates a copy inside the loop.
2425	static void initVRegCycle(SUnit *SU) {
2426	if (DisableSchedVRegCycle)
2427	return;
2428
2429	if (!hasOnlyLiveInOpers(SU) \|\| !hasOnlyLiveOutUses(SU))
2430	return;
2431
2432	LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2433
2434	SU->isVRegCycle = true;
2435
2436	for (const SDep &Pred : SU->Preds) {
2437	if (Pred.isCtrl()) continue;
2438	Pred.getSUnit()->isVRegCycle = true;
2439	}
2440	}
2441
2442	// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2443	// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2444	static void resetVRegCycle(SUnit *SU) {
2445	if (!SU->isVRegCycle)
2446	return;
2447
2448	for (const SDep &Pred : SU->Preds) {
2449	if (Pred.isCtrl()) continue; // ignore chain preds
2450	SUnit *PredSU = Pred.getSUnit();
2451	if (PredSU->isVRegCycle) {
2452	assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2453	"VRegCycle def must be CopyFromReg");
2454	Pred.getSUnit()->isVRegCycle = false;
2455	}
2456	}
2457	}
2458
2459	// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2460	// means a node that defines the VRegCycle has not been scheduled yet.
2461	static bool hasVRegCycleUse(const SUnit *SU) {
2462	// If this SU also defines the VReg, don't hoist it as a "use".
2463	if (SU->isVRegCycle)
2464	return false;
2465
2466	for (const SDep &Pred : SU->Preds) {
2467	if (Pred.isCtrl()) continue; // ignore chain preds
2468	if (Pred.getSUnit()->isVRegCycle &&
2469	Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2470	LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2471	return true;
2472	}
2473	}
2474	return false;
2475	}
2476
2477	// Check for either a dependence (latency) or resource (hazard) stall.
2478	//
2479	// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2480	static bool BUHasStall(SUnit SU, int* Height, RegReductionPQBase *SPQ) {
2481	if ((int)SPQ->getCurCycle() < Height) return true;
2482	if (SPQ->getHazardRec()->getHazardType(SU, Stalls: `0`)
2483	!= ScheduleHazardRecognizer::NoHazard)
2484	return true;
2485	return false;
2486	}
2487
2488	// Return -1 if left has higher priority, 1 if right has higher priority.
2489	// Return 0 if latency-based priority is equivalent.
2490	static int BUCompareLatency(SUnit left, SUnit right, bool checkPref,
2491	RegReductionPQBase *SPQ) {
2492	// Scheduling an instruction that uses a VReg whose postincrement has not yet
2493	// been scheduled will induce a copy. Model this as an extra cycle of latency.
2494	int LPenalty = hasVRegCycleUse(SU: left) ? `1` : `0`;
2495	int RPenalty = hasVRegCycleUse(SU: right) ? `1` : `0`;
2496	int LHeight = (int)left->getHeight() + LPenalty;
2497	int RHeight = (int)right->getHeight() + RPenalty;
2498
2499	bool LStall = (!checkPref \|\| left->SchedulingPref == Sched::ILP) &&
2500	BUHasStall(SU: left, Height: LHeight, SPQ);
2501	bool RStall = (!checkPref \|\| right->SchedulingPref == Sched::ILP) &&
2502	BUHasStall(SU: right, Height: RHeight, SPQ);
2503
2504	// If scheduling one of the node will cause a pipeline stall, delay it.
2505	// If scheduling either one of the node will cause a pipeline stall, sort
2506	// them according to their height.
2507	if (LStall) {
2508	if (!RStall)
2509	return `1`;
2510	if (LHeight != RHeight)
2511	return LHeight > RHeight ? `1` : -`1`;
2512	} else if (RStall)
2513	return -`1`;
2514
2515	// If either node is scheduling for latency, sort them by height/depth
2516	// and latency.
2517	if (!checkPref \|\| (left->SchedulingPref == Sched::ILP \|\|
2518	right->SchedulingPref == Sched::ILP)) {
2519	// If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2520	// is enabled, grouping instructions by cycle, then its height is already
2521	// covered so only its depth matters. We also reach this point if both stall
2522	// but have the same height.
2523	if (!SPQ->getHazardRec()->isEnabled()) {
2524	if (LHeight != RHeight)
2525	return LHeight > RHeight ? `1` : -`1`;
2526	}
2527	int LDepth = left->getDepth() - LPenalty;
2528	int RDepth = right->getDepth() - RPenalty;
2529	if (LDepth != RDepth) {
2530	LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2531	<< ") depth " << LDepth << " vs SU (" << right->NodeNum
2532	<< ") depth " << RDepth << "\n");
2533	return LDepth < RDepth ? `1` : -`1`;
2534	}
2535	if (left->Latency != right->Latency)
2536	return left->Latency > right->Latency ? `1` : -`1`;
2537	}
2538	return `0`;
2539	}
2540
2541	static bool BURRSort(SUnit left, SUnit right, RegReductionPQBase *SPQ) {
2542	// Schedule physical register definitions close to their use. This is
2543	// motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2544	// long as shortening physreg live ranges is generally good, we can defer
2545	// creating a subtarget hook.
2546	if (!DisableSchedPhysRegJoin) {
2547	bool LHasPhysReg = left->hasPhysRegDefs;
2548	bool RHasPhysReg = right->hasPhysRegDefs;
2549	if (LHasPhysReg != RHasPhysReg) {
2550	#ifndef NDEBUG
2551	static const char *const PhysRegMsg[] = { " has no physreg",
2552	" defines a physreg" };
2553	#endif
2554	LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2555	<< PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2556	<< ") " << PhysRegMsg[RHasPhysReg] << "\n");
2557	return LHasPhysReg < RHasPhysReg;
2558	}
2559	}
2560
2561	// Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
2562	unsigned LPriority = SPQ->getNodePriority(SU: left);
2563	unsigned RPriority = SPQ->getNodePriority(SU: right);
2564
2565	// Be really careful about hoisting call operands above previous calls.
2566	// Only allows it if it would reduce register pressure.
2567	if (left->isCall && right->isCallOp) {
2568	unsigned RNumVals = right->getNode()->getNumValues();
2569	RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : `0`;
2570	}
2571	if (right->isCall && left->isCallOp) {
2572	unsigned LNumVals = left->getNode()->getNumValues();
2573	LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : `0`;
2574	}
2575
2576	if (LPriority != RPriority)
2577	return LPriority > RPriority;
2578
2579	// One or both of the nodes are calls and their sethi-ullman numbers are the
2580	// same, then keep source order.
2581	if (left->isCall \|\| right->isCall) {
2582	unsigned LOrder = SPQ->getNodeOrdering(SU: left);
2583	unsigned ROrder = SPQ->getNodeOrdering(SU: right);
2584
2585	// Prefer an ordering where the lower the non-zero order number, the higher
2586	// the preference.
2587	if ((LOrder \|\| ROrder) && LOrder != ROrder)
2588	return LOrder != `0` && (LOrder < ROrder \|\| ROrder == `0`);
2589	}
2590
2591	// Try schedule def + use closer when Sethi-Ullman numbers are the same.
2592	// e.g.
2593	// t1 = op t2, c1
2594	// t3 = op t4, c2
2595	//
2596	// and the following instructions are both ready.
2597	// t2 = op c3
2598	// t4 = op c4
2599	//
2600	// Then schedule t2 = op first.
2601	// i.e.
2602	// t4 = op c4
2603	// t2 = op c3
2604	// t1 = op t2, c1
2605	// t3 = op t4, c2
2606	//
2607	// This creates more short live intervals.
2608	unsigned LDist = closestSucc(SU: left);
2609	unsigned RDist = closestSucc(SU: right);
2610	if (LDist != RDist)
2611	return LDist < RDist;
2612
2613	// How many registers becomes live when the node is scheduled.
2614	unsigned LScratch = calcMaxScratches(SU: left);
2615	unsigned RScratch = calcMaxScratches(SU: right);
2616	if (LScratch != RScratch)
2617	return LScratch > RScratch;
2618
2619	// Comparing latency against a call makes little sense unless the node
2620	// is register pressure-neutral.
2621	if ((left->isCall && RPriority > `0`) \|\| (right->isCall && LPriority > `0`))
2622	return (left->NodeQueueId > right->NodeQueueId);
2623
2624	// Do not compare latencies when one or both of the nodes are calls.
2625	if (!DisableSchedCycles &&
2626	!(left->isCall \|\| right->isCall)) {
2627	int result = BUCompareLatency(left, right, checkPref: false /checkPref/, SPQ);
2628	if (result != `0`)
2629	return result > `0`;
2630	}
2631	else {
2632	if (left->getHeight() != right->getHeight())
2633	return left->getHeight() > right->getHeight();
2634
2635	if (left->getDepth() != right->getDepth())
2636	return left->getDepth() < right->getDepth();
2637	}
2638
2639	assert(left->NodeQueueId && right->NodeQueueId &&
2640	"NodeQueueId cannot be zero");
2641	return (left->NodeQueueId > right->NodeQueueId);
2642	}
2643
2644	// Bottom up
2645	bool bu_ls_rr_sort::operator()(SUnit left, SUnit right) const {
2646	if (int res = checkSpecialNodes(left, right))
2647	return res > `0`;
2648
2649	return BURRSort(left, right, SPQ);
2650	}
2651
2652	// Source order, otherwise bottom up.
2653	bool src_ls_rr_sort::operator()(SUnit left, SUnit right) const {
2654	if (int res = checkSpecialNodes(left, right))
2655	return res > `0`;
2656
2657	unsigned LOrder = SPQ->getNodeOrdering(SU: left);
2658	unsigned ROrder = SPQ->getNodeOrdering(SU: right);
2659
2660	// Prefer an ordering where the lower the non-zero order number, the higher
2661	// the preference.
2662	if ((LOrder \|\| ROrder) && LOrder != ROrder)
2663	return LOrder != `0` && (LOrder < ROrder \|\| ROrder == `0`);
2664
2665	return BURRSort(left, right, SPQ);
2666	}
2667
2668	// If the time between now and when the instruction will be ready can cover
2669	// the spill code, then avoid adding it to the ready queue. This gives long
2670	// stalls highest priority and allows hoisting across calls. It should also
2671	// speed up processing the available queue.
2672	bool hybrid_ls_rr_sort::isReady(SUnit SU, unsigned* CurCycle) const {
2673	static const unsigned ReadyDelay = `3`;
2674
2675	if (SPQ->MayReduceRegPressure(SU)) return true;
2676
2677	if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2678
2679	if (SPQ->getHazardRec()->getHazardType(SU, Stalls: -ReadyDelay)
2680	!= ScheduleHazardRecognizer::NoHazard)
2681	return false;
2682
2683	return true;
2684	}
2685
2686	// Return true if right should be scheduled with higher priority than left.
2687	bool hybrid_ls_rr_sort::operator()(SUnit left, SUnit right) const {
2688	if (int res = checkSpecialNodes(left, right))
2689	return res > `0`;
2690
2691	if (left->isCall \|\| right->isCall)
2692	// No way to compute latency of calls.
2693	return BURRSort(left, right, SPQ);
2694
2695	bool LHigh = SPQ->HighRegPressure(SU: left);
2696	bool RHigh = SPQ->HighRegPressure(SU: right);
2697	// Avoid causing spills. If register pressure is high, schedule for
2698	// register pressure reduction.
2699	if (LHigh && !RHigh) {
2700	LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2701	<< right->NodeNum << ")\n");
2702	return true;
2703	}
2704	else if (!LHigh && RHigh) {
2705	LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2706	<< left->NodeNum << ")\n");
2707	return false;
2708	}
2709	if (!LHigh && !RHigh) {
2710	int result = BUCompareLatency(left, right, checkPref: true /checkPref/, SPQ);
2711	if (result != `0`)
2712	return result > `0`;
2713	}
2714	return BURRSort(left, right, SPQ);
2715	}
2716
2717	// Schedule as many instructions in each cycle as possible. So don't make an
2718	// instruction available unless it is ready in the current cycle.
2719	bool ilp_ls_rr_sort::isReady(SUnit SU, unsigned* CurCycle) const {
2720	if (SU->getHeight() > CurCycle) return false;
2721
2722	if (SPQ->getHazardRec()->getHazardType(SU, Stalls: `0`)
2723	!= ScheduleHazardRecognizer::NoHazard)
2724	return false;
2725
2726	return true;
2727	}
2728
2729	static bool canEnableCoalescing(SUnit *SU) {
2730	unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : `0`;
2731	if (Opc == ISD::TokenFactor \|\| Opc == ISD::CopyToReg)
2732	// CopyToReg should be close to its uses to facilitate coalescing and
2733	// avoid spilling.
2734	return true;
2735
2736	if (Opc == TargetOpcode::EXTRACT_SUBREG \|\|
2737	Opc == TargetOpcode::SUBREG_TO_REG \|\|
2738	Opc == TargetOpcode::INSERT_SUBREG)
2739	// EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2740	// close to their uses to facilitate coalescing.
2741	return true;
2742
2743	if (SU->NumPreds == `0` && SU->NumSuccs != `0`)
2744	// If SU does not have a register def, schedule it close to its uses
2745	// because it does not lengthen any live ranges.
2746	return true;
2747
2748	return false;
2749	}
2750
2751	// list-ilp is currently an experimental scheduler that allows various
2752	// heuristics to be enabled prior to the normal register reduction logic.
2753	bool ilp_ls_rr_sort::operator()(SUnit left, SUnit right) const {
2754	if (int res = checkSpecialNodes(left, right))
2755	return res > `0`;
2756
2757	if (left->isCall \|\| right->isCall)
2758	// No way to compute latency of calls.
2759	return BURRSort(left, right, SPQ);
2760
2761	unsigned LLiveUses = `0`, RLiveUses = `0`;
2762	int LPDiff = `0`, RPDiff = `0`;
2763	if (!DisableSchedRegPressure \|\| !DisableSchedLiveUses) {
2764	LPDiff = SPQ->RegPressureDiff(SU: left, LiveUses&: LLiveUses);
2765	RPDiff = SPQ->RegPressureDiff(SU: right, LiveUses&: RLiveUses);
2766	}
2767	if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2768	LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2769	<< "): " << LPDiff << " != SU(" << right->NodeNum
2770	<< "): " << RPDiff << "\n");
2771	return LPDiff > RPDiff;
2772	}
2773
2774	if (!DisableSchedRegPressure && (LPDiff > `0` \|\| RPDiff > `0`)) {
2775	bool LReduce = canEnableCoalescing(SU: left);
2776	bool RReduce = canEnableCoalescing(SU: right);
2777	if (LReduce && !RReduce) return false;
2778	if (RReduce && !LReduce) return true;
2779	}
2780
2781	if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2782	LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2783	<< " != SU(" << right->NodeNum << "): " << RLiveUses
2784	<< "\n");
2785	return LLiveUses < RLiveUses;
2786	}
2787
2788	if (!DisableSchedStalls) {
2789	bool LStall = BUHasStall(SU: left, Height: left->getHeight(), SPQ);
2790	bool RStall = BUHasStall(SU: right, Height: right->getHeight(), SPQ);
2791	if (LStall != RStall)
2792	return left->getHeight() > right->getHeight();
2793	}
2794
2795	if (!DisableSchedCriticalPath) {
2796	int spread = (int)left->getDepth() - (int)right->getDepth();
2797	if (std::abs(x: spread) > MaxReorderWindow) {
2798	LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2799	<< left->getDepth() << " != SU(" << right->NodeNum
2800	<< "): " << right->getDepth() << "\n");
2801	return left->getDepth() < right->getDepth();
2802	}
2803	}
2804
2805	if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2806	int spread = (int)left->getHeight() - (int)right->getHeight();
2807	if (std::abs(x: spread) > MaxReorderWindow)
2808	return left->getHeight() > right->getHeight();
2809	}
2810
2811	return BURRSort(left, right, SPQ);
2812	}
2813
2814	void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2815	SUnits = &sunits;
2816	// Add pseudo dependency edges for two-address nodes.
2817	if (!Disable2AddrHack)
2818	AddPseudoTwoAddrDeps();
2819	// Reroute edges to nodes with multiple uses.
2820	if (!TracksRegPressure && !SrcOrder)
2821	PrescheduleNodesWithMultipleUses();
2822	// Calculate node priorities.
2823	CalculateSethiUllmanNumbers();
2824
2825	// For single block loops, mark nodes that look like canonical IV increments.
2826	if (scheduleDAG->BB->isSuccessor(MBB: scheduleDAG->BB))
2827	for (SUnit &SU : sunits)
2828	initVRegCycle(SU: &SU);
2829	}
2830
2831	//===----------------------------------------------------------------------===//
2832	// Preschedule for Register Pressure
2833	//===----------------------------------------------------------------------===//
2834
2835	bool RegReductionPQBase::canClobber(const SUnit SU, const* SUnit *Op) {
2836	if (SU->isTwoAddress) {
2837	unsigned Opc = SU->getNode()->getMachineOpcode();
2838	const MCInstrDesc &MCID = TII->get(Opcode: Opc);
2839	unsigned NumRes = MCID.getNumDefs();
2840	unsigned NumOps = MCID.getNumOperands() - NumRes;
2841	for (unsigned i = `0`; i != NumOps; ++i) {
2842	if (MCID.getOperandConstraint(OpNum: i+NumRes, Constraint: MCOI::TIED_TO) != -`1`) {
2843	SDNode *DU = SU->getNode()->getOperand(Num: i).getNode();
2844	if (DU->getNodeId() != -`1` &&
2845	Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2846	return true;
2847	}
2848	}
2849	}
2850	return false;
2851	}
2852
2853	/// canClobberReachingPhysRegUse - True if SU would clobber one of it's
2854	/// successor's explicit physregs whose definition can reach DepSU.
2855	/// i.e. DepSU should not be scheduled above SU.
2856	static bool canClobberReachingPhysRegUse(const SUnit DepSU, const* SUnit *SU,
2857	ScheduleDAGRRList *scheduleDAG,
2858	const TargetInstrInfo *TII,
2859	const TargetRegisterInfo *TRI) {
2860	ArrayRef<MCPhysReg> ImpDefs =
2861	TII->get(Opcode: SU->getNode()->getMachineOpcode()).implicit_defs();
2862	const uint32_t *RegMask = getNodeRegMask(N: SU->getNode());
2863	if (ImpDefs.empty() && !RegMask)
2864	return false;
2865
2866	for (const SDep &Succ : SU->Succs) {
2867	SUnit *SuccSU = Succ.getSUnit();
2868	for (const SDep &SuccPred : SuccSU->Preds) {
2869	if (!SuccPred.isAssignedRegDep())
2870	continue;
2871
2872	if (RegMask &&
2873	MachineOperand::clobbersPhysReg(RegMask, PhysReg: SuccPred.getReg()) &&
2874	scheduleDAG->IsReachable(SU: DepSU, TargetSU: SuccPred.getSUnit()))
2875	return true;
2876
2877	for (MCPhysReg ImpDef : ImpDefs) {
2878	// Return true if SU clobbers this physical register use and the
2879	// definition of the register reaches from DepSU. IsReachable queries
2880	// a topological forward sort of the DAG (following the successors).
2881	if (TRI->regsOverlap(RegA: ImpDef, RegB: SuccPred.getReg()) &&
2882	scheduleDAG->IsReachable(SU: DepSU, TargetSU: SuccPred.getSUnit()))
2883	return true;
2884	}
2885	}
2886	}
2887	return false;
2888	}
2889
2890	/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2891	/// physical register defs.
2892	static bool canClobberPhysRegDefs(const SUnit SuccSU, const* SUnit *SU,
2893	const TargetInstrInfo *TII,
2894	const TargetRegisterInfo *TRI) {
2895	SDNode *N = SuccSU->getNode();
2896	unsigned NumDefs = TII->get(Opcode: N->getMachineOpcode()).getNumDefs();
2897	ArrayRef<MCPhysReg> ImpDefs = TII->get(Opcode: N->getMachineOpcode()).implicit_defs();
2898	assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
2899	for (const SDNode *SUNode = SU->getNode(); SUNode;
2900	SUNode = SUNode->getGluedNode()) {
2901	if (!SUNode->isMachineOpcode())
2902	continue;
2903	ArrayRef<MCPhysReg> SUImpDefs =
2904	TII->get(Opcode: SUNode->getMachineOpcode()).implicit_defs();
2905	const uint32_t *SURegMask = getNodeRegMask(N: SUNode);
2906	if (SUImpDefs.empty() && !SURegMask)
2907	continue;
2908	for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2909	MVT VT = N->getSimpleValueType(ResNo: i);
2910	if (VT == MVT::Glue \|\| VT == MVT::Other)
2911	continue;
2912	if (!N->hasAnyUseOfValue(Value: i))
2913	continue;
2914	MCPhysReg Reg = ImpDefs [i - NumDefs];
2915	if (SURegMask && MachineOperand::clobbersPhysReg(RegMask: SURegMask, PhysReg: Reg))
2916	return true;
2917	for (MCPhysReg SUReg : SUImpDefs) {
2918	if (TRI->regsOverlap(RegA: Reg, RegB: SUReg))
2919	return true;
2920	}
2921	}
2922	}
2923	return false;
2924	}
2925
2926	/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2927	/// are not handled well by the general register pressure reduction
2928	/// heuristics. When presented with code like this:
2929	///
2930	/// N
2931	/// / \|
2932	/// / \|
2933	/// U store
2934	/// \|
2935	/// ...
2936	///
2937	/// the heuristics tend to push the store up, but since the
2938	/// operand of the store has another use (U), this would increase
2939	/// the length of that other use (the U->N edge).
2940	///
2941	/// This function transforms code like the above to route U's
2942	/// dependence through the store when possible, like this:
2943	///
2944	/// N
2945	/// \|\|
2946	/// \|\|
2947	/// store
2948	/// \|
2949	/// U
2950	/// \|
2951	/// ...
2952	///
2953	/// This results in the store being scheduled immediately
2954	/// after N, which shortens the U->N live range, reducing
2955	/// register pressure.
2956	void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2957	// Visit all the nodes in topological order, working top-down.
2958	for (SUnit &SU : *SUnits) {
2959	// For now, only look at nodes with no data successors, such as stores.
2960	// These are especially important, due to the heuristics in
2961	// getNodePriority for nodes with no data successors.
2962	if (SU.NumSuccs != `0`)
2963	continue;
2964	// For now, only look at nodes with exactly one data predecessor.
2965	if (SU.NumPreds != `1`)
2966	continue;
2967	// Avoid prescheduling copies to virtual registers, which don't behave
2968	// like other nodes from the perspective of scheduling heuristics.
2969	if (SDNode *N = SU.getNode())
2970	if (N->getOpcode() == ISD::CopyToReg &&
2971	cast<RegisterSDNode>(Val: N->getOperand(Num: `1`))->getReg().isVirtual())
2972	continue;
2973
2974	SDNode PredFrameSetup = nullptr*;
2975	for (const SDep &Pred : SU.Preds)
2976	if (Pred.isCtrl() && Pred.getSUnit()) {
2977	// Find the predecessor which is not data dependence.
2978	SDNode *PredND = Pred.getSUnit()->getNode();
2979
2980	// If PredND is FrameSetup, we should not pre-scheduled the node,
2981	// or else, when bottom up scheduling, ADJCALLSTACKDOWN and
2982	// ADJCALLSTACKUP may hold CallResource too long and make other
2983	// calls can't be scheduled. If there's no other available node
2984	// to schedule, the schedular will try to rename the register by
2985	// creating copy to avoid the conflict which will fail because
2986	// CallResource is not a real physical register.
2987	if (PredND && PredND->isMachineOpcode() &&
2988	(PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2989	PredFrameSetup = PredND;
2990	break;
2991	}
2992	}
2993	// Skip the node has FrameSetup parent.
2994	if (PredFrameSetup != nullptr)
2995	continue;
2996
2997	// Locate the single data predecessor.
2998	SUnit PredSU = nullptr*;
2999	for (const SDep &Pred : SU.Preds)
3000	if (!Pred.isCtrl()) {
3001	PredSU = Pred.getSUnit();
3002	break;
3003	}
3004	assert(PredSU);
3005
3006	// Don't rewrite edges that carry physregs, because that requires additional
3007	// support infrastructure.
3008	if (PredSU->hasPhysRegDefs)
3009	continue;
3010	// Short-circuit the case where SU is PredSU's only data successor.
3011	if (PredSU->NumSuccs == `1`)
3012	continue;
3013	// Avoid prescheduling to copies from virtual registers, which don't behave
3014	// like other nodes from the perspective of scheduling heuristics.
3015	if (SDNode *N = SU.getNode())
3016	if (N->getOpcode() == ISD::CopyFromReg &&
3017	cast<RegisterSDNode>(Val: N->getOperand(Num: `1`))->getReg().isVirtual())
3018	continue;
3019
3020	// Perform checks on the successors of PredSU.
3021	for (const SDep &PredSucc : PredSU->Succs) {
3022	SUnit *PredSuccSU = PredSucc.getSUnit();
3023	if (PredSuccSU == &SU) continue;
3024	// If PredSU has another successor with no data successors, for
3025	// now don't attempt to choose either over the other.
3026	if (PredSuccSU->NumSuccs == `0`)
3027	goto outer_loop_continue;
3028	// Don't break physical register dependencies.
3029	if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3030	if (canClobberPhysRegDefs(SuccSU: PredSuccSU, SU: &SU, TII, TRI))
3031	goto outer_loop_continue;
3032	// Don't introduce graph cycles.
3033	if (scheduleDAG->IsReachable(SU: &SU, TargetSU: PredSuccSU))
3034	goto outer_loop_continue;
3035	}
3036
3037	// Ok, the transformation is safe and the heuristics suggest it is
3038	// profitable. Update the graph.
3039	LLVM_DEBUG(
3040	dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3041	<< PredSU->NodeNum
3042	<< " to guide scheduling in the presence of multiple uses\n");
3043	for (unsigned i = `0`; i != PredSU->Succs.size(); ++i) {
3044	SDep Edge = PredSU->Succs [i];
3045	assert(!Edge.isAssignedRegDep());
3046	SUnit *SuccSU = Edge.getSUnit();
3047	if (SuccSU != &SU) {
3048	Edge.setSUnit(PredSU);
3049	scheduleDAG->RemovePred(SU: SuccSU, D: Edge);
3050	scheduleDAG->AddPredQueued(SU: &SU, D: Edge);
3051	Edge.setSUnit(&SU);
3052	scheduleDAG->AddPredQueued(SU: SuccSU, D: Edge);
3053	--i;
3054	}
3055	}
3056	outer_loop_continue:;
3057	}
3058	}
3059
3060	/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3061	/// it as a def&use operand. Add a pseudo control edge from it to the other
3062	/// node (if it won't create a cycle) so the two-address one will be scheduled
3063	/// first (lower in the schedule). If both nodes are two-address, favor the
3064	/// one that has a CopyToReg use (more likely to be a loop induction update).
3065	/// If both are two-address, but one is commutable while the other is not
3066	/// commutable, favor the one that's not commutable.
3067	void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3068	for (SUnit &SU : *SUnits) {
3069	if (!SU.isTwoAddress)
3070	continue;
3071
3072	SDNode *Node = SU.getNode();
3073	if (!Node \|\| !Node->isMachineOpcode() \|\| SU.getNode()->getGluedNode())
3074	continue;
3075
3076	bool isLiveOut = hasOnlyLiveOutUses(SU: &SU);
3077	unsigned Opc = Node->getMachineOpcode();
3078	const MCInstrDesc &MCID = TII->get(Opcode: Opc);
3079	unsigned NumRes = MCID.getNumDefs();
3080	unsigned NumOps = MCID.getNumOperands() - NumRes;
3081	for (unsigned j = `0`; j != NumOps; ++j) {
3082	if (MCID.getOperandConstraint(OpNum: j+NumRes, Constraint: MCOI::TIED_TO) == -`1`)
3083	continue;
3084	SDNode *DU = SU.getNode()->getOperand(Num: j).getNode();
3085	if (DU->getNodeId() == -`1`)
3086	continue;
3087	const SUnit DUSU = &(SUnits)[DU->getNodeId()];
3088	if (!DUSU)
3089	continue;
3090	for (const SDep &Succ : DUSU->Succs) {
3091	if (Succ.isCtrl())
3092	continue;
3093	SUnit *SuccSU = Succ.getSUnit();
3094	if (SuccSU == &SU)
3095	continue;
3096	// Be conservative. Ignore if nodes aren't at roughly the same
3097	// depth and height.
3098	if (SuccSU->getHeight() < SU.getHeight() &&
3099	(SU.getHeight() - SuccSU->getHeight()) > `1`)
3100	continue;
3101	// Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3102	// constrains whatever is using the copy, instead of the copy
3103	// itself. In the case that the copy is coalesced, this
3104	// preserves the intent of the pseudo two-address heurietics.
3105	while (SuccSU->Succs.size() == `1` &&
3106	SuccSU->getNode()->isMachineOpcode() &&
3107	SuccSU->getNode()->getMachineOpcode() ==
3108	TargetOpcode::COPY_TO_REGCLASS)
3109	SuccSU = SuccSU->Succs.front().getSUnit();
3110	// Don't constrain non-instruction nodes.
3111	if (!SuccSU->getNode() \|\| !SuccSU->getNode()->isMachineOpcode())
3112	continue;
3113	// Don't constrain nodes with physical register defs if the
3114	// predecessor can clobber them.
3115	if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3116	if (canClobberPhysRegDefs(SuccSU, SU: &SU, TII, TRI))
3117	continue;
3118	}
3119	// Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3120	// these may be coalesced away. We want them close to their uses.
3121	unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3122	if (SuccOpc == TargetOpcode::EXTRACT_SUBREG \|\|
3123	SuccOpc == TargetOpcode::INSERT_SUBREG \|\|
3124	SuccOpc == TargetOpcode::SUBREG_TO_REG)
3125	continue;
3126	if (!canClobberReachingPhysRegUse(DepSU: SuccSU, SU: &SU, scheduleDAG, TII, TRI) &&
3127	(!canClobber(SU: SuccSU, Op: DUSU) \|\|
3128	(isLiveOut && !hasOnlyLiveOutUses(SU: SuccSU)) \|\|
3129	(!SU.isCommutable && SuccSU->isCommutable)) &&
3130	!scheduleDAG->IsReachable(SU: SuccSU, TargetSU: &SU)) {
3131	LLVM_DEBUG(dbgs()
3132	<< " Adding a pseudo-two-addr edge from SU #"
3133	<< SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3134	scheduleDAG->AddPredQueued(SU: &SU, D: SDep (SuccSU, SDep::Artificial));
3135	}
3136	}
3137	}
3138	}
3139	}
3140
3141	//===----------------------------------------------------------------------===//
3142	// Public Constructor Functions
3143	//===----------------------------------------------------------------------===//
3144
3145	ScheduleDAGSDNodes llvm::createBURRListDAGScheduler(SelectionDAGISel IS,
3146	CodeGenOptLevel OptLevel) {
3147	const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3148	const TargetInstrInfo *TII = STI.getInstrInfo();
3149	const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3150
3151	BURegReductionPriorityQueue *PQ =
3152	new BURegReductionPriorityQueue (IS->MF, false, false, TII, TRI, nullptr*);
3153	ScheduleDAGRRList SD = new* ScheduleDAGRRList (IS->MF, false*, PQ, OptLevel);
3154	PQ->setScheduleDAG(SD);
3155	return SD;
3156	}
3157
3158	ScheduleDAGSDNodes *
3159	llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3160	CodeGenOptLevel OptLevel) {
3161	const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3162	const TargetInstrInfo *TII = STI.getInstrInfo();
3163	const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3164
3165	SrcRegReductionPriorityQueue *PQ =
3166	new SrcRegReductionPriorityQueue (IS->MF, false, true, TII, TRI, nullptr*);
3167	ScheduleDAGRRList SD = new* ScheduleDAGRRList (IS->MF, false*, PQ, OptLevel);
3168	PQ->setScheduleDAG(SD);
3169	return SD;
3170	}
3171
3172	ScheduleDAGSDNodes *
3173	llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3174	CodeGenOptLevel OptLevel) {
3175	const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3176	const TargetInstrInfo *TII = STI.getInstrInfo();
3177	const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3178	const TargetLowering *TLI = IS->TLI;
3179
3180	HybridBURRPriorityQueue *PQ =
3181	new HybridBURRPriorityQueue (IS->MF, true, false*, TII, TRI, TLI);
3182
3183	ScheduleDAGRRList SD = new* ScheduleDAGRRList (IS->MF, true*, PQ, OptLevel);
3184	PQ->setScheduleDAG(SD);
3185	return SD;
3186	}
3187
3188	ScheduleDAGSDNodes llvm::createILPListDAGScheduler(SelectionDAGISel IS,
3189	CodeGenOptLevel OptLevel) {
3190	const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3191	const TargetInstrInfo *TII = STI.getInstrInfo();
3192	const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3193	const TargetLowering *TLI = IS->TLI;
3194
3195	ILPBURRPriorityQueue *PQ =
3196	new ILPBURRPriorityQueue (IS->MF, true, false*, TII, TRI, TLI);
3197	ScheduleDAGRRList SD = new* ScheduleDAGRRList (IS->MF, true*, PQ, OptLevel);
3198	PQ->setScheduleDAG(SD);
3199	return SD;
3200	}
3201

Browse the source code of llvm_projects/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp