MachineScheduler.cpp source code [llvm_projects/llvm/lib/CodeGen/MachineScheduler.cpp]

1	//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// MachineScheduler schedules machine instructions after phi elimination. It
10	// preserves LiveIntervals so it can be invoked before register allocation.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "llvm/CodeGen/MachineScheduler.h"
15	#include "llvm/ADT/ArrayRef.h"
16	#include "llvm/ADT/BitVector.h"
17	#include "llvm/ADT/DenseMap.h"
18	#include "llvm/ADT/PriorityQueue.h"
19	#include "llvm/ADT/STLExtras.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/ADT/iterator_range.h"
23	#include "llvm/Analysis/AliasAnalysis.h"
24	#include "llvm/CodeGen/LiveInterval.h"
25	#include "llvm/CodeGen/LiveIntervals.h"
26	#include "llvm/CodeGen/MachineBasicBlock.h"
27	#include "llvm/CodeGen/MachineDominators.h"
28	#include "llvm/CodeGen/MachineFunction.h"
29	#include "llvm/CodeGen/MachineFunctionPass.h"
30	#include "llvm/CodeGen/MachineInstr.h"
31	#include "llvm/CodeGen/MachineLoopInfo.h"
32	#include "llvm/CodeGen/MachineOperand.h"
33	#include "llvm/CodeGen/MachinePassRegistry.h"
34	#include "llvm/CodeGen/MachineRegisterInfo.h"
35	#include "llvm/CodeGen/RegisterClassInfo.h"
36	#include "llvm/CodeGen/RegisterPressure.h"
37	#include "llvm/CodeGen/ScheduleDAG.h"
38	#include "llvm/CodeGen/ScheduleDAGInstrs.h"
39	#include "llvm/CodeGen/ScheduleDAGMutation.h"
40	#include "llvm/CodeGen/ScheduleDFS.h"
41	#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
42	#include "llvm/CodeGen/SlotIndexes.h"
43	#include "llvm/CodeGen/TargetFrameLowering.h"
44	#include "llvm/CodeGen/TargetInstrInfo.h"
45	#include "llvm/CodeGen/TargetLowering.h"
46	#include "llvm/CodeGen/TargetPassConfig.h"
47	#include "llvm/CodeGen/TargetRegisterInfo.h"
48	#include "llvm/CodeGen/TargetSchedule.h"
49	#include "llvm/CodeGen/TargetSubtargetInfo.h"
50	#include "llvm/CodeGenTypes/MachineValueType.h"
51	#include "llvm/Config/llvm-config.h"
52	#include "llvm/InitializePasses.h"
53	#include "llvm/MC/LaneBitmask.h"
54	#include "llvm/Pass.h"
55	#include "llvm/Support/CommandLine.h"
56	#include "llvm/Support/Compiler.h"
57	#include "llvm/Support/Debug.h"
58	#include "llvm/Support/ErrorHandling.h"
59	#include "llvm/Support/GraphWriter.h"
60	#include "llvm/Support/raw_ostream.h"
61	#include <algorithm>
62	#include <cassert>
63	#include <cstdint>
64	#include <iterator>
65	#include <limits>
66	#include <memory>
67	#include <string>
68	#include <tuple>
69	#include <utility>
70	#include <vector>
71
72	using namespace llvm;
73
74	#define DEBUG_TYPE "machine-scheduler"
75
76	STATISTIC(NumClustered, "Number of load/store pairs clustered");
77
78	namespace llvm {
79
80	cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81	cl::desc ("Force top-down list scheduling"));
82	cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83	cl::desc ("Force bottom-up list scheduling"));
84	namespace MISchedPostRASched {
85	enum Direction {
86	TopDown,
87	BottomUp,
88	Bidirectional,
89	};
90	} // end namespace MISchedPostRASched
91	cl::opt<MISchedPostRASched::Direction> PostRADirection(
92	"misched-postra-direction", cl::Hidden,
93	cl::desc ("Post reg-alloc list scheduling direction"),
94	// Default to top-down because it was implemented first and existing targets
95	// expect that behavior by default.
96	cl::init(Val: MISchedPostRASched::TopDown),
97	cl::values(
98	clEnumValN(MISchedPostRASched::TopDown, "topdown",
99	"Force top-down post reg-alloc list scheduling"),
100	clEnumValN(MISchedPostRASched::BottomUp, "bottomup",
101	"Force bottom-up post reg-alloc list scheduling"),
102	clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional",
103	"Force bidirectional post reg-alloc list scheduling")));
104	cl::opt<bool>
105	DumpCriticalPathLength("misched-dcpl", cl::Hidden,
106	cl::desc ("Print critical path length to stdout"));
107
108	cl::opt<bool> VerifyScheduling(
109	"verify-misched", cl::Hidden,
110	cl::desc ("Verify machine instrs before and after machine scheduling"));
111
112	#ifndef NDEBUG
113	cl::opt<bool> ViewMISchedDAGs(
114	"view-misched-dags", cl::Hidden,
115	cl::desc("Pop up a window to show MISched dags after they are processed"));
116	cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
117	cl::desc("Print schedule DAGs"));
118	cl::opt<bool> MISchedDumpReservedCycles(
119	"misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
120	cl::desc("Dump resource usage at schedule boundary."));
121	cl::opt<bool> MischedDetailResourceBooking(
122	"misched-detail-resource-booking", cl::Hidden, cl::init(false),
123	cl::desc("Show details of invoking getNextResoufceCycle."));
124	#else
125	const bool ViewMISchedDAGs = false;
126	const bool PrintDAGs = false;
127	const bool MischedDetailResourceBooking = false;
128	#ifdef LLVM_ENABLE_DUMP
129	const bool MISchedDumpReservedCycles = false;
130	#endif // LLVM_ENABLE_DUMP
131	#endif // NDEBUG
132
133	} // end namespace llvm
134
135	#ifndef NDEBUG
136	/// In some situations a few uninteresting nodes depend on nearly all other
137	/// nodes in the graph, provide a cutoff to hide them.
138	static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
139	cl::desc("Hide nodes with more predecessor/successor than cutoff"));
140
141	static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
142	cl::desc("Stop scheduling after N instructions"), cl::init(~`0U`));
143
144	static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
145	cl::desc("Only schedule this function"));
146	static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
147	cl::desc("Only schedule this MBB#"));
148	#endif // NDEBUG
149
150	/// Avoid quadratic complexity in unusually large basic blocks by limiting the
151	/// size of the ready lists.
152	static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
153	cl::desc ("Limit ready list to N instructions"), cl::init(Val: `256`));
154
155	static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
156	cl::desc ("Enable register pressure scheduling."), cl::init(Val: true));
157
158	static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
159	cl::desc ("Enable cyclic critical path analysis."), cl::init(Val: true));
160
161	static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
162	cl::desc ("Enable memop clustering."),
163	cl::init(Val: true));
164	static cl::opt<bool>
165	ForceFastCluster("force-fast-cluster", cl::Hidden,
166	cl::desc ("Switch to fast cluster algorithm with the lost "
167	"of some fusion opportunities"),
168	cl::init(Val: false));
169	static cl::opt<unsigned>
170	FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
171	cl::desc ("The threshold for fast cluster"),
172	cl::init(Val: `1000`));
173
174	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
175	static cl::opt<bool> MISchedDumpScheduleTrace(
176	"misched-dump-schedule-trace", cl::Hidden, cl::init(false),
177	cl::desc("Dump resource usage at schedule boundary."));
178	static cl::opt<unsigned>
179	HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
180	cl::desc("Set width of the columns with "
181	"the resources and schedule units"),
182	cl::init(`19`));
183	static cl::opt<unsigned>
184	ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
185	cl::desc("Set width of the columns showing resource booking."),
186	cl::init(`5`));
187	static cl::opt<bool> MISchedSortResourcesInTrace(
188	"misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
189	cl::desc("Sort the resources printed in the dump trace"));
190	#endif
191
192	static cl::opt<unsigned>
193	MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
194	cl::desc ("Number of intervals to track"), cl::init(Val: `10`));
195
196	// DAG subtrees must have at least this many nodes.
197	static const unsigned MinSubtreeSize = `8`;
198
199	// Pin the vtables to this file.
200	void MachineSchedStrategy::anchor() {}
201
202	void ScheduleDAGMutation::anchor() {}
203
204	//===----------------------------------------------------------------------===//
205	// Machine Instruction Scheduling Pass and Registry
206	//===----------------------------------------------------------------------===//
207
208	MachineSchedContext::MachineSchedContext() {
209	RegClassInfo = new RegisterClassInfo ();
210	}
211
212	MachineSchedContext::~MachineSchedContext() {
213	delete RegClassInfo;
214	}
215
216	namespace {
217
218	/// Base class for a machine scheduler class that can run at any point.
219	class MachineSchedulerBase : public MachineSchedContext,
220	public MachineFunctionPass {
221	public:
222	MachineSchedulerBase(char &ID): MachineFunctionPass (ID) {}
223
224	void print(raw_ostream &O, const Module* = nullptr) const override;
225
226	protected:
227	void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
228	};
229
230	/// MachineScheduler runs after coalescing and before register allocation.
231	class MachineScheduler : public MachineSchedulerBase {
232	public:
233	MachineScheduler();
234
235	void getAnalysisUsage(AnalysisUsage &AU) const override;
236
237	bool runOnMachineFunction(MachineFunction&) override;
238
239	static char ID; // Class identification, replacement for typeinfo
240
241	protected:
242	ScheduleDAGInstrs *createMachineScheduler();
243	};
244
245	/// PostMachineScheduler runs after shortly before code emission.
246	class PostMachineScheduler : public MachineSchedulerBase {
247	public:
248	PostMachineScheduler();
249
250	void getAnalysisUsage(AnalysisUsage &AU) const override;
251
252	bool runOnMachineFunction(MachineFunction&) override;
253
254	static char ID; // Class identification, replacement for typeinfo
255
256	protected:
257	ScheduleDAGInstrs *createPostMachineScheduler();
258	};
259
260	} // end anonymous namespace
261
262	char MachineScheduler::ID = `0`;
263
264	char &llvm::MachineSchedulerID = MachineScheduler::ID;
265
266	INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
267	"Machine Instruction Scheduler", false, false)
268	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
269	INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
270	INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
271	INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
272	INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
273	INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
274	"Machine Instruction Scheduler", false, false)
275
276	MachineScheduler::MachineScheduler() : MachineSchedulerBase (ID) {
277	initializeMachineSchedulerPass(Registry&: *PassRegistry::getPassRegistry());
278	}
279
280	void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
281	AU.setPreservesCFG();
282	AU.addRequired<MachineDominatorTreeWrapperPass>();
283	AU.addRequired<MachineLoopInfoWrapperPass>();
284	AU.addRequired<AAResultsWrapperPass>();
285	AU.addRequired<TargetPassConfig>();
286	AU.addRequired<SlotIndexesWrapperPass>();
287	AU.addPreserved<SlotIndexesWrapperPass>();
288	AU.addRequired<LiveIntervalsWrapperPass>();
289	AU.addPreserved<LiveIntervalsWrapperPass>();
290	MachineFunctionPass::getAnalysisUsage(AU);
291	}
292
293	char PostMachineScheduler::ID = `0`;
294
295	char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
296
297	INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
298	"PostRA Machine Instruction Scheduler", false, false)
299	INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
300	INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
301	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
302	INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
303	"PostRA Machine Instruction Scheduler", false, false)
304
305	PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase (ID) {
306	initializePostMachineSchedulerPass(Registry&: *PassRegistry::getPassRegistry());
307	}
308
309	void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
310	AU.setPreservesCFG();
311	AU.addRequired<MachineDominatorTreeWrapperPass>();
312	AU.addRequired<MachineLoopInfoWrapperPass>();
313	AU.addRequired<AAResultsWrapperPass>();
314	AU.addRequired<TargetPassConfig>();
315	MachineFunctionPass::getAnalysisUsage(AU);
316	}
317
318	MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
319	MachineSchedRegistry::Registry;
320
321	/// A dummy default scheduler factory indicates whether the scheduler
322	/// is overridden on the command line.
323	static ScheduleDAGInstrs useDefaultMachineSched(MachineSchedContext C) {
324	return nullptr;
325	}
326
327	/// MachineSchedOpt allows command line selection of the scheduler.
328	static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
329	RegisterPassParser<MachineSchedRegistry>>
330	MachineSchedOpt("misched",
331	cl::init(Val: &useDefaultMachineSched), cl::Hidden,
332	cl::desc ("Machine instruction scheduler to use"));
333
334	static MachineSchedRegistry
335	DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
336	useDefaultMachineSched);
337
338	static cl::opt<bool> EnableMachineSched(
339	"enable-misched",
340	cl::desc ("Enable the machine instruction scheduling pass."), cl::init(Val: true),
341	cl::Hidden);
342
343	static cl::opt<bool> EnablePostRAMachineSched(
344	"enable-post-misched",
345	cl::desc ("Enable the post-ra machine instruction scheduling pass."),
346	cl::init(Val: true), cl::Hidden);
347
348	/// Decrement this iterator until reaching the top or a non-debug instr.
349	static MachineBasicBlock::const_iterator
350	priorNonDebug(MachineBasicBlock::const_iterator I,
351	MachineBasicBlock::const_iterator Beg) {
352	assert(I != Beg && "reached the top of the region, cannot decrement");
353	while (--I != Beg) {
354	if (!I ->isDebugOrPseudoInstr())
355	break;
356	}
357	return I;
358	}
359
360	/// Non-const version.
361	static MachineBasicBlock::iterator
362	priorNonDebug(MachineBasicBlock::iterator I,
363	MachineBasicBlock::const_iterator Beg) {
364	return priorNonDebug(I: MachineBasicBlock::const_iterator (I), Beg)
365	.getNonConstIterator();
366	}
367
368	/// If this iterator is a debug value, increment until reaching the End or a
369	/// non-debug instruction.
370	static MachineBasicBlock::const_iterator
371	nextIfDebug(MachineBasicBlock::const_iterator I,
372	MachineBasicBlock::const_iterator End) {
373	for(; I != End; ++I) {
374	if (!I ->isDebugOrPseudoInstr())
375	break;
376	}
377	return I;
378	}
379
380	/// Non-const version.
381	static MachineBasicBlock::iterator
382	nextIfDebug(MachineBasicBlock::iterator I,
383	MachineBasicBlock::const_iterator End) {
384	return nextIfDebug(I: MachineBasicBlock::const_iterator (I), End)
385	.getNonConstIterator();
386	}
387
388	/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
389	ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
390	// Select the scheduler, or set the default.
391	MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
392	if (Ctor != useDefaultMachineSched)
393	return Ctor(this);
394
395	// Get the default scheduler set by the target for this function.
396	ScheduleDAGInstrs Scheduler = PassConfig->createMachineScheduler(C: this*);
397	if (Scheduler)
398	return Scheduler;
399
400	// Default to GenericScheduler.
401	return createGenericSchedLive(C: this);
402	}
403
404	/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
405	/// the caller. We don't have a command line option to override the postRA
406	/// scheduler. The Target must configure it.
407	ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
408	// Get the postRA scheduler set by the target for this function.
409	ScheduleDAGInstrs Scheduler = PassConfig->createPostMachineScheduler(C: this*);
410	if (Scheduler)
411	return Scheduler;
412
413	// Default to GenericScheduler.
414	return createGenericSchedPostRA(C: this);
415	}
416
417	/// Top-level MachineScheduler pass driver.
418	///
419	/// Visit blocks in function order. Divide each block into scheduling regions
420	/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
421	/// consistent with the DAG builder, which traverses the interior of the
422	/// scheduling regions bottom-up.
423	///
424	/// This design avoids exposing scheduling boundaries to the DAG builder,
425	/// simplifying the DAG builder's support for "special" target instructions.
426	/// At the same time the design allows target schedulers to operate across
427	/// scheduling boundaries, for example to bundle the boundary instructions
428	/// without reordering them. This creates complexity, because the target
429	/// scheduler must update the RegionBegin and RegionEnd positions cached by
430	/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
431	/// design would be to split blocks at scheduling boundaries, but LLVM has a
432	/// general bias against block splitting purely for implementation simplicity.
433	bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
434	if (skipFunction(F: mf.getFunction()))
435	return false;
436
437	if (EnableMachineSched.getNumOccurrences()) {
438	if (!EnableMachineSched)
439	return false;
440	} else if (!mf.getSubtarget().enableMachineScheduler())
441	return false;
442
443	LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
444
445	// Initialize the context of the pass.
446	MF = &mf;
447	MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
448	MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
449	PassConfig = &getAnalysis<TargetPassConfig>();
450	AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
451
452	LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
453
454	if (VerifyScheduling) {
455	LLVM_DEBUG(LIS->dump());
456	MF->verify(p: this, Banner: "Before machine scheduling.");
457	}
458	RegClassInfo->runOnMachineFunction(MF: *MF);
459
460	// Instantiate the selected scheduler for this target, function, and
461	// optimization level.
462	std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
463	ScheduleDAGMI::DumpDirection D;
464	if (ForceTopDown)
465	D = ScheduleDAGMI::DumpDirection::TopDown;
466	else if (ForceBottomUp)
467	D = ScheduleDAGMI::DumpDirection::BottomUp;
468	else
469	D = ScheduleDAGMI::DumpDirection::Bidirectional;
470	Scheduler ->setDumpDirection(D);
471	scheduleRegions(Scheduler&: Scheduler, FixKillFlags: false*);
472
473	LLVM_DEBUG(LIS->dump());
474	if (VerifyScheduling)
475	MF->verify(p: this, Banner: "After machine scheduling.");
476	return true;
477	}
478
479	bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
480	if (skipFunction(F: mf.getFunction()))
481	return false;
482
483	if (EnablePostRAMachineSched.getNumOccurrences()) {
484	if (!EnablePostRAMachineSched)
485	return false;
486	} else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
487	LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
488	return false;
489	}
490	LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
491
492	// Initialize the context of the pass.
493	MF = &mf;
494	MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
495	PassConfig = &getAnalysis<TargetPassConfig>();
496	AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
497
498	if (VerifyScheduling)
499	MF->verify(p: this, Banner: "Before post machine scheduling.");
500
501	// Instantiate the selected scheduler for this target, function, and
502	// optimization level.
503	std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
504	ScheduleDAGMI::DumpDirection D;
505	if (PostRADirection == MISchedPostRASched::TopDown)
506	D = ScheduleDAGMI::DumpDirection::TopDown;
507	else if (PostRADirection == MISchedPostRASched::BottomUp)
508	D = ScheduleDAGMI::DumpDirection::BottomUp;
509	else
510	D = ScheduleDAGMI::DumpDirection::Bidirectional;
511	Scheduler ->setDumpDirection(D);
512	scheduleRegions(Scheduler&: Scheduler, FixKillFlags: true*);
513
514	if (VerifyScheduling)
515	MF->verify(p: this, Banner: "After post machine scheduling.");
516	return true;
517	}
518
519	/// Return true of the given instruction should not be included in a scheduling
520	/// region.
521	///
522	/// MachineScheduler does not currently support scheduling across calls. To
523	/// handle calls, the DAG builder needs to be modified to create register
524	/// anti/output dependencies on the registers clobbered by the call's regmask
525	/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
526	/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
527	/// the boundary, but there would be no benefit to postRA scheduling across
528	/// calls this late anyway.
529	static bool isSchedBoundary(MachineBasicBlock::iterator MI,
530	MachineBasicBlock *MBB,
531	MachineFunction *MF,
532	const TargetInstrInfo *TII) {
533	return MI ->isCall() \|\| TII->isSchedulingBoundary(MI: MI, MBB, MF: MF);
534	}
535
536	/// A region of an MBB for scheduling.
537	namespace {
538	struct SchedRegion {
539	/// RegionBegin is the first instruction in the scheduling region, and
540	/// RegionEnd is either MBB->end() or the scheduling boundary after the
541	/// last instruction in the scheduling region. These iterators cannot refer
542	/// to instructions outside of the identified scheduling region because
543	/// those may be reordered before scheduling this region.
544	MachineBasicBlock::iterator RegionBegin;
545	MachineBasicBlock::iterator RegionEnd;
546	unsigned NumRegionInstrs;
547
548	SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
549	unsigned N) :
550	RegionBegin (B), RegionEnd (E), NumRegionInstrs(N) {}
551	};
552	} // end anonymous namespace
553
554	using MBBRegionsVector = SmallVector<SchedRegion, `16`>;
555
556	static void
557	getSchedRegions(MachineBasicBlock *MBB,
558	MBBRegionsVector &Regions,
559	bool RegionsTopDown) {
560	MachineFunction *MF = MBB->getParent();
561	const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
562
563	MachineBasicBlock::iterator I = nullptr;
564	for(MachineBasicBlock::iterator RegionEnd = MBB->end();
565	RegionEnd != MBB->begin(); RegionEnd = I) {
566
567	// Avoid decrementing RegionEnd for blocks with no terminator.
568	if (RegionEnd != MBB->end() \|\|
569	isSchedBoundary(MI: &std::prev(x: RegionEnd), MBB: &MBB, MF, TII)) {
570	--RegionEnd;
571	}
572
573	// The next region starts above the previous region. Look backward in the
574	// instruction stream until we find the nearest boundary.
575	unsigned NumRegionInstrs = `0`;
576	I = RegionEnd;
577	for (;I != MBB->begin(); --I) {
578	MachineInstr &MI = *std::prev(x: I);
579	if (isSchedBoundary(MI: &MI, MBB: &*MBB, MF, TII))
580	break;
581	if (!MI.isDebugOrPseudoInstr()) {
582	// MBB::size() uses instr_iterator to count. Here we need a bundle to
583	// count as a single instruction.
584	++NumRegionInstrs;
585	}
586	}
587
588	// It's possible we found a scheduling region that only has debug
589	// instructions. Don't bother scheduling these.
590	if (NumRegionInstrs != `0`)
591	Regions.push_back(Elt: SchedRegion (I, RegionEnd, NumRegionInstrs));
592	}
593
594	if (RegionsTopDown)
595	std::reverse(first: Regions.begin(), last: Regions.end());
596	}
597
598	/// Main driver for both MachineScheduler and PostMachineScheduler.
599	void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
600	bool FixKillFlags) {
601	// Visit all machine basic blocks.
602	//
603	// TODO: Visit blocks in global postorder or postorder within the bottom-up
604	// loop tree. Then we can optionally compute global RegPressure.
605	for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
606	MBB != MBBEnd; ++MBB) {
607
608	Scheduler.startBlock(BB: &*MBB);
609
610	#ifndef NDEBUG
611	if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
612	continue;
613	if (SchedOnlyBlock.getNumOccurrences()
614	&& (int)SchedOnlyBlock != MBB->getNumber())
615	continue;
616	#endif
617
618	// Break the block into scheduling regions [I, RegionEnd). RegionEnd
619	// points to the scheduling boundary at the bottom of the region. The DAG
620	// does not include RegionEnd, but the region does (i.e. the next
621	// RegionEnd is above the previous RegionBegin). If the current block has
622	// no terminator then RegionEnd == MBB->end() for the bottom region.
623	//
624	// All the regions of MBB are first found and stored in MBBRegions, which
625	// will be processed (MBB) top-down if initialized with true.
626	//
627	// The Scheduler may insert instructions during either schedule() or
628	// exitRegion(), even for empty regions. So the local iterators 'I' and
629	// 'RegionEnd' are invalid across these calls. Instructions must not be
630	// added to other regions than the current one without updating MBBRegions.
631
632	MBBRegionsVector MBBRegions;
633	getSchedRegions(MBB: &*MBB, Regions&: MBBRegions, RegionsTopDown: Scheduler.doMBBSchedRegionsTopDown());
634	for (const SchedRegion &R : MBBRegions) {
635	MachineBasicBlock::iterator I = R.RegionBegin;
636	MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
637	unsigned NumRegionInstrs = R.NumRegionInstrs;
638
639	// Notify the scheduler of the region, even if we may skip scheduling
640	// it. Perhaps it still needs to be bundled.
641	Scheduler.enterRegion(bb: &*MBB, begin: I, end: RegionEnd, regioninstrs: NumRegionInstrs);
642
643	// Skip empty scheduling regions (0 or 1 schedulable instructions).
644	if (I == RegionEnd \|\| I == std::prev(x: RegionEnd)) {
645	// Close the current region. Bundle the terminator if needed.
646	// This invalidates 'RegionEnd' and 'I'.
647	Scheduler.exitRegion();
648	continue;
649	}
650	LLVM_DEBUG(dbgs() << "******** MI Scheduling ********\n");
651	LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
652	<< " " << MBB->getName() << "\n From: " << *I
653	<< " To: ";
654	if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
655	else dbgs() << "End\n";
656	dbgs() << " RegionInstrs: " << NumRegionInstrs << `'\n'`);
657	if (DumpCriticalPathLength) {
658	errs() << MF->getName();
659	errs() << ":%bb. " << MBB ->getNumber();
660	errs() << " " << MBB ->getName() << " \n";
661	}
662
663	// Schedule a region: possibly reorder instructions.
664	// This invalidates the original region iterators.
665	Scheduler.schedule();
666
667	// Close the current region.
668	Scheduler.exitRegion();
669	}
670	Scheduler.finishBlock();
671	// FIXME: Ideally, no further passes should rely on kill flags. However,
672	// thumb2 size reduction is currently an exception, so the PostMIScheduler
673	// needs to do this.
674	if (FixKillFlags)
675	Scheduler.fixupKills(MBB&: *MBB);
676	}
677	Scheduler.finalizeSchedule();
678	}
679
680	void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
681	// unimplemented
682	}
683
684	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
685	LLVM_DUMP_METHOD void ReadyQueue::dump() const {
686	dbgs() << "Queue " << Name << ": ";
687	for (const SUnit *SU : Queue)
688	dbgs() << SU->NodeNum << " ";
689	dbgs() << "\n";
690	}
691	#endif
692
693	//===----------------------------------------------------------------------===//
694	// ScheduleDAGMI - Basic machine instruction scheduling. This is
695	// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
696	// virtual registers.
697	// ===----------------------------------------------------------------------===/
698
699	// Provide a vtable anchor.
700	ScheduleDAGMI::~ScheduleDAGMI() = default;
701
702	/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
703	/// NumPredsLeft reaches zero, release the successor node.
704	///
705	/// FIXME: Adjust SuccSU height based on MinLatency.
706	void ScheduleDAGMI::releaseSucc(SUnit SU, SDep SuccEdge) {
707	SUnit *SuccSU = SuccEdge->getSUnit();
708
709	if (SuccEdge->isWeak()) {
710	--SuccSU->WeakPredsLeft;
711	if (SuccEdge->isCluster())
712	NextClusterSucc = SuccSU;
713	return;
714	}
715	#ifndef NDEBUG
716	if (SuccSU->NumPredsLeft == `0`) {
717	dbgs() << "* Scheduling failed! *\n";
718	dumpNode(*SuccSU);
719	dbgs() << " has been released too many times!\n";
720	llvm_unreachable(nullptr);
721	}
722	#endif
723	// SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
724	// CurrCycle may have advanced since then.
725	if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
726	SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
727
728	--SuccSU->NumPredsLeft;
729	if (SuccSU->NumPredsLeft == `0` && SuccSU != &ExitSU)
730	SchedImpl ->releaseTopNode(SU: SuccSU);
731	}
732
733	/// releaseSuccessors - Call releaseSucc on each of SU's successors.
734	void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
735	for (SDep &Succ : SU->Succs)
736	releaseSucc(SU, SuccEdge: &Succ);
737	}
738
739	/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
740	/// NumSuccsLeft reaches zero, release the predecessor node.
741	///
742	/// FIXME: Adjust PredSU height based on MinLatency.
743	void ScheduleDAGMI::releasePred(SUnit SU, SDep PredEdge) {
744	SUnit *PredSU = PredEdge->getSUnit();
745
746	if (PredEdge->isWeak()) {
747	--PredSU->WeakSuccsLeft;
748	if (PredEdge->isCluster())
749	NextClusterPred = PredSU;
750	return;
751	}
752	#ifndef NDEBUG
753	if (PredSU->NumSuccsLeft == `0`) {
754	dbgs() << "* Scheduling failed! *\n";
755	dumpNode(*PredSU);
756	dbgs() << " has been released too many times!\n";
757	llvm_unreachable(nullptr);
758	}
759	#endif
760	// SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
761	// CurrCycle may have advanced since then.
762	if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
763	PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
764
765	--PredSU->NumSuccsLeft;
766	if (PredSU->NumSuccsLeft == `0` && PredSU != &EntrySU)
767	SchedImpl ->releaseBottomNode(SU: PredSU);
768	}
769
770	/// releasePredecessors - Call releasePred on each of SU's predecessors.
771	void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
772	for (SDep &Pred : SU->Preds)
773	releasePred(SU, PredEdge: &Pred);
774	}
775
776	void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
777	ScheduleDAGInstrs::startBlock(BB: bb);
778	SchedImpl ->enterMBB(MBB: bb);
779	}
780
781	void ScheduleDAGMI::finishBlock() {
782	SchedImpl ->leaveMBB();
783	ScheduleDAGInstrs::finishBlock();
784	}
785
786	/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
787	/// after crossing a scheduling boundary. [begin, end) includes all instructions
788	/// in the region, including the boundary itself and single-instruction regions
789	/// that don't get scheduled.
790	void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
791	MachineBasicBlock::iterator begin,
792	MachineBasicBlock::iterator end,
793	unsigned regioninstrs)
794	{
795	ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
796
797	SchedImpl ->initPolicy(Begin: begin, End: end, NumRegionInstrs: regioninstrs);
798	}
799
800	/// This is normally called from the main scheduler loop but may also be invoked
801	/// by the scheduling strategy to perform additional code motion.
802	void ScheduleDAGMI::moveInstruction(
803	MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
804	// Advance RegionBegin if the first instruction moves down.
805	if (&*RegionBegin == MI)
806	++RegionBegin;
807
808	// Update the instruction stream.
809	BB->splice(Where: InsertPos, Other: BB, From: MI);
810
811	// Update LiveIntervals
812	if (LIS)
813	LIS->handleMove(MI&: MI, /UpdateFlags=/*true);
814
815	// Recede RegionBegin if an instruction moves above the first.
816	if (RegionBegin == InsertPos)
817	RegionBegin = MI;
818	}
819
820	bool ScheduleDAGMI::checkSchedLimit() {
821	#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
822	if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~`0U`) {
823	CurrentTop = CurrentBottom;
824	return false;
825	}
826	++NumInstrsScheduled;
827	#endif
828	return true;
829	}
830
831	/// Per-region scheduling driver, called back from
832	/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
833	/// that does not consider liveness or register pressure. It is useful for
834	/// PostRA scheduling and potentially other custom schedulers.
835	void ScheduleDAGMI::schedule() {
836	LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
837	LLVM_DEBUG(SchedImpl->dumpPolicy());
838
839	// Build the DAG.
840	buildSchedGraph(AA);
841
842	postProcessDAG();
843
844	SmallVector<SUnit*, `8`> TopRoots, BotRoots;
845	findRootsAndBiasEdges(TopRoots, BotRoots);
846
847	LLVM_DEBUG(dump());
848	if (PrintDAGs) dump();
849	if (ViewMISchedDAGs) viewGraph();
850
851	// Initialize the strategy before modifying the DAG.
852	// This may initialize a DFSResult to be used for queue priority.
853	SchedImpl ->initialize(DAG: this);
854
855	// Initialize ready queues now that the DAG and priority data are finalized.
856	initQueues(TopRoots, BotRoots);
857
858	bool IsTopNode = false;
859	while (true) {
860	LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
861	SUnit *SU = SchedImpl ->pickNode(IsTopNode);
862	if (!SU) break;
863
864	assert(!SU->isScheduled && "Node already scheduled");
865	if (!checkSchedLimit())
866	break;
867
868	MachineInstr *MI = SU->getInstr();
869	if (IsTopNode) {
870	assert(SU->isTopReady() && "node still has unscheduled dependencies");
871	if (&*CurrentTop == MI)
872	CurrentTop = nextIfDebug(I: ++CurrentTop, End: CurrentBottom);
873	else
874	moveInstruction(MI, InsertPos: CurrentTop);
875	} else {
876	assert(SU->isBottomReady() && "node still has unscheduled dependencies");
877	MachineBasicBlock::iterator priorII =
878	priorNonDebug(I: CurrentBottom, Beg: CurrentTop);
879	if (&*priorII == MI)
880	CurrentBottom = priorII;
881	else {
882	if (&*CurrentTop == MI)
883	CurrentTop = nextIfDebug(I: ++CurrentTop, End: priorII);
884	moveInstruction(MI, InsertPos: CurrentBottom);
885	CurrentBottom = MI;
886	}
887	}
888	// Notify the scheduling strategy before updating the DAG.
889	// This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
890	// runs, it can then use the accurate ReadyCycle time to determine whether
891	// newly released nodes can move to the readyQ.
892	SchedImpl ->schedNode(SU, IsTopNode);
893
894	updateQueues(SU, IsTopNode);
895	}
896	assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
897
898	placeDebugValues();
899
900	LLVM_DEBUG({
901	dbgs() << "*** Final schedule for "
902	<< printMBBReference(begin()->getParent()) << " **\n";
903	dumpSchedule();
904	dbgs() << `'\n'`;
905	});
906	}
907
908	/// Apply each ScheduleDAGMutation step in order.
909	void ScheduleDAGMI::postProcessDAG() {
910	for (auto &m : Mutations)
911	m ->apply(DAG: this);
912	}
913
914	void ScheduleDAGMI::
915	findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
916	SmallVectorImpl<SUnit*> &BotRoots) {
917	for (SUnit &SU : SUnits) {
918	assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
919
920	// Order predecessors so DFSResult follows the critical path.
921	SU.biasCriticalPath();
922
923	// A SUnit is ready to top schedule if it has no predecessors.
924	if (!SU.NumPredsLeft)
925	TopRoots.push_back(Elt: &SU);
926	// A SUnit is ready to bottom schedule if it has no successors.
927	if (!SU.NumSuccsLeft)
928	BotRoots.push_back(Elt: &SU);
929	}
930	ExitSU.biasCriticalPath();
931	}
932
933	/// Identify DAG roots and setup scheduler queues.
934	void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
935	ArrayRef<SUnit*> BotRoots) {
936	NextClusterSucc = nullptr;
937	NextClusterPred = nullptr;
938
939	// Release all DAG roots for scheduling, not including EntrySU/ExitSU.
940	//
941	// Nodes with unreleased weak edges can still be roots.
942	// Release top roots in forward order.
943	for (SUnit *SU : TopRoots)
944	SchedImpl ->releaseTopNode(SU);
945
946	// Release bottom roots in reverse order so the higher priority nodes appear
947	// first. This is more natural and slightly more efficient.
948	for (SmallVectorImpl<SUnit*>::const_reverse_iterator
949	I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
950	SchedImpl ->releaseBottomNode(SU: *I);
951	}
952
953	releaseSuccessors(SU: &EntrySU);
954	releasePredecessors(SU: &ExitSU);
955
956	SchedImpl ->registerRoots();
957
958	// Advance past initial DebugValues.
959	CurrentTop = nextIfDebug(I: RegionBegin, End: RegionEnd);
960	CurrentBottom = RegionEnd;
961	}
962
963	/// Update scheduler queues after scheduling an instruction.
964	void ScheduleDAGMI::updateQueues(SUnit SU, bool* IsTopNode) {
965	// Release dependent instructions for scheduling.
966	if (IsTopNode)
967	releaseSuccessors(SU);
968	else
969	releasePredecessors(SU);
970
971	SU->isScheduled = true;
972	}
973
974	/// Reinsert any remaining debug_values, just like the PostRA scheduler.
975	void ScheduleDAGMI::placeDebugValues() {
976	// If first instruction was a DBG_VALUE then put it back.
977	if (FirstDbgValue) {
978	BB->splice(Where: RegionBegin, Other: BB, From: FirstDbgValue);
979	RegionBegin = FirstDbgValue;
980	}
981
982	for (std::vector<std::pair<MachineInstr , MachineInstr >>::iterator
983	DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
984	std::pair<MachineInstr , MachineInstr > P = *std::prev(x: DI);
985	MachineInstr *DbgValue = P.first;
986	MachineBasicBlock::iterator OrigPrevMI = P.second;
987	if (&*RegionBegin == DbgValue)
988	++RegionBegin;
989	BB->splice(Where: std::next(x: OrigPrevMI), Other: BB, From: DbgValue);
990	if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
991	RegionEnd = DbgValue;
992	}
993	}
994
995	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
996	static const char *scheduleTableLegend = " i: issue\n x: resource booked";
997
998	LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
999	// Bail off when there is no schedule model to query.
1000	if (!SchedModel.hasInstrSchedModel())
1001	return;
1002
1003	// Nothing to show if there is no or just one instruction.
1004	if (BB->size() < `2`)
1005	return;
1006
1007	dbgs() << " * Schedule table (TopDown):\n";
1008	dbgs() << scheduleTableLegend << "\n";
1009	const unsigned FirstCycle = getSUnit(&(std::begin(this)))->TopReadyCycle;
1010	unsigned LastCycle = getSUnit(&(std::prev(std::end(this))))->TopReadyCycle;
1011	for (MachineInstr &MI : *this) {
1012	SUnit *SU = getSUnit(&MI);
1013	if (!SU)
1014	continue;
1015	const MCSchedClassDesc *SC = getSchedClass(SU);
1016	for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1017	PE = SchedModel.getWriteProcResEnd(SC);
1018	PI != PE; ++PI) {
1019	if (SU->TopReadyCycle + PI->ReleaseAtCycle - `1` > LastCycle)
1020	LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - `1`;
1021	}
1022	}
1023	// Print the header with the cycles
1024	dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1025	for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1026	dbgs() << llvm::left_justify("\| " + std::to_string(C), ColWidth);
1027	dbgs() << "\|\n";
1028
1029	for (MachineInstr &MI : *this) {
1030	SUnit *SU = getSUnit(&MI);
1031	if (!SU) {
1032	dbgs() << "Missing SUnit\n";
1033	continue;
1034	}
1035	std::string NodeName("SU(");
1036	NodeName += std::to_string(SU->NodeNum) + ")";
1037	dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1038	unsigned C = FirstCycle;
1039	for (; C <= LastCycle; ++C) {
1040	if (C == SU->TopReadyCycle)
1041	dbgs() << llvm::left_justify("\| i", ColWidth);
1042	else
1043	dbgs() << llvm::left_justify("\|", ColWidth);
1044	}
1045	dbgs() << "\|\n";
1046	const MCSchedClassDesc *SC = getSchedClass(SU);
1047
1048	SmallVector<MCWriteProcResEntry, `4`> ResourcesIt(
1049	make_range(SchedModel.getWriteProcResBegin(SC),
1050	SchedModel.getWriteProcResEnd(SC)));
1051
1052	if (MISchedSortResourcesInTrace)
1053	llvm::stable_sort(ResourcesIt,
1054	[](const MCWriteProcResEntry &LHS,
1055	const MCWriteProcResEntry &RHS) -> bool {
1056	return LHS.AcquireAtCycle < RHS.AcquireAtCycle \|\|
1057	(LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1058	LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1059	});
1060	for (const MCWriteProcResEntry &PI : ResourcesIt) {
1061	C = FirstCycle;
1062	const std::string ResName =
1063	SchedModel.getResourceName(PI.ProcResourceIdx);
1064	dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1065	for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1066	dbgs() << llvm::left_justify("\|", ColWidth);
1067	}
1068	for (unsigned I = `0`, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1069	++I, ++C)
1070	dbgs() << llvm::left_justify("\| x", ColWidth);
1071	while (C++ <= LastCycle)
1072	dbgs() << llvm::left_justify("\|", ColWidth);
1073	// Place end char
1074	dbgs() << "\| \n";
1075	}
1076	}
1077	}
1078
1079	LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
1080	// Bail off when there is no schedule model to query.
1081	if (!SchedModel.hasInstrSchedModel())
1082	return;
1083
1084	// Nothing to show if there is no or just one instruction.
1085	if (BB->size() < `2`)
1086	return;
1087
1088	dbgs() << " * Schedule table (BottomUp):\n";
1089	dbgs() << scheduleTableLegend << "\n";
1090
1091	const int FirstCycle = getSUnit(&(std::begin(this)))->BotReadyCycle;
1092	int LastCycle = getSUnit(&(std::prev(std::end(this))))->BotReadyCycle;
1093	for (MachineInstr &MI : *this) {
1094	SUnit *SU = getSUnit(&MI);
1095	if (!SU)
1096	continue;
1097	const MCSchedClassDesc *SC = getSchedClass(SU);
1098	for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1099	PE = SchedModel.getWriteProcResEnd(SC);
1100	PI != PE; ++PI) {
1101	if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + `1` < LastCycle)
1102	LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + `1`;
1103	}
1104	}
1105	// Print the header with the cycles
1106	dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1107	for (int C = FirstCycle; C >= LastCycle; --C)
1108	dbgs() << llvm::left_justify("\| " + std::to_string(C), ColWidth);
1109	dbgs() << "\|\n";
1110
1111	for (MachineInstr &MI : *this) {
1112	SUnit *SU = getSUnit(&MI);
1113	if (!SU) {
1114	dbgs() << "Missing SUnit\n";
1115	continue;
1116	}
1117	std::string NodeName("SU(");
1118	NodeName += std::to_string(SU->NodeNum) + ")";
1119	dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1120	int C = FirstCycle;
1121	for (; C >= LastCycle; --C) {
1122	if (C == (int)SU->BotReadyCycle)
1123	dbgs() << llvm::left_justify("\| i", ColWidth);
1124	else
1125	dbgs() << llvm::left_justify("\|", ColWidth);
1126	}
1127	dbgs() << "\|\n";
1128	const MCSchedClassDesc *SC = getSchedClass(SU);
1129	SmallVector<MCWriteProcResEntry, `4`> ResourcesIt(
1130	make_range(SchedModel.getWriteProcResBegin(SC),
1131	SchedModel.getWriteProcResEnd(SC)));
1132
1133	if (MISchedSortResourcesInTrace)
1134	llvm::stable_sort(ResourcesIt,
1135	[](const MCWriteProcResEntry &LHS,
1136	const MCWriteProcResEntry &RHS) -> bool {
1137	return LHS.AcquireAtCycle < RHS.AcquireAtCycle \|\|
1138	(LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1139	LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1140	});
1141	for (const MCWriteProcResEntry &PI : ResourcesIt) {
1142	C = FirstCycle;
1143	const std::string ResName =
1144	SchedModel.getResourceName(PI.ProcResourceIdx);
1145	dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1146	for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1147	dbgs() << llvm::left_justify("\|", ColWidth);
1148	}
1149	for (unsigned I = `0`, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1150	++I, --C)
1151	dbgs() << llvm::left_justify("\| x", ColWidth);
1152	while (C-- >= LastCycle)
1153	dbgs() << llvm::left_justify("\|", ColWidth);
1154	// Place end char
1155	dbgs() << "\| \n";
1156	}
1157	}
1158	}
1159	#endif
1160
1161	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1162	LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
1163	if (MISchedDumpScheduleTrace) {
1164	if (DumpDir == DumpDirection::TopDown)
1165	dumpScheduleTraceTopDown();
1166	else if (DumpDir == DumpDirection::BottomUp)
1167	dumpScheduleTraceBottomUp();
1168	else if (DumpDir == DumpDirection::Bidirectional) {
1169	dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1170	} else {
1171	dbgs() << "* Schedule table: DumpDirection not set.\n";
1172	}
1173	}
1174
1175	for (MachineInstr &MI : *this) {
1176	if (SUnit *SU = getSUnit(&MI))
1177	dumpNode(*SU);
1178	else
1179	dbgs() << "Missing SUnit\n";
1180	}
1181	}
1182	#endif
1183
1184	//===----------------------------------------------------------------------===//
1185	// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1186	// preservation.
1187	//===----------------------------------------------------------------------===//
1188
1189	ScheduleDAGMILive::~ScheduleDAGMILive() {
1190	delete DFSResult;
1191	}
1192
1193	void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
1194	const MachineInstr &MI = *SU.getInstr();
1195	for (const MachineOperand &MO : MI.operands()) {
1196	if (!MO.isReg())
1197	continue;
1198	if (!MO.readsReg())
1199	continue;
1200	if (TrackLaneMasks && !MO.isUse())
1201	continue;
1202
1203	Register Reg = MO.getReg();
1204	if (!Reg.isVirtual())
1205	continue;
1206
1207	// Ignore re-defs.
1208	if (TrackLaneMasks) {
1209	bool FoundDef = false;
1210	for (const MachineOperand &MO2 : MI.all_defs()) {
1211	if (MO2.getReg() == Reg && !MO2.isDead()) {
1212	FoundDef = true;
1213	break;
1214	}
1215	}
1216	if (FoundDef)
1217	continue;
1218	}
1219
1220	// Record this local VReg use.
1221	VReg2SUnitMultiMap::iterator UI = VRegUses.find(Key: Reg);
1222	for (; UI != VRegUses.end(); ++UI) {
1223	if (UI ->SU == &SU)
1224	break;
1225	}
1226	if (UI == VRegUses.end())
1227	VRegUses.insert(Val: VReg2SUnit (Reg, LaneBitmask::getNone(), &SU));
1228	}
1229	}
1230
1231	/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1232	/// crossing a scheduling boundary. [begin, end) includes all instructions in
1233	/// the region, including the boundary itself and single-instruction regions
1234	/// that don't get scheduled.
1235	void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
1236	MachineBasicBlock::iterator begin,
1237	MachineBasicBlock::iterator end,
1238	unsigned regioninstrs)
1239	{
1240	// ScheduleDAGMI initializes SchedImpl's per-region policy.
1241	ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1242
1243	// For convenience remember the end of the liveness region.
1244	LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(x: RegionEnd);
1245
1246	SUPressureDiffs.clear();
1247
1248	ShouldTrackPressure = SchedImpl ->shouldTrackPressure();
1249	ShouldTrackLaneMasks = SchedImpl ->shouldTrackLaneMasks();
1250
1251	assert((!ShouldTrackLaneMasks \|\| ShouldTrackPressure) &&
1252	"ShouldTrackLaneMasks requires ShouldTrackPressure");
1253	}
1254
1255	// Setup the register pressure trackers for the top scheduled and bottom
1256	// scheduled regions.
1257	void ScheduleDAGMILive::initRegPressure() {
1258	VRegUses.clear();
1259	VRegUses.setUniverse(MRI.getNumVirtRegs());
1260	for (SUnit &SU : SUnits)
1261	collectVRegUses(SU);
1262
1263	TopRPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: RegionBegin,
1264	TrackLaneMasks: ShouldTrackLaneMasks, TrackUntiedDefs: false);
1265	BotRPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: LiveRegionEnd,
1266	TrackLaneMasks: ShouldTrackLaneMasks, TrackUntiedDefs: false);
1267
1268	// Close the RPTracker to finalize live ins.
1269	RPTracker.closeRegion();
1270
1271	LLVM_DEBUG(RPTracker.dump());
1272
1273	// Initialize the live ins and live outs.
1274	TopRPTracker.addLiveRegs(Regs: RPTracker.getPressure().LiveInRegs);
1275	BotRPTracker.addLiveRegs(Regs: RPTracker.getPressure().LiveOutRegs);
1276
1277	// Close one end of the tracker so we can call
1278	// getMaxUpward/DownwardPressureDelta before advancing across any
1279	// instructions. This converts currently live regs into live ins/outs.
1280	TopRPTracker.closeTop();
1281	BotRPTracker.closeBottom();
1282
1283	BotRPTracker.initLiveThru(RPTracker);
1284	if (!BotRPTracker.getLiveThru().empty()) {
1285	TopRPTracker.initLiveThru(PressureSet: BotRPTracker.getLiveThru());
1286	LLVM_DEBUG(dbgs() << "Live Thru: ";
1287	dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1288	};
1289
1290	// For each live out vreg reduce the pressure change associated with other
1291	// uses of the same vreg below the live-out reaching def.
1292	updatePressureDiffs(LiveUses: RPTracker.getPressure().LiveOutRegs);
1293
1294	// Account for liveness generated by the region boundary.
1295	if (LiveRegionEnd != RegionEnd) {
1296	SmallVector<RegisterMaskPair, `8`> LiveUses;
1297	BotRPTracker.recede(LiveUses: &LiveUses);
1298	updatePressureDiffs(LiveUses);
1299	}
1300
1301	LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1302	dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1303	dbgs() << "Bottom Pressure:\n";
1304	dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1305
1306	assert((BotRPTracker.getPos() == RegionEnd \|\|
1307	(RegionEnd->isDebugInstr() &&
1308	BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1309	"Can't find the region bottom");
1310
1311	// Cache the list of excess pressure sets in this region. This will also track
1312	// the max pressure in the scheduled code for these sets.
1313	RegionCriticalPSets.clear();
1314	const std::vector<unsigned> &RegionPressure =
1315	RPTracker.getPressure().MaxSetPressure;
1316	for (unsigned i = `0`, e = RegionPressure.size(); i < e; ++i) {
1317	unsigned Limit = RegClassInfo->getRegPressureSetLimit(Idx: i);
1318	if (RegionPressure [i] > Limit) {
1319	LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1320	<< " Actual " << RegionPressure[i] << "\n");
1321	RegionCriticalPSets.push_back(x: PressureChange (i));
1322	}
1323	}
1324	LLVM_DEBUG(dbgs() << "Excess PSets: ";
1325	for (const PressureChange &RCPS
1326	: RegionCriticalPSets) dbgs()
1327	<< TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1328	dbgs() << "\n");
1329	}
1330
1331	void ScheduleDAGMILive::
1332	updateScheduledPressure(const SUnit *SU,
1333	const std::vector<unsigned> &NewMaxPressure) {
1334	const PressureDiff &PDiff = getPressureDiff(SU);
1335	unsigned CritIdx = `0`, CritEnd = RegionCriticalPSets.size();
1336	for (const PressureChange &PC : PDiff) {
1337	if (!PC.isValid())
1338	break;
1339	unsigned ID = PC.getPSet();
1340	while (CritIdx != CritEnd && RegionCriticalPSets [CritIdx].getPSet() < ID)
1341	++CritIdx;
1342	if (CritIdx != CritEnd && RegionCriticalPSets [CritIdx].getPSet() == ID) {
1343	if ((int)NewMaxPressure [ID] > RegionCriticalPSets [CritIdx].getUnitInc()
1344	&& NewMaxPressure [ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1345	RegionCriticalPSets [CritIdx].setUnitInc(NewMaxPressure [ID]);
1346	}
1347	unsigned Limit = RegClassInfo->getRegPressureSetLimit(Idx: ID);
1348	if (NewMaxPressure [ID] >= Limit - `2`) {
1349	LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1350	<< NewMaxPressure[ID]
1351	<< ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1352	<< Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1353	<< " livethru)\n");
1354	}
1355	}
1356	}
1357
1358	/// Update the PressureDiff array for liveness after scheduling this
1359	/// instruction.
1360	void ScheduleDAGMILive::updatePressureDiffs(
1361	ArrayRef<RegisterMaskPair> LiveUses) {
1362	for (const RegisterMaskPair &P : LiveUses) {
1363	Register Reg = P.RegUnit;
1364	/// FIXME: Currently assuming single-use physregs.
1365	if (!Reg.isVirtual())
1366	continue;
1367
1368	if (ShouldTrackLaneMasks) {
1369	// If the register has just become live then other uses won't change
1370	// this fact anymore => decrement pressure.
1371	// If the register has just become dead then other uses make it come
1372	// back to life => increment pressure.
1373	bool Decrement = P.LaneMask.any();
1374
1375	for (const VReg2SUnit &V2SU
1376	: make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) {
1377	SUnit &SU = *V2SU.SU;
1378	if (SU.isScheduled \|\| &SU == &ExitSU)
1379	continue;
1380
1381	PressureDiff &PDiff = getPressureDiff(SU: &SU);
1382	PDiff.addPressureChange(RegUnit: Reg, IsDec: Decrement, MRI: &MRI);
1383	LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1384	<< printReg(Reg, TRI) << `':'`
1385	<< PrintLaneMask(P.LaneMask) << `' '` << *SU.getInstr();
1386	dbgs() << " to "; PDiff.dump(*TRI););
1387	}
1388	} else {
1389	assert(P.LaneMask.any());
1390	LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1391	// This may be called before CurrentBottom has been initialized. However,
1392	// BotRPTracker must have a valid position. We want the value live into the
1393	// instruction or live out of the block, so ask for the previous
1394	// instruction's live-out.
1395	const LiveInterval &LI = LIS->getInterval(Reg);
1396	VNInfo *VNI;
1397	MachineBasicBlock::const_iterator I =
1398	nextIfDebug(I: BotRPTracker.getPos(), End: BB->end());
1399	if (I == BB->end())
1400	VNI = LI.getVNInfoBefore(Idx: LIS->getMBBEndIdx(mbb: BB));
1401	else {
1402	LiveQueryResult LRQ = LI.Query(Idx: LIS->getInstructionIndex(Instr: *I));
1403	VNI = LRQ.valueIn();
1404	}
1405	// RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1406	assert(VNI && "No live value at use.");
1407	for (const VReg2SUnit &V2SU
1408	: make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) {
1409	SUnit *SU = V2SU.SU;
1410	// If this use comes before the reaching def, it cannot be a last use,
1411	// so decrease its pressure change.
1412	if (!SU->isScheduled && SU != &ExitSU) {
1413	LiveQueryResult LRQ =
1414	LI.Query(Idx: LIS->getInstructionIndex(Instr: *SU->getInstr()));
1415	if (LRQ.valueIn() == VNI) {
1416	PressureDiff &PDiff = getPressureDiff(SU);
1417	PDiff.addPressureChange(RegUnit: Reg, IsDec: true, MRI: &MRI);
1418	LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1419	<< *SU->getInstr();
1420	dbgs() << " to "; PDiff.dump(*TRI););
1421	}
1422	}
1423	}
1424	}
1425	}
1426	}
1427
1428	void ScheduleDAGMILive::dump() const {
1429	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1430	if (EntrySU.getInstr() != nullptr)
1431	dumpNodeAll(EntrySU);
1432	for (const SUnit &SU : SUnits) {
1433	dumpNodeAll(SU);
1434	if (ShouldTrackPressure) {
1435	dbgs() << " Pressure Diff : ";
1436	getPressureDiff(&SU).dump(*TRI);
1437	}
1438	dbgs() << " Single Issue : ";
1439	if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1440	SchedModel.mustEndGroup(SU.getInstr()))
1441	dbgs() << "true;";
1442	else
1443	dbgs() << "false;";
1444	dbgs() << `'\n'`;
1445	}
1446	if (ExitSU.getInstr() != nullptr)
1447	dumpNodeAll(ExitSU);
1448	#endif
1449	}
1450
1451	/// schedule - Called back from MachineScheduler::runOnMachineFunction
1452	/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1453	/// only includes instructions that have DAG nodes, not scheduling boundaries.
1454	///
1455	/// This is a skeletal driver, with all the functionality pushed into helpers,
1456	/// so that it can be easily extended by experimental schedulers. Generally,
1457	/// implementing MachineSchedStrategy should be sufficient to implement a new
1458	/// scheduling algorithm. However, if a scheduler further subclasses
1459	/// ScheduleDAGMILive then it will want to override this virtual method in order
1460	/// to update any specialized state.
1461	void ScheduleDAGMILive::schedule() {
1462	LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1463	LLVM_DEBUG(SchedImpl->dumpPolicy());
1464	buildDAGWithRegPressure();
1465
1466	postProcessDAG();
1467
1468	SmallVector<SUnit*, `8`> TopRoots, BotRoots;
1469	findRootsAndBiasEdges(TopRoots, BotRoots);
1470
1471	// Initialize the strategy before modifying the DAG.
1472	// This may initialize a DFSResult to be used for queue priority.
1473	SchedImpl ->initialize(DAG: this);
1474
1475	LLVM_DEBUG(dump());
1476	if (PrintDAGs) dump();
1477	if (ViewMISchedDAGs) viewGraph();
1478
1479	// Initialize ready queues now that the DAG and priority data are finalized.
1480	initQueues(TopRoots, BotRoots);
1481
1482	bool IsTopNode = false;
1483	while (true) {
1484	LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1485	SUnit *SU = SchedImpl ->pickNode(IsTopNode);
1486	if (!SU) break;
1487
1488	assert(!SU->isScheduled && "Node already scheduled");
1489	if (!checkSchedLimit())
1490	break;
1491
1492	scheduleMI(SU, IsTopNode);
1493
1494	if (DFSResult) {
1495	unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1496	if (!ScheduledTrees.test(Idx: SubtreeID)) {
1497	ScheduledTrees.set(SubtreeID);
1498	DFSResult->scheduleTree(SubtreeID);
1499	SchedImpl ->scheduleTree(SubtreeID);
1500	}
1501	}
1502
1503	// Notify the scheduling strategy after updating the DAG.
1504	SchedImpl ->schedNode(SU, IsTopNode);
1505
1506	updateQueues(SU, IsTopNode);
1507	}
1508	assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1509
1510	placeDebugValues();
1511
1512	LLVM_DEBUG({
1513	dbgs() << "*** Final schedule for "
1514	<< printMBBReference(begin()->getParent()) << " **\n";
1515	dumpSchedule();
1516	dbgs() << `'\n'`;
1517	});
1518	}
1519
1520	/// Build the DAG and setup three register pressure trackers.
1521	void ScheduleDAGMILive::buildDAGWithRegPressure() {
1522	if (!ShouldTrackPressure) {
1523	RPTracker.reset();
1524	RegionCriticalPSets.clear();
1525	buildSchedGraph(AA);
1526	return;
1527	}
1528
1529	// Initialize the register pressure tracker used by buildSchedGraph.
1530	RPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: LiveRegionEnd,
1531	TrackLaneMasks: ShouldTrackLaneMasks, /TrackUntiedDefs=/true);
1532
1533	// Account for liveness generate by the region boundary.
1534	if (LiveRegionEnd != RegionEnd)
1535	RPTracker.recede();
1536
1537	// Build the DAG, and compute current register pressure.
1538	buildSchedGraph(AA, RPTracker: &RPTracker, PDiffs: &SUPressureDiffs, LIS, TrackLaneMasks: ShouldTrackLaneMasks);
1539
1540	// Initialize top/bottom trackers after computing region pressure.
1541	initRegPressure();
1542	}
1543
1544	void ScheduleDAGMILive::computeDFSResult() {
1545	if (!DFSResult)
1546	DFSResult = new SchedDFSResult (/BottomU/true, MinSubtreeSize);
1547	DFSResult->clear();
1548	ScheduledTrees.clear();
1549	DFSResult->resize(NumSUnits: SUnits.size());
1550	DFSResult->compute(SUnits);
1551	ScheduledTrees.resize(N: DFSResult->getNumSubtrees());
1552	}
1553
1554	/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1555	/// only provides the critical path for single block loops. To handle loops that
1556	/// span blocks, we could use the vreg path latencies provided by
1557	/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1558	/// available for use in the scheduler.
1559	///
1560	/// The cyclic path estimation identifies a def-use pair that crosses the back
1561	/// edge and considers the depth and height of the nodes. For example, consider
1562	/// the following instruction sequence where each instruction has unit latency
1563	/// and defines an eponymous virtual register:
1564	///
1565	/// a->b(a,c)->c(b)->d(c)->exit
1566	///
1567	/// The cyclic critical path is a two cycles: b->c->b
1568	/// The acyclic critical path is four cycles: a->b->c->d->exit
1569	/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1570	/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1571	/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1572	/// LiveInDepth = depth(b) = len(a->b) = 1
1573	///
1574	/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1575	/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1576	/// CyclicCriticalPath = min(2, 2) = 2
1577	///
1578	/// This could be relevant to PostRA scheduling, but is currently implemented
1579	/// assuming LiveIntervals.
1580	unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1581	// This only applies to single block loop.
1582	if (!BB->isSuccessor(MBB: BB))
1583	return `0`;
1584
1585	unsigned MaxCyclicLatency = `0`;
1586	// Visit each live out vreg def to find def/use pairs that cross iterations.
1587	for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1588	Register Reg = P.RegUnit;
1589	if (!Reg.isVirtual())
1590	continue;
1591	const LiveInterval &LI = LIS->getInterval(Reg);
1592	const VNInfo *DefVNI = LI.getVNInfoBefore(Idx: LIS->getMBBEndIdx(mbb: BB));
1593	if (!DefVNI)
1594	continue;
1595
1596	MachineInstr *DefMI = LIS->getInstructionFromIndex(index: DefVNI->def);
1597	const SUnit *DefSU = getSUnit(MI: DefMI);
1598	if (!DefSU)
1599	continue;
1600
1601	unsigned LiveOutHeight = DefSU->getHeight();
1602	unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1603	// Visit all local users of the vreg def.
1604	for (const VReg2SUnit &V2SU
1605	: make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) {
1606	SUnit *SU = V2SU.SU;
1607	if (SU == &ExitSU)
1608	continue;
1609
1610	// Only consider uses of the phi.
1611	LiveQueryResult LRQ = LI.Query(Idx: LIS->getInstructionIndex(Instr: *SU->getInstr()));
1612	if (!LRQ.valueIn()->isPHIDef())
1613	continue;
1614
1615	// Assume that a path spanning two iterations is a cycle, which could
1616	// overestimate in strange cases. This allows cyclic latency to be
1617	// estimated as the minimum slack of the vreg's depth or height.
1618	unsigned CyclicLatency = `0`;
1619	if (LiveOutDepth > SU->getDepth())
1620	CyclicLatency = LiveOutDepth - SU->getDepth();
1621
1622	unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1623	if (LiveInHeight > LiveOutHeight) {
1624	if (LiveInHeight - LiveOutHeight < CyclicLatency)
1625	CyclicLatency = LiveInHeight - LiveOutHeight;
1626	} else
1627	CyclicLatency = `0`;
1628
1629	LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1630	<< SU->NodeNum << ") = " << CyclicLatency << "c\n");
1631	if (CyclicLatency > MaxCyclicLatency)
1632	MaxCyclicLatency = CyclicLatency;
1633	}
1634	}
1635	LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1636	return MaxCyclicLatency;
1637	}
1638
1639	/// Release ExitSU predecessors and setup scheduler queues. Re-position
1640	/// the Top RP tracker in case the region beginning has changed.
1641	void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1642	ArrayRef<SUnit*> BotRoots) {
1643	ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1644	if (ShouldTrackPressure) {
1645	assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1646	TopRPTracker.setPos(CurrentTop);
1647	}
1648	}
1649
1650	/// Move an instruction and update register pressure.
1651	void ScheduleDAGMILive::scheduleMI(SUnit SU, bool* IsTopNode) {
1652	// Move the instruction to its new location in the instruction stream.
1653	MachineInstr *MI = SU->getInstr();
1654
1655	if (IsTopNode) {
1656	assert(SU->isTopReady() && "node still has unscheduled dependencies");
1657	if (&*CurrentTop == MI)
1658	CurrentTop = nextIfDebug(I: ++CurrentTop, End: CurrentBottom);
1659	else {
1660	moveInstruction(MI, InsertPos: CurrentTop);
1661	TopRPTracker.setPos(MI);
1662	}
1663
1664	if (ShouldTrackPressure) {
1665	// Update top scheduled pressure.
1666	RegisterOperands RegOpers;
1667	RegOpers.collect(MI: MI, TRI: TRI, MRI, TrackLaneMasks: ShouldTrackLaneMasks,
1668	/IgnoreDead=/false);
1669	if (ShouldTrackLaneMasks) {
1670	// Adjust liveness and add missing dead+read-undef flags.
1671	SlotIndex SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot();
1672	RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI);
1673	} else {
1674	// Adjust for missing dead-def flags.
1675	RegOpers.detectDeadDefs(MI: MI, LIS: LIS);
1676	}
1677
1678	TopRPTracker.advance(RegOpers);
1679	assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1680	LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1681	TopRPTracker.getRegSetPressureAtPos(), TRI););
1682
1683	updateScheduledPressure(SU, NewMaxPressure: TopRPTracker.getPressure().MaxSetPressure);
1684	}
1685	} else {
1686	assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1687	MachineBasicBlock::iterator priorII =
1688	priorNonDebug(I: CurrentBottom, Beg: CurrentTop);
1689	if (&*priorII == MI)
1690	CurrentBottom = priorII;
1691	else {
1692	if (&*CurrentTop == MI) {
1693	CurrentTop = nextIfDebug(I: ++CurrentTop, End: priorII);
1694	TopRPTracker.setPos(CurrentTop);
1695	}
1696	moveInstruction(MI, InsertPos: CurrentBottom);
1697	CurrentBottom = MI;
1698	BotRPTracker.setPos(CurrentBottom);
1699	}
1700	if (ShouldTrackPressure) {
1701	RegisterOperands RegOpers;
1702	RegOpers.collect(MI: MI, TRI: TRI, MRI, TrackLaneMasks: ShouldTrackLaneMasks,
1703	/IgnoreDead=/false);
1704	if (ShouldTrackLaneMasks) {
1705	// Adjust liveness and add missing dead+read-undef flags.
1706	SlotIndex SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot();
1707	RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI);
1708	} else {
1709	// Adjust for missing dead-def flags.
1710	RegOpers.detectDeadDefs(MI: MI, LIS: LIS);
1711	}
1712
1713	if (BotRPTracker.getPos() != CurrentBottom)
1714	BotRPTracker.recedeSkipDebugValues();
1715	SmallVector<RegisterMaskPair, `8`> LiveUses;
1716	BotRPTracker.recede(RegOpers, LiveUses: &LiveUses);
1717	assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1718	LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1719	BotRPTracker.getRegSetPressureAtPos(), TRI););
1720
1721	updateScheduledPressure(SU, NewMaxPressure: BotRPTracker.getPressure().MaxSetPressure);
1722	updatePressureDiffs(LiveUses);
1723	}
1724	}
1725	}
1726
1727	//===----------------------------------------------------------------------===//
1728	// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1729	//===----------------------------------------------------------------------===//
1730
1731	namespace {
1732
1733	/// Post-process the DAG to create cluster edges between neighboring
1734	/// loads or between neighboring stores.
1735	class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1736	struct MemOpInfo {
1737	SUnit *SU;
1738	SmallVector<const MachineOperand *, `4`> BaseOps;
1739	int64_t Offset;
1740	LocationSize Width;
1741	bool OffsetIsScalable;
1742
1743	MemOpInfo(SUnit SU, ArrayRef<const* MachineOperand *> BaseOps,
1744	int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1745	: SU(SU), BaseOps (BaseOps.begin(), BaseOps.end()), Offset(Offset),
1746	Width (Width), OffsetIsScalable(OffsetIsScalable) {}
1747
1748	static bool Compare(const MachineOperand *const &A,
1749	const MachineOperand *const &B) {
1750	if (A->getType() != B->getType())
1751	return A->getType() < B->getType();
1752	if (A->isReg())
1753	return A->getReg() < B->getReg();
1754	if (A->isFI()) {
1755	const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1756	const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1757	bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1758	TargetFrameLowering::StackGrowsDown;
1759	return StackGrowsDown ? A->getIndex() > B->getIndex()
1760	: A->getIndex() < B->getIndex();
1761	}
1762
1763	llvm_unreachable("MemOpClusterMutation only supports register or frame "
1764	"index bases.");
1765	}
1766
1767	bool operator<(const MemOpInfo &RHS) const {
1768	// FIXME: Don't compare everything twice. Maybe use C++20 three way
1769	// comparison instead when it's available.
1770	if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1771	RHS.BaseOps.begin(), RHS.BaseOps.end(),
1772	Compare))
1773	return true;
1774	if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1775	BaseOps.begin(), BaseOps.end(), Compare))
1776	return false;
1777	if (Offset != RHS.Offset)
1778	return Offset < RHS.Offset;
1779	return SU->NodeNum < RHS.SU->NodeNum;
1780	}
1781	};
1782
1783	const TargetInstrInfo *TII;
1784	const TargetRegisterInfo *TRI;
1785	bool IsLoad;
1786	bool ReorderWhileClustering;
1787
1788	public:
1789	BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1790	const TargetRegisterInfo tri, bool* IsLoad,
1791	bool ReorderWhileClustering)
1792	: TII(tii), TRI(tri), IsLoad(IsLoad),
1793	ReorderWhileClustering(ReorderWhileClustering) {}
1794
1795	void apply(ScheduleDAGInstrs *DAGInstrs) override;
1796
1797	protected:
1798	void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1799	ScheduleDAGInstrs *DAG);
1800	void collectMemOpRecords(std::vector<SUnit> &SUnits,
1801	SmallVectorImpl<MemOpInfo> &MemOpRecords);
1802	bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1803	DenseMap<unsigned, SmallVector<MemOpInfo, `32`>> &Groups);
1804	};
1805
1806	class StoreClusterMutation : public BaseMemOpClusterMutation {
1807	public:
1808	StoreClusterMutation(const TargetInstrInfo *tii,
1809	const TargetRegisterInfo *tri,
1810	bool ReorderWhileClustering)
1811	: BaseMemOpClusterMutation (tii, tri, false, ReorderWhileClustering) {}
1812	};
1813
1814	class LoadClusterMutation : public BaseMemOpClusterMutation {
1815	public:
1816	LoadClusterMutation(const TargetInstrInfo tii, const* TargetRegisterInfo *tri,
1817	bool ReorderWhileClustering)
1818	: BaseMemOpClusterMutation (tii, tri, true, ReorderWhileClustering) {}
1819	};
1820
1821	} // end anonymous namespace
1822
1823	namespace llvm {
1824
1825	std::unique_ptr<ScheduleDAGMutation>
1826	createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1827	const TargetRegisterInfo *TRI,
1828	bool ReorderWhileClustering) {
1829	return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1830	args&: TII, args&: TRI, args&: ReorderWhileClustering)
1831	: nullptr;
1832	}
1833
1834	std::unique_ptr<ScheduleDAGMutation>
1835	createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1836	const TargetRegisterInfo *TRI,
1837	bool ReorderWhileClustering) {
1838	return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1839	args&: TII, args&: TRI, args&: ReorderWhileClustering)
1840	: nullptr;
1841	}
1842
1843	} // end namespace llvm
1844
1845	// Sorting all the loads/stores first, then for each load/store, checking the
1846	// following load/store one by one, until reach the first non-dependent one and
1847	// call target hook to see if they can cluster.
1848	// If FastCluster is enabled, we assume that, all the loads/stores have been
1849	// preprocessed and now, they didn't have dependencies on each other.
1850	void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1851	ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1852	ScheduleDAGInstrs *DAG) {
1853	// Keep track of the current cluster length and bytes for each SUnit.
1854	DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1855
1856	// At this point, `MemOpRecords` array must hold atleast two mem ops. Try to
1857	// cluster mem ops collected within `MemOpRecords` array.
1858	for (unsigned Idx = `0`, End = MemOpRecords.size(); Idx < (End - `1`); ++Idx) {
1859	// Decision to cluster mem ops is taken based on target dependent logic
1860	auto MemOpa = MemOpRecords [Idx];
1861
1862	// Seek for the next load/store to do the cluster.
1863	unsigned NextIdx = Idx + `1`;
1864	for (; NextIdx < End; ++NextIdx)
1865	// Skip if MemOpb has been clustered already or has dependency with
1866	// MemOpa.
1867	if (!SUnit2ClusterInfo.count(Val: MemOpRecords [NextIdx].SU->NodeNum) &&
1868	(FastCluster \|\|
1869	(!DAG->IsReachable(SU: MemOpRecords [NextIdx].SU, TargetSU: MemOpa.SU) &&
1870	!DAG->IsReachable(SU: MemOpa.SU, TargetSU: MemOpRecords [NextIdx].SU))))
1871	break;
1872	if (NextIdx == End)
1873	continue;
1874
1875	auto MemOpb = MemOpRecords [NextIdx];
1876	unsigned ClusterLength = `2`;
1877	unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
1878	MemOpb.Width.getValue().getKnownMinValue();
1879	if (SUnit2ClusterInfo.count(Val: MemOpa.SU->NodeNum)) {
1880	ClusterLength = SUnit2ClusterInfo [MemOpa.SU->NodeNum].first + `1`;
1881	CurrentClusterBytes = SUnit2ClusterInfo [MemOpa.SU->NodeNum].second +
1882	MemOpb.Width.getValue().getKnownMinValue();
1883	}
1884
1885	if (!TII->shouldClusterMemOps(BaseOps1: MemOpa.BaseOps, Offset1: MemOpa.Offset,
1886	OffsetIsScalable1: MemOpa.OffsetIsScalable, BaseOps2: MemOpb.BaseOps,
1887	Offset2: MemOpb.Offset, OffsetIsScalable2: MemOpb.OffsetIsScalable,
1888	ClusterSize: ClusterLength, NumBytes: CurrentClusterBytes))
1889	continue;
1890
1891	SUnit *SUa = MemOpa.SU;
1892	SUnit *SUb = MemOpb.SU;
1893	if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1894	std::swap(a&: SUa, b&: SUb);
1895
1896	// FIXME: Is this check really required?
1897	if (!DAG->addEdge(SuccSU: SUb, PredDep: SDep (SUa, SDep::Cluster)))
1898	continue;
1899
1900	LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1901	<< SUb->NodeNum << ")\n");
1902	++NumClustered;
1903
1904	if (IsLoad) {
1905	// Copy successor edges from SUa to SUb. Interleaving computation
1906	// dependent on SUa can prevent load combining due to register reuse.
1907	// Predecessor edges do not need to be copied from SUb to SUa since
1908	// nearby loads should have effectively the same inputs.
1909	for (const SDep &Succ : SUa->Succs) {
1910	if (Succ.getSUnit() == SUb)
1911	continue;
1912	LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1913	<< ")\n");
1914	DAG->addEdge(SuccSU: Succ.getSUnit(), PredDep: SDep (SUb, SDep::Artificial));
1915	}
1916	} else {
1917	// Copy predecessor edges from SUb to SUa to avoid the SUnits that
1918	// SUb dependent on scheduled in-between SUb and SUa. Successor edges
1919	// do not need to be copied from SUa to SUb since no one will depend
1920	// on stores.
1921	// Notice that, we don't need to care about the memory dependency as
1922	// we won't try to cluster them if they have any memory dependency.
1923	for (const SDep &Pred : SUb->Preds) {
1924	if (Pred.getSUnit() == SUa)
1925	continue;
1926	LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1927	<< ")\n");
1928	DAG->addEdge(SuccSU: SUa, PredDep: SDep (Pred.getSUnit(), SDep::Artificial));
1929	}
1930	}
1931
1932	SUnit2ClusterInfo [MemOpb.SU->NodeNum] = {ClusterLength,
1933	CurrentClusterBytes};
1934
1935	LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1936	<< ", Curr cluster bytes: " << CurrentClusterBytes
1937	<< "\n");
1938	}
1939	}
1940
1941	void BaseMemOpClusterMutation::collectMemOpRecords(
1942	std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1943	for (auto &SU : SUnits) {
1944	if ((IsLoad && !SU.getInstr()->mayLoad()) \|\|
1945	(!IsLoad && !SU.getInstr()->mayStore()))
1946	continue;
1947
1948	const MachineInstr &MI = *SU.getInstr();
1949	SmallVector<const MachineOperand *, `4`> BaseOps;
1950	int64_t Offset;
1951	bool OffsetIsScalable;
1952	LocationSize Width = `0`;
1953	if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1954	OffsetIsScalable, Width, TRI)) {
1955	MemOpRecords.push_back(
1956	Elt: MemOpInfo (&SU, BaseOps, Offset, OffsetIsScalable, Width));
1957
1958	LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1959	<< Offset << ", OffsetIsScalable: " << OffsetIsScalable
1960	<< ", Width: " << Width << "\n");
1961	}
1962	#ifndef NDEBUG
1963	for (const auto *Op : BaseOps)
1964	assert(Op);
1965	#endif
1966	}
1967	}
1968
1969	bool BaseMemOpClusterMutation::groupMemOps(
1970	ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1971	DenseMap<unsigned, SmallVector<MemOpInfo, `32`>> &Groups) {
1972	bool FastCluster =
1973	ForceFastCluster \|\|
1974	MemOps.size() * DAG->SUnits.size() / `1000` > FastClusterThreshold;
1975
1976	for (const auto &MemOp : MemOps) {
1977	unsigned ChainPredID = DAG->SUnits.size();
1978	if (FastCluster) {
1979	for (const SDep &Pred : MemOp.SU->Preds) {
1980	// We only want to cluster the mem ops that have the same ctrl(non-data)
1981	// pred so that they didn't have ctrl dependency for each other. But for
1982	// store instrs, we can still cluster them if the pred is load instr.
1983	if ((Pred.isCtrl() &&
1984	(IsLoad \|\|
1985	(Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1986	!Pred.isArtificial()) {
1987	ChainPredID = Pred.getSUnit()->NodeNum;
1988	break;
1989	}
1990	}
1991	} else
1992	ChainPredID = `0`;
1993
1994	Groups [ChainPredID].push_back(Elt: MemOp);
1995	}
1996	return FastCluster;
1997	}
1998
1999	/// Callback from DAG postProcessing to create cluster edges for loads/stores.
2000	void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
2001	// Collect all the clusterable loads/stores
2002	SmallVector<MemOpInfo, `32`> MemOpRecords;
2003	collectMemOpRecords(SUnits&: DAG->SUnits, MemOpRecords);
2004
2005	if (MemOpRecords.size() < `2`)
2006	return;
2007
2008	// Put the loads/stores without dependency into the same group with some
2009	// heuristic if the DAG is too complex to avoid compiling time blow up.
2010	// Notice that, some fusion pair could be lost with this.
2011	DenseMap<unsigned, SmallVector<MemOpInfo, `32`>> Groups;
2012	bool FastCluster = groupMemOps(MemOps: MemOpRecords, DAG, Groups);
2013
2014	for (auto &Group : Groups) {
2015	// Sorting the loads/stores, so that, we can stop the cluster as early as
2016	// possible.
2017	llvm::sort(C&: Group.second);
2018
2019	// Trying to cluster all the neighboring loads/stores.
2020	clusterNeighboringMemOps(MemOpRecords: Group.second, FastCluster, DAG);
2021	}
2022	}
2023
2024	//===----------------------------------------------------------------------===//
2025	// CopyConstrain - DAG post-processing to encourage copy elimination.
2026	//===----------------------------------------------------------------------===//
2027
2028	namespace {
2029
2030	/// Post-process the DAG to create weak edges from all uses of a copy to
2031	/// the one use that defines the copy's source vreg, most likely an induction
2032	/// variable increment.
2033	class CopyConstrain : public ScheduleDAGMutation {
2034	// Transient state.
2035	SlotIndex RegionBeginIdx;
2036
2037	// RegionEndIdx is the slot index of the last non-debug instruction in the
2038	// scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2039	SlotIndex RegionEndIdx;
2040
2041	public:
2042	CopyConstrain(const TargetInstrInfo , const* TargetRegisterInfo *) {}
2043
2044	void apply(ScheduleDAGInstrs *DAGInstrs) override;
2045
2046	protected:
2047	void constrainLocalCopy(SUnit CopySU, ScheduleDAGMILive DAG);
2048	};
2049
2050	} // end anonymous namespace
2051
2052	namespace llvm {
2053
2054	std::unique_ptr<ScheduleDAGMutation>
2055	createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
2056	const TargetRegisterInfo *TRI) {
2057	return std::make_unique<CopyConstrain>(args&: TII, args&: TRI);
2058	}
2059
2060	} // end namespace llvm
2061
2062	/// constrainLocalCopy handles two possibilities:
2063	/// 1) Local src:
2064	/// I0: = dst
2065	/// I1: src = ...
2066	/// I2: = dst
2067	/// I3: dst = src (copy)
2068	/// (create pred->succ edges I0->I1, I2->I1)
2069	///
2070	/// 2) Local copy:
2071	/// I0: dst = src (copy)
2072	/// I1: = dst
2073	/// I2: src = ...
2074	/// I3: = dst
2075	/// (create pred->succ edges I1->I2, I3->I2)
2076	///
2077	/// Although the MachineScheduler is currently constrained to single blocks,
2078	/// this algorithm should handle extended blocks. An EBB is a set of
2079	/// contiguously numbered blocks such that the previous block in the EBB is
2080	/// always the single predecessor.
2081	void CopyConstrain::constrainLocalCopy(SUnit CopySU, ScheduleDAGMILive DAG) {
2082	LiveIntervals *LIS = DAG->getLIS();
2083	MachineInstr *Copy = CopySU->getInstr();
2084
2085	// Check for pure vreg copies.
2086	const MachineOperand &SrcOp = Copy->getOperand(i: `1`);
2087	Register SrcReg = SrcOp.getReg();
2088	if (!SrcReg.isVirtual() \|\| !SrcOp.readsReg())
2089	return;
2090
2091	const MachineOperand &DstOp = Copy->getOperand(i: `0`);
2092	Register DstReg = DstOp.getReg();
2093	if (!DstReg.isVirtual() \|\| DstOp.isDead())
2094	return;
2095
2096	// Check if either the dest or source is local. If it's live across a back
2097	// edge, it's not local. Note that if both vregs are live across the back
2098	// edge, we cannot successfully contrain the copy without cyclic scheduling.
2099	// If both the copy's source and dest are local live intervals, then we
2100	// should treat the dest as the global for the purpose of adding
2101	// constraints. This adds edges from source's other uses to the copy.
2102	unsigned LocalReg = SrcReg;
2103	unsigned GlobalReg = DstReg;
2104	LiveInterval *LocalLI = &LIS->getInterval(Reg: LocalReg);
2105	if (!LocalLI->isLocal(Start: RegionBeginIdx, End: RegionEndIdx)) {
2106	LocalReg = DstReg;
2107	GlobalReg = SrcReg;
2108	LocalLI = &LIS->getInterval(Reg: LocalReg);
2109	if (!LocalLI->isLocal(Start: RegionBeginIdx, End: RegionEndIdx))
2110	return;
2111	}
2112	LiveInterval *GlobalLI = &LIS->getInterval(Reg: GlobalReg);
2113
2114	// Find the global segment after the start of the local LI.
2115	LiveInterval::iterator GlobalSegment = GlobalLI->find(Pos: LocalLI->beginIndex());
2116	// If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2117	// local live range. We could create edges from other global uses to the local
2118	// start, but the coalescer should have already eliminated these cases, so
2119	// don't bother dealing with it.
2120	if (GlobalSegment == GlobalLI->end())
2121	return;
2122
2123	// If GlobalSegment is killed at the LocalLI->start, the call to find()
2124	// returned the next global segment. But if GlobalSegment overlaps with
2125	// LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2126	// exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2127	if (GlobalSegment->contains(I: LocalLI->beginIndex()))
2128	++GlobalSegment;
2129
2130	if (GlobalSegment == GlobalLI->end())
2131	return;
2132
2133	// Check if GlobalLI contains a hole in the vicinity of LocalLI.
2134	if (GlobalSegment != GlobalLI->begin()) {
2135	// Two address defs have no hole.
2136	if (SlotIndex::isSameInstr(A: std::prev(x: GlobalSegment)->end,
2137	B: GlobalSegment->start)) {
2138	return;
2139	}
2140	// If the prior global segment may be defined by the same two-address
2141	// instruction that also defines LocalLI, then can't make a hole here.
2142	if (SlotIndex::isSameInstr(A: std::prev(x: GlobalSegment)->start,
2143	B: LocalLI->beginIndex())) {
2144	return;
2145	}
2146	// If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2147	// it would be a disconnected component in the live range.
2148	assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2149	"Disconnected LRG within the scheduling region.");
2150	}
2151	MachineInstr *GlobalDef = LIS->getInstructionFromIndex(index: GlobalSegment->start);
2152	if (!GlobalDef)
2153	return;
2154
2155	SUnit *GlobalSU = DAG->getSUnit(MI: GlobalDef);
2156	if (!GlobalSU)
2157	return;
2158
2159	// GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2160	// constraining the uses of the last local def to precede GlobalDef.
2161	SmallVector<SUnit*,`8`> LocalUses;
2162	const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(Idx: LocalLI->endIndex());
2163	MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(index: LastLocalVN->def);
2164	SUnit *LastLocalSU = DAG->getSUnit(MI: LastLocalDef);
2165	for (const SDep &Succ : LastLocalSU->Succs) {
2166	if (Succ.getKind() != SDep::Data \|\| Succ.getReg() != LocalReg)
2167	continue;
2168	if (Succ.getSUnit() == GlobalSU)
2169	continue;
2170	if (!DAG->canAddEdge(SuccSU: GlobalSU, PredSU: Succ.getSUnit()))
2171	return;
2172	LocalUses.push_back(Elt: Succ.getSUnit());
2173	}
2174	// Open the top of the GlobalLI hole by constraining any earlier global uses
2175	// to precede the start of LocalLI.
2176	SmallVector<SUnit*,`8`> GlobalUses;
2177	MachineInstr *FirstLocalDef =
2178	LIS->getInstructionFromIndex(index: LocalLI->beginIndex());
2179	SUnit *FirstLocalSU = DAG->getSUnit(MI: FirstLocalDef);
2180	for (const SDep &Pred : GlobalSU->Preds) {
2181	if (Pred.getKind() != SDep::Anti \|\| Pred.getReg() != GlobalReg)
2182	continue;
2183	if (Pred.getSUnit() == FirstLocalSU)
2184	continue;
2185	if (!DAG->canAddEdge(SuccSU: FirstLocalSU, PredSU: Pred.getSUnit()))
2186	return;
2187	GlobalUses.push_back(Elt: Pred.getSUnit());
2188	}
2189	LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2190	// Add the weak edges.
2191	for (SUnit *LU : LocalUses) {
2192	LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2193	<< GlobalSU->NodeNum << ")\n");
2194	DAG->addEdge(SuccSU: GlobalSU, PredDep: SDep (LU, SDep::Weak));
2195	}
2196	for (SUnit *GU : GlobalUses) {
2197	LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2198	<< FirstLocalSU->NodeNum << ")\n");
2199	DAG->addEdge(SuccSU: FirstLocalSU, PredDep: SDep (GU, SDep::Weak));
2200	}
2201	}
2202
2203	/// Callback from DAG postProcessing to create weak edges to encourage
2204	/// copy elimination.
2205	void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2206	ScheduleDAGMI DAG = static_cast<ScheduleDAGMI>(DAGInstrs);
2207	assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2208
2209	MachineBasicBlock::iterator FirstPos = nextIfDebug(I: DAG->begin(), End: DAG->end());
2210	if (FirstPos == DAG->end())
2211	return;
2212	RegionBeginIdx = DAG->getLIS()->getInstructionIndex(Instr: *FirstPos);
2213	RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2214	Instr: *priorNonDebug(I: DAG->end(), Beg: DAG->begin()));
2215
2216	for (SUnit &SU : DAG->SUnits) {
2217	if (!SU.getInstr()->isCopy())
2218	continue;
2219
2220	constrainLocalCopy(CopySU: &SU, DAG: static_cast<ScheduleDAGMILive*>(DAG));
2221	}
2222	}
2223
2224	//===----------------------------------------------------------------------===//
2225	// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2226	// and possibly other custom schedulers.
2227	//===----------------------------------------------------------------------===//
2228
2229	static const unsigned InvalidCycle = ~`0U`;
2230
2231	SchedBoundary::~SchedBoundary() { delete HazardRec; }
2232
2233	/// Given a Count of resource usage and a Latency value, return true if a
2234	/// SchedBoundary becomes resource limited.
2235	/// If we are checking after scheduling a node, we should return true when
2236	/// we just reach the resource limit.
2237	static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2238	unsigned Latency, bool AfterSchedNode) {
2239	int ResCntFactor = (int)(Count - (Latency * LFactor));
2240	if (AfterSchedNode)
2241	return ResCntFactor >= (int)LFactor;
2242	else
2243	return ResCntFactor > (int)LFactor;
2244	}
2245
2246	void SchedBoundary::reset() {
2247	// A new HazardRec is created for each DAG and owned by SchedBoundary.
2248	// Destroying and reconstructing it is very expensive though. So keep
2249	// invalid, placeholder HazardRecs.
2250	if (HazardRec && HazardRec->isEnabled()) {
2251	delete HazardRec;
2252	HazardRec = nullptr;
2253	}
2254	Available.clear();
2255	Pending.clear();
2256	CheckPending = false;
2257	CurrCycle = `0`;
2258	CurrMOps = `0`;
2259	MinReadyCycle = std::numeric_limits<unsigned>::max();
2260	ExpectedLatency = `0`;
2261	DependentLatency = `0`;
2262	RetiredMOps = `0`;
2263	MaxExecutedResCount = `0`;
2264	ZoneCritResIdx = `0`;
2265	IsResourceLimited = false;
2266	ReservedCycles.clear();
2267	ReservedResourceSegments.clear();
2268	ReservedCyclesIndex.clear();
2269	ResourceGroupSubUnitMasks.clear();
2270	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2271	// Track the maximum number of stall cycles that could arise either from the
2272	// latency of a DAG edge or the number of cycles that a processor resource is
2273	// reserved (SchedBoundary::ReservedCycles).
2274	MaxObservedStall = `0`;
2275	#endif
2276	// Reserve a zero-count for invalid CritResIdx.
2277	ExecutedResCounts.resize(N: `1`);
2278	assert(!ExecutedResCounts[`0`] && "nonzero count for bad resource");
2279	}
2280
2281	void SchedRemainder::
2282	init(ScheduleDAGMI DAG, const* TargetSchedModel *SchedModel) {
2283	reset();
2284	if (!SchedModel->hasInstrSchedModel())
2285	return;
2286	RemainingCounts.resize(N: SchedModel->getNumProcResourceKinds());
2287	for (SUnit &SU : DAG->SUnits) {
2288	const MCSchedClassDesc *SC = DAG->getSchedClass(SU: &SU);
2289	RemIssueCount += SchedModel->getNumMicroOps(MI: SU.getInstr(), SC)
2290	* SchedModel->getMicroOpFactor();
2291	for (TargetSchedModel::ProcResIter
2292	PI = SchedModel->getWriteProcResBegin(SC),
2293	PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2294	unsigned PIdx = PI->ProcResourceIdx;
2295	unsigned Factor = SchedModel->getResourceFactor(ResIdx: PIdx);
2296	assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2297	RemainingCounts [PIdx] +=
2298	(Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2299	}
2300	}
2301	}
2302
2303	void SchedBoundary::
2304	init(ScheduleDAGMI dag, const* TargetSchedModel smodel, SchedRemainder rem) {
2305	reset();
2306	DAG = dag;
2307	SchedModel = smodel;
2308	Rem = rem;
2309	if (SchedModel->hasInstrSchedModel()) {
2310	unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2311	ReservedCyclesIndex.resize(N: ResourceCount);
2312	ExecutedResCounts.resize(N: ResourceCount);
2313	ResourceGroupSubUnitMasks.resize(N: ResourceCount, NV: APInt (ResourceCount, `0`));
2314	unsigned NumUnits = `0`;
2315
2316	for (unsigned i = `0`; i < ResourceCount; ++i) {
2317	ReservedCyclesIndex [i] = NumUnits;
2318	NumUnits += SchedModel->getProcResource(PIdx: i)->NumUnits;
2319	if (isUnbufferedGroup(PIdx: i)) {
2320	auto SubUnits = SchedModel->getProcResource(PIdx: i)->SubUnitsIdxBegin;
2321	for (unsigned U = `0`, UE = SchedModel->getProcResource(PIdx: i)->NumUnits;
2322	U != UE; ++U)
2323	ResourceGroupSubUnitMasks [i].setBit(SubUnits[U]);
2324	}
2325	}
2326
2327	ReservedCycles.resize(new_size: NumUnits, x: InvalidCycle);
2328	}
2329	}
2330
2331	/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2332	/// these "soft stalls" differently than the hard stall cycles based on CPU
2333	/// resources and computed by checkHazard(). A fully in-order model
2334	/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2335	/// available for scheduling until they are ready. However, a weaker in-order
2336	/// model may use this for heuristics. For example, if a processor has in-order
2337	/// behavior when reading certain resources, this may come into play.
2338	unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2339	if (!SU->isUnbuffered)
2340	return `0`;
2341
2342	unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2343	if (ReadyCycle > CurrCycle)
2344	return ReadyCycle - CurrCycle;
2345	return `0`;
2346	}
2347
2348	/// Compute the next cycle at which the given processor resource unit
2349	/// can be scheduled.
2350	unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2351	unsigned ReleaseAtCycle,
2352	unsigned AcquireAtCycle) {
2353	if (SchedModel && SchedModel->enableIntervals()) {
2354	if (isTop())
2355	return ReservedResourceSegments [InstanceIdx].getFirstAvailableAtFromTop(
2356	CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2357
2358	return ReservedResourceSegments [InstanceIdx].getFirstAvailableAtFromBottom(
2359	CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2360	}
2361
2362	unsigned NextUnreserved = ReservedCycles [InstanceIdx];
2363	// If this resource has never been used, always return cycle zero.
2364	if (NextUnreserved == InvalidCycle)
2365	return CurrCycle;
2366	// For bottom-up scheduling add the cycles needed for the current operation.
2367	if (!isTop())
2368	NextUnreserved = std::max(a: CurrCycle, b: NextUnreserved + ReleaseAtCycle);
2369	return NextUnreserved;
2370	}
2371
2372	/// Compute the next cycle at which the given processor resource can be
2373	/// scheduled. Returns the next cycle and the index of the processor resource
2374	/// instance in the reserved cycles vector.
2375	std::pair<unsigned, unsigned>
2376	SchedBoundary::getNextResourceCycle(const MCSchedClassDesc SC, unsigned* PIdx,
2377	unsigned ReleaseAtCycle,
2378	unsigned AcquireAtCycle) {
2379	if (MischedDetailResourceBooking) {
2380	LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2381	LLVM_DEBUG(dumpReservedCycles());
2382	LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2383	}
2384	unsigned MinNextUnreserved = InvalidCycle;
2385	unsigned InstanceIdx = `0`;
2386	unsigned StartIndex = ReservedCyclesIndex [PIdx];
2387	unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2388	assert(NumberOfInstances > `0` &&
2389	"Cannot have zero instances of a ProcResource");
2390
2391	if (isUnbufferedGroup(PIdx)) {
2392	// If any subunits are used by the instruction, report that the
2393	// subunits of the resource group are available at the first cycle
2394	// in which the unit is available, effectively removing the group
2395	// record from hazarding and basing the hazarding decisions on the
2396	// subunit records. Otherwise, choose the first available instance
2397	// from among the subunits. Specifications which assign cycles to
2398	// both the subunits and the group or which use an unbuffered
2399	// group with buffered subunits will appear to schedule
2400	// strangely. In the first case, the additional cycles for the
2401	// group will be ignored. In the second, the group will be
2402	// ignored entirely.
2403	for (const MCWriteProcResEntry &PE :
2404	make_range(x: SchedModel->getWriteProcResBegin(SC),
2405	y: SchedModel->getWriteProcResEnd(SC)))
2406	if (ResourceGroupSubUnitMasks [PIdx][PE.ProcResourceIdx])
2407	return std::make_pair(x: getNextResourceCycleByInstance(
2408	InstanceIdx: StartIndex, ReleaseAtCycle, AcquireAtCycle),
2409	y&: StartIndex);
2410
2411	auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2412	for (unsigned I = `0`, End = NumberOfInstances; I < End; ++I) {
2413	unsigned NextUnreserved, NextInstanceIdx;
2414	std::tie(args&: NextUnreserved, args&: NextInstanceIdx) =
2415	getNextResourceCycle(SC, PIdx: SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2416	if (MinNextUnreserved > NextUnreserved) {
2417	InstanceIdx = NextInstanceIdx;
2418	MinNextUnreserved = NextUnreserved;
2419	}
2420	}
2421	return std::make_pair(x&: MinNextUnreserved, y&: InstanceIdx);
2422	}
2423
2424	for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2425	++I) {
2426	unsigned NextUnreserved =
2427	getNextResourceCycleByInstance(InstanceIdx: I, ReleaseAtCycle, AcquireAtCycle);
2428	if (MischedDetailResourceBooking)
2429	LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2430	<< NextUnreserved << "c\n");
2431	if (MinNextUnreserved > NextUnreserved) {
2432	InstanceIdx = I;
2433	MinNextUnreserved = NextUnreserved;
2434	}
2435	}
2436	if (MischedDetailResourceBooking)
2437	LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2438	<< "[" << InstanceIdx - StartIndex << "]"
2439	<< " available @" << MinNextUnreserved << "c"
2440	<< "\n");
2441	return std::make_pair(x&: MinNextUnreserved, y&: InstanceIdx);
2442	}
2443
2444	/// Does this SU have a hazard within the current instruction group.
2445	///
2446	/// The scheduler supports two modes of hazard recognition. The first is the
2447	/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2448	/// supports highly complicated in-order reservation tables
2449	/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2450	///
2451	/// The second is a streamlined mechanism that checks for hazards based on
2452	/// simple counters that the scheduler itself maintains. It explicitly checks
2453	/// for instruction dispatch limitations, including the number of micro-ops that
2454	/// can dispatch per cycle.
2455	///
2456	/// TODO: Also check whether the SU must start a new group.
2457	bool SchedBoundary::checkHazard(SUnit *SU) {
2458	if (HazardRec->isEnabled()
2459	&& HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2460	return true;
2461	}
2462
2463	unsigned uops = SchedModel->getNumMicroOps(MI: SU->getInstr());
2464	if ((CurrMOps > `0`) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2465	LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2466	<< SchedModel->getNumMicroOps(SU->getInstr()) << `'\n'`);
2467	return true;
2468	}
2469
2470	if (CurrMOps > `0` &&
2471	((isTop() && SchedModel->mustBeginGroup(MI: SU->getInstr())) \|\|
2472	(!isTop() && SchedModel->mustEndGroup(MI: SU->getInstr())))) {
2473	LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2474	<< (isTop() ? "begin" : "end") << " group\n");
2475	return true;
2476	}
2477
2478	if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2479	const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2480	for (const MCWriteProcResEntry &PE :
2481	make_range(x: SchedModel->getWriteProcResBegin(SC),
2482	y: SchedModel->getWriteProcResEnd(SC))) {
2483	unsigned ResIdx = PE.ProcResourceIdx;
2484	unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2485	unsigned AcquireAtCycle = PE.AcquireAtCycle;
2486	unsigned NRCycle, InstanceIdx;
2487	std::tie(args&: NRCycle, args&: InstanceIdx) =
2488	getNextResourceCycle(SC, PIdx: ResIdx, ReleaseAtCycle, AcquireAtCycle);
2489	if (NRCycle > CurrCycle) {
2490	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2491	MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2492	#endif
2493	LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2494	<< SchedModel->getResourceName(ResIdx)
2495	<< `'['` << InstanceIdx - ReservedCyclesIndex[ResIdx] << `']'`
2496	<< "=" << NRCycle << "c\n");
2497	return true;
2498	}
2499	}
2500	}
2501	return false;
2502	}
2503
2504	// Find the unscheduled node in ReadySUs with the highest latency.
2505	unsigned SchedBoundary::
2506	findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2507	SUnit LateSU = nullptr*;
2508	unsigned RemLatency = `0`;
2509	for (SUnit *SU : ReadySUs) {
2510	unsigned L = getUnscheduledLatency(SU);
2511	if (L > RemLatency) {
2512	RemLatency = L;
2513	LateSU = SU;
2514	}
2515	}
2516	if (LateSU) {
2517	LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2518	<< LateSU->NodeNum << ") " << RemLatency << "c\n");
2519	}
2520	return RemLatency;
2521	}
2522
2523	// Count resources in this zone and the remaining unscheduled
2524	// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2525	// resource index, or zero if the zone is issue limited.
2526	unsigned SchedBoundary::
2527	getOtherResourceCount(unsigned &OtherCritIdx) {
2528	OtherCritIdx = `0`;
2529	if (!SchedModel->hasInstrSchedModel())
2530	return `0`;
2531
2532	unsigned OtherCritCount = Rem->RemIssueCount
2533	+ (RetiredMOps * SchedModel->getMicroOpFactor());
2534	LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2535	<< OtherCritCount / SchedModel->getMicroOpFactor() << `'\n'`);
2536	for (unsigned PIdx = `1`, PEnd = SchedModel->getNumProcResourceKinds();
2537	PIdx != PEnd; ++PIdx) {
2538	unsigned OtherCount = getResourceCount(ResIdx: PIdx) + Rem->RemainingCounts [PIdx];
2539	if (OtherCount > OtherCritCount) {
2540	OtherCritCount = OtherCount;
2541	OtherCritIdx = PIdx;
2542	}
2543	}
2544	if (OtherCritIdx) {
2545	LLVM_DEBUG(
2546	dbgs() << " " << Available.getName() << " + Remain CritRes: "
2547	<< OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2548	<< " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2549	}
2550	return OtherCritCount;
2551	}
2552
2553	void SchedBoundary::releaseNode(SUnit SU, unsigned* ReadyCycle, bool InPQueue,
2554	unsigned Idx) {
2555	assert(SU->getInstr() && "Scheduled SUnit must have instr");
2556
2557	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2558	// ReadyCycle was been bumped up to the CurrCycle when this node was
2559	// scheduled, but CurrCycle may have been eagerly advanced immediately after
2560	// scheduling, so may now be greater than ReadyCycle.
2561	if (ReadyCycle > CurrCycle)
2562	MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2563	#endif
2564
2565	if (ReadyCycle < MinReadyCycle)
2566	MinReadyCycle = ReadyCycle;
2567
2568	// Check for interlocks first. For the purpose of other heuristics, an
2569	// instruction that cannot issue appears as if it's not in the ReadyQueue.
2570	bool IsBuffered = SchedModel->getMicroOpBufferSize() != `0`;
2571	bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) \|\|
2572	checkHazard(SU) \|\| (Available.size() >= ReadyListLimit);
2573
2574	if (!HazardDetected) {
2575	Available.push(SU);
2576
2577	if (InPQueue)
2578	Pending.remove(I: Pending.begin() + Idx);
2579	return;
2580	}
2581
2582	if (!InPQueue)
2583	Pending.push(SU);
2584	}
2585
2586	/// Move the boundary of scheduled code by one cycle.
2587	void SchedBoundary::bumpCycle(unsigned NextCycle) {
2588	if (SchedModel->getMicroOpBufferSize() == `0`) {
2589	assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2590	"MinReadyCycle uninitialized");
2591	if (MinReadyCycle > NextCycle)
2592	NextCycle = MinReadyCycle;
2593	}
2594	// Update the current micro-ops, which will issue in the next cycle.
2595	unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2596	CurrMOps = (CurrMOps <= DecMOps) ? `0` : CurrMOps - DecMOps;
2597
2598	// Decrement DependentLatency based on the next cycle.
2599	if ((NextCycle - CurrCycle) > DependentLatency)
2600	DependentLatency = `0`;
2601	else
2602	DependentLatency -= (NextCycle - CurrCycle);
2603
2604	if (!HazardRec->isEnabled()) {
2605	// Bypass HazardRec virtual calls.
2606	CurrCycle = NextCycle;
2607	} else {
2608	// Bypass getHazardType calls in case of long latency.
2609	for (; CurrCycle != NextCycle; ++CurrCycle) {
2610	if (isTop())
2611	HazardRec->AdvanceCycle();
2612	else
2613	HazardRec->RecedeCycle();
2614	}
2615	}
2616	CheckPending = true;
2617	IsResourceLimited =
2618	checkResourceLimit(LFactor: SchedModel->getLatencyFactor(), Count: getCriticalCount(),
2619	Latency: getScheduledLatency(), AfterSchedNode: true);
2620
2621	LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << `' '` << Available.getName()
2622	<< `'\n'`);
2623	}
2624
2625	void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2626	ExecutedResCounts [PIdx] += Count;
2627	if (ExecutedResCounts [PIdx] > MaxExecutedResCount)
2628	MaxExecutedResCount = ExecutedResCounts [PIdx];
2629	}
2630
2631	/// Add the given processor resource to this scheduled zone.
2632	///
2633	/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2634	/// cycles during which this resource is released.
2635	///
2636	/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2637	/// cycles at which the resource is aquired after issue (assuming no stalls).
2638	///
2639	/// \return the next cycle at which the instruction may execute without
2640	/// oversubscribing resources.
2641	unsigned SchedBoundary::countResource(const MCSchedClassDesc SC, unsigned* PIdx,
2642	unsigned ReleaseAtCycle,
2643	unsigned NextCycle,
2644	unsigned AcquireAtCycle) {
2645	unsigned Factor = SchedModel->getResourceFactor(ResIdx: PIdx);
2646	unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle);
2647	LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2648	<< ReleaseAtCycle << "x" << Factor << "u\n");
2649
2650	// Update Executed resources counts.
2651	incExecutedResources(PIdx, Count);
2652	assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2653	Rem->RemainingCounts [PIdx] -= Count;
2654
2655	// Check if this resource exceeds the current critical resource. If so, it
2656	// becomes the critical resource.
2657	if (ZoneCritResIdx != PIdx && (getResourceCount(ResIdx: PIdx) > getCriticalCount())) {
2658	ZoneCritResIdx = PIdx;
2659	LLVM_DEBUG(dbgs() << " *** Critical resource "
2660	<< SchedModel->getResourceName(PIdx) << ": "
2661	<< getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2662	<< "c\n");
2663	}
2664	// For reserved resources, record the highest cycle using the resource.
2665	unsigned NextAvailable, InstanceIdx;
2666	std::tie(args&: NextAvailable, args&: InstanceIdx) =
2667	getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2668	if (NextAvailable > CurrCycle) {
2669	LLVM_DEBUG(dbgs() << " Resource conflict: "
2670	<< SchedModel->getResourceName(PIdx)
2671	<< `'['` << InstanceIdx - ReservedCyclesIndex[PIdx] << `']'`
2672	<< " reserved until @" << NextAvailable << "\n");
2673	}
2674	return NextAvailable;
2675	}
2676
2677	/// Move the boundary of scheduled code by one SUnit.
2678	void SchedBoundary::bumpNode(SUnit *SU) {
2679	// Update the reservation table.
2680	if (HazardRec->isEnabled()) {
2681	if (!isTop() && SU->isCall) {
2682	// Calls are scheduled with their preceding instructions. For bottom-up
2683	// scheduling, clear the pipeline state before emitting.
2684	HazardRec->Reset();
2685	}
2686	HazardRec->EmitInstruction(SU);
2687	// Scheduling an instruction may have made pending instructions available.
2688	CheckPending = true;
2689	}
2690	// checkHazard should prevent scheduling multiple instructions per cycle that
2691	// exceed the issue width.
2692	const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2693	unsigned IncMOps = SchedModel->getNumMicroOps(MI: SU->getInstr());
2694	assert(
2695	(CurrMOps == `0` \|\| (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2696	"Cannot schedule this instruction's MicroOps in the current cycle.");
2697
2698	unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2699	LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2700
2701	unsigned NextCycle = CurrCycle;
2702	switch (SchedModel->getMicroOpBufferSize()) {
2703	case `0`:
2704	assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2705	break;
2706	case `1`:
2707	if (ReadyCycle > NextCycle) {
2708	NextCycle = ReadyCycle;
2709	LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2710	}
2711	break;
2712	default:
2713	// We don't currently model the OOO reorder buffer, so consider all
2714	// scheduled MOps to be "retired". We do loosely model in-order resource
2715	// latency. If this instruction uses an in-order resource, account for any
2716	// likely stall cycles.
2717	if (SU->isUnbuffered && ReadyCycle > NextCycle)
2718	NextCycle = ReadyCycle;
2719	break;
2720	}
2721	RetiredMOps += IncMOps;
2722
2723	// Update resource counts and critical resource.
2724	if (SchedModel->hasInstrSchedModel()) {
2725	unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2726	assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2727	Rem->RemIssueCount -= DecRemIssue;
2728	if (ZoneCritResIdx) {
2729	// Scale scheduled micro-ops for comparing with the critical resource.
2730	unsigned ScaledMOps =
2731	RetiredMOps * SchedModel->getMicroOpFactor();
2732
2733	// If scaled micro-ops are now more than the previous critical resource by
2734	// a full cycle, then micro-ops issue becomes critical.
2735	if ((int)(ScaledMOps - getResourceCount(ResIdx: ZoneCritResIdx))
2736	>= (int)SchedModel->getLatencyFactor()) {
2737	ZoneCritResIdx = `0`;
2738	LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2739	<< ScaledMOps / SchedModel->getLatencyFactor()
2740	<< "c\n");
2741	}
2742	}
2743	for (TargetSchedModel::ProcResIter
2744	PI = SchedModel->getWriteProcResBegin(SC),
2745	PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2746	unsigned RCycle =
2747	countResource(SC, PIdx: PI->ProcResourceIdx, ReleaseAtCycle: PI->ReleaseAtCycle, NextCycle,
2748	AcquireAtCycle: PI->AcquireAtCycle);
2749	if (RCycle > NextCycle)
2750	NextCycle = RCycle;
2751	}
2752	if (SU->hasReservedResource) {
2753	// For reserved resources, record the highest cycle using the resource.
2754	// For top-down scheduling, this is the cycle in which we schedule this
2755	// instruction plus the number of cycles the operations reserves the
2756	// resource. For bottom-up is it simply the instruction's cycle.
2757	for (TargetSchedModel::ProcResIter
2758	PI = SchedModel->getWriteProcResBegin(SC),
2759	PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2760	unsigned PIdx = PI->ProcResourceIdx;
2761	if (SchedModel->getProcResource(PIdx)->BufferSize == `0`) {
2762
2763	if (SchedModel && SchedModel->enableIntervals()) {
2764	unsigned ReservedUntil, InstanceIdx;
2765	std::tie(args&: ReservedUntil, args&: InstanceIdx) = getNextResourceCycle(
2766	SC, PIdx, ReleaseAtCycle: PI->ReleaseAtCycle, AcquireAtCycle: PI->AcquireAtCycle);
2767	if (isTop()) {
2768	ReservedResourceSegments [InstanceIdx].add(
2769	A: ResourceSegments::getResourceIntervalTop(
2770	C: NextCycle, AcquireAtCycle: PI->AcquireAtCycle, ReleaseAtCycle: PI->ReleaseAtCycle),
2771	CutOff: MIResourceCutOff);
2772	} else {
2773	ReservedResourceSegments [InstanceIdx].add(
2774	A: ResourceSegments::getResourceIntervalBottom(
2775	C: NextCycle, AcquireAtCycle: PI->AcquireAtCycle, ReleaseAtCycle: PI->ReleaseAtCycle),
2776	CutOff: MIResourceCutOff);
2777	}
2778	} else {
2779
2780	unsigned ReservedUntil, InstanceIdx;
2781	std::tie(args&: ReservedUntil, args&: InstanceIdx) = getNextResourceCycle(
2782	SC, PIdx, ReleaseAtCycle: PI->ReleaseAtCycle, AcquireAtCycle: PI->AcquireAtCycle);
2783	if (isTop()) {
2784	ReservedCycles [InstanceIdx] =
2785	std::max(a: ReservedUntil, b: NextCycle + PI->ReleaseAtCycle);
2786	} else
2787	ReservedCycles [InstanceIdx] = NextCycle;
2788	}
2789	}
2790	}
2791	}
2792	}
2793	// Update ExpectedLatency and DependentLatency.
2794	unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2795	unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2796	if (SU->getDepth() > TopLatency) {
2797	TopLatency = SU->getDepth();
2798	LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2799	<< SU->NodeNum << ") " << TopLatency << "c\n");
2800	}
2801	if (SU->getHeight() > BotLatency) {
2802	BotLatency = SU->getHeight();
2803	LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2804	<< SU->NodeNum << ") " << BotLatency << "c\n");
2805	}
2806	// If we stall for any reason, bump the cycle.
2807	if (NextCycle > CurrCycle)
2808	bumpCycle(NextCycle);
2809	else
2810	// After updating ZoneCritResIdx and ExpectedLatency, check if we're
2811	// resource limited. If a stall occurred, bumpCycle does this.
2812	IsResourceLimited =
2813	checkResourceLimit(LFactor: SchedModel->getLatencyFactor(), Count: getCriticalCount(),
2814	Latency: getScheduledLatency(), AfterSchedNode: true);
2815
2816	// Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2817	// resets CurrMOps. Loop to handle instructions with more MOps than issue in
2818	// one cycle. Since we commonly reach the max MOps here, opportunistically
2819	// bump the cycle to avoid uselessly checking everything in the readyQ.
2820	CurrMOps += IncMOps;
2821
2822	// Bump the cycle count for issue group constraints.
2823	// This must be done after NextCycle has been adjust for all other stalls.
2824	// Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2825	// currCycle to X.
2826	if ((isTop() && SchedModel->mustEndGroup(MI: SU->getInstr())) \|\|
2827	(!isTop() && SchedModel->mustBeginGroup(MI: SU->getInstr()))) {
2828	LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2829	<< " group\n");
2830	bumpCycle(NextCycle: ++NextCycle);
2831	}
2832
2833	while (CurrMOps >= SchedModel->getIssueWidth()) {
2834	LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2835	<< CurrCycle << `'\n'`);
2836	bumpCycle(NextCycle: ++NextCycle);
2837	}
2838	LLVM_DEBUG(dumpScheduledState());
2839	}
2840
2841	/// Release pending ready nodes in to the available queue. This makes them
2842	/// visible to heuristics.
2843	void SchedBoundary::releasePending() {
2844	// If the available queue is empty, it is safe to reset MinReadyCycle.
2845	if (Available.empty())
2846	MinReadyCycle = std::numeric_limits<unsigned>::max();
2847
2848	// Check to see if any of the pending instructions are ready to issue. If
2849	// so, add them to the available queue.
2850	for (unsigned I = `0`, E = Pending.size(); I < E; ++I) {
2851	SUnit SU = (Pending.begin() + I);
2852	unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2853
2854	if (ReadyCycle < MinReadyCycle)
2855	MinReadyCycle = ReadyCycle;
2856
2857	if (Available.size() >= ReadyListLimit)
2858	break;
2859
2860	releaseNode(SU, ReadyCycle, InPQueue: true, Idx: I);
2861	if (E != Pending.size()) {
2862	--I;
2863	--E;
2864	}
2865	}
2866	CheckPending = false;
2867	}
2868
2869	/// Remove SU from the ready set for this boundary.
2870	void SchedBoundary::removeReady(SUnit *SU) {
2871	if (Available.isInQueue(SU))
2872	Available.remove(I: Available.find(SU));
2873	else {
2874	assert(Pending.isInQueue(SU) && "bad ready count");
2875	Pending.remove(I: Pending.find(SU));
2876	}
2877	}
2878
2879	/// If this queue only has one ready candidate, return it. As a side effect,
2880	/// defer any nodes that now hit a hazard, and advance the cycle until at least
2881	/// one node is ready. If multiple instructions are ready, return NULL.
2882	SUnit *SchedBoundary::pickOnlyChoice() {
2883	if (CheckPending)
2884	releasePending();
2885
2886	// Defer any ready instrs that now have a hazard.
2887	for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2888	if (checkHazard(SU: *I)) {
2889	Pending.push(SU: *I);
2890	I = Available.remove(I);
2891	continue;
2892	}
2893	++I;
2894	}
2895	for (unsigned i = `0`; Available.empty(); ++i) {
2896	// FIXME: Re-enable assert once PR20057 is resolved.
2897	// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2898	// "permanent hazard");
2899	(void)i;
2900	bumpCycle(NextCycle: CurrCycle + `1`);
2901	releasePending();
2902	}
2903
2904	LLVM_DEBUG(Pending.dump());
2905	LLVM_DEBUG(Available.dump());
2906
2907	if (Available.size() == `1`)
2908	return *Available.begin();
2909	return nullptr;
2910	}
2911
2912	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
2913
2914	/// Dump the content of the \ref ReservedCycles vector for the
2915	/// resources that are used in the basic block.
2916	///
2917	LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
2918	if (!SchedModel->hasInstrSchedModel())
2919	return;
2920
2921	unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2922	unsigned StartIdx = `0`;
2923
2924	for (unsigned ResIdx = `0`; ResIdx < ResourceCount; ++ResIdx) {
2925	const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
2926	std::string ResName = SchedModel->getResourceName(ResIdx);
2927	for (unsigned UnitIdx = `0`; UnitIdx < NumUnits; ++UnitIdx) {
2928	dbgs() << ResName << "(" << UnitIdx << ") = ";
2929	if (SchedModel && SchedModel->enableIntervals()) {
2930	if (ReservedResourceSegments.count(StartIdx + UnitIdx))
2931	dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
2932	else
2933	dbgs() << "{ }\n";
2934	} else
2935	dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2936	}
2937	StartIdx += NumUnits;
2938	}
2939	}
2940
2941	// This is useful information to dump after bumpNode.
2942	// Note that the Queue contents are more useful before pickNodeFromQueue.
2943	LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2944	unsigned ResFactor;
2945	unsigned ResCount;
2946	if (ZoneCritResIdx) {
2947	ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2948	ResCount = getResourceCount(ZoneCritResIdx);
2949	} else {
2950	ResFactor = SchedModel->getMicroOpFactor();
2951	ResCount = RetiredMOps * ResFactor;
2952	}
2953	unsigned LFactor = SchedModel->getLatencyFactor();
2954	dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2955	<< " Retired: " << RetiredMOps;
2956	dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2957	dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2958	<< ResCount / ResFactor << " "
2959	<< SchedModel->getResourceName(ZoneCritResIdx)
2960	<< "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2961	<< (IsResourceLimited ? " - Resource" : " - Latency")
2962	<< " limited.\n";
2963	if (MISchedDumpReservedCycles)
2964	dumpReservedCycles();
2965	}
2966	#endif
2967
2968	//===----------------------------------------------------------------------===//
2969	// GenericScheduler - Generic implementation of MachineSchedStrategy.
2970	//===----------------------------------------------------------------------===//
2971
2972	void GenericSchedulerBase::SchedCandidate::
2973	initResourceDelta(const ScheduleDAGMI *DAG,
2974	const TargetSchedModel *SchedModel) {
2975	if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2976	return;
2977
2978	const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2979	for (TargetSchedModel::ProcResIter
2980	PI = SchedModel->getWriteProcResBegin(SC),
2981	PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2982	if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2983	ResDelta.CritResources += PI->ReleaseAtCycle;
2984	if (PI->ProcResourceIdx == Policy.DemandResIdx)
2985	ResDelta.DemandedResources += PI->ReleaseAtCycle;
2986	}
2987	}
2988
2989	/// Compute remaining latency. We need this both to determine whether the
2990	/// overall schedule has become latency-limited and whether the instructions
2991	/// outside this zone are resource or latency limited.
2992	///
2993	/// The "dependent" latency is updated incrementally during scheduling as the
2994	/// max height/depth of scheduled nodes minus the cycles since it was
2995	/// scheduled:
2996	/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2997	///
2998	/// The "independent" latency is the max ready queue depth:
2999	/// ILat = max N.depth for N in Available\|Pending
3000	///
3001	/// RemainingLatency is the greater of independent and dependent latency.
3002	///
3003	/// These computations are expensive, especially in DAGs with many edges, so
3004	/// only do them if necessary.
3005	static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3006	unsigned RemLatency = CurrZone.getDependentLatency();
3007	RemLatency = std::max(a: RemLatency,
3008	b: CurrZone.findMaxLatency(ReadySUs: CurrZone.Available.elements()));
3009	RemLatency = std::max(a: RemLatency,
3010	b: CurrZone.findMaxLatency(ReadySUs: CurrZone.Pending.elements()));
3011	return RemLatency;
3012	}
3013
3014	/// Returns true if the current cycle plus remaning latency is greater than
3015	/// the critical path in the scheduling region.
3016	bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3017	SchedBoundary &CurrZone,
3018	bool ComputeRemLatency,
3019	unsigned &RemLatency) const {
3020	// The current cycle is already greater than the critical path, so we are
3021	// already latency limited and don't need to compute the remaining latency.
3022	if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3023	return true;
3024
3025	// If we haven't scheduled anything yet, then we aren't latency limited.
3026	if (CurrZone.getCurrCycle() == `0`)
3027	return false;
3028
3029	if (ComputeRemLatency)
3030	RemLatency = computeRemLatency(CurrZone);
3031
3032	return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3033	}
3034
3035	/// Set the CandPolicy given a scheduling zone given the current resources and
3036	/// latencies inside and outside the zone.
3037	void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3038	SchedBoundary &CurrZone,
3039	SchedBoundary *OtherZone) {
3040	// Apply preemptive heuristics based on the total latency and resources
3041	// inside and outside this zone. Potential stalls should be considered before
3042	// following this policy.
3043
3044	// Compute the critical resource outside the zone.
3045	unsigned OtherCritIdx = `0`;
3046	unsigned OtherCount =
3047	OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : `0`;
3048
3049	bool OtherResLimited = false;
3050	unsigned RemLatency = `0`;
3051	bool RemLatencyComputed = false;
3052	if (SchedModel->hasInstrSchedModel() && OtherCount != `0`) {
3053	RemLatency = computeRemLatency(CurrZone);
3054	RemLatencyComputed = true;
3055	OtherResLimited = checkResourceLimit(LFactor: SchedModel->getLatencyFactor(),
3056	Count: OtherCount, Latency: RemLatency, AfterSchedNode: false);
3057	}
3058
3059	// Schedule aggressively for latency in PostRA mode. We don't check for
3060	// acyclic latency during PostRA, and highly out-of-order processors will
3061	// skip PostRA scheduling.
3062	if (!OtherResLimited &&
3063	(IsPostRA \|\| shouldReduceLatency(Policy, CurrZone, ComputeRemLatency: !RemLatencyComputed,
3064	RemLatency))) {
3065	Policy.ReduceLatency \|= true;
3066	LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3067	<< " RemainingLatency " << RemLatency << " + "
3068	<< CurrZone.getCurrCycle() << "c > CritPath "
3069	<< Rem.CriticalPath << "\n");
3070	}
3071	// If the same resource is limiting inside and outside the zone, do nothing.
3072	if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3073	return;
3074
3075	LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3076	dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3077	<< SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3078	} if (OtherResLimited) dbgs()
3079	<< " RemainingLimit: "
3080	<< SchedModel->getResourceName(OtherCritIdx) << "\n";
3081	if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3082	<< " Latency limited both directions.\n");
3083
3084	if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3085	Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3086
3087	if (OtherResLimited)
3088	Policy.DemandResIdx = OtherCritIdx;
3089	}
3090
3091	#ifndef NDEBUG
3092	const char *GenericSchedulerBase::getReasonStr(
3093	GenericSchedulerBase::CandReason Reason) {
3094	switch (Reason) {
3095	case NoCand: return "NOCAND ";
3096	case Only1: return "ONLY1 ";
3097	case PhysReg: return "PHYS-REG ";
3098	case RegExcess: return "REG-EXCESS";
3099	case RegCritical: return "REG-CRIT ";
3100	case Stall: return "STALL ";
3101	case Cluster: return "CLUSTER ";
3102	case Weak: return "WEAK ";
3103	case RegMax: return "REG-MAX ";
3104	case ResourceReduce: return "RES-REDUCE";
3105	case ResourceDemand: return "RES-DEMAND";
3106	case TopDepthReduce: return "TOP-DEPTH ";
3107	case TopPathReduce: return "TOP-PATH ";
3108	case BotHeightReduce:return "BOT-HEIGHT";
3109	case BotPathReduce: return "BOT-PATH ";
3110	case NextDefUse: return "DEF-USE ";
3111	case NodeOrder: return "ORDER ";
3112	};
3113	llvm_unreachable("Unknown reason!");
3114	}
3115
3116	void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3117	PressureChange P;
3118	unsigned ResIdx = `0`;
3119	unsigned Latency = `0`;
3120	switch (Cand.Reason) {
3121	default:
3122	break;
3123	case RegExcess:
3124	P = Cand.RPDelta.Excess;
3125	break;
3126	case RegCritical:
3127	P = Cand.RPDelta.CriticalMax;
3128	break;
3129	case RegMax:
3130	P = Cand.RPDelta.CurrentMax;
3131	break;
3132	case ResourceReduce:
3133	ResIdx = Cand.Policy.ReduceResIdx;
3134	break;
3135	case ResourceDemand:
3136	ResIdx = Cand.Policy.DemandResIdx;
3137	break;
3138	case TopDepthReduce:
3139	Latency = Cand.SU->getDepth();
3140	break;
3141	case TopPathReduce:
3142	Latency = Cand.SU->getHeight();
3143	break;
3144	case BotHeightReduce:
3145	Latency = Cand.SU->getHeight();
3146	break;
3147	case BotPathReduce:
3148	Latency = Cand.SU->getDepth();
3149	break;
3150	}
3151	dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3152	if (P.isValid())
3153	dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3154	<< ":" << P.getUnitInc() << " ";
3155	else
3156	dbgs() << " ";
3157	if (ResIdx)
3158	dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3159	else
3160	dbgs() << " ";
3161	if (Latency)
3162	dbgs() << " " << Latency << " cycles ";
3163	else
3164	dbgs() << " ";
3165	dbgs() << `'\n'`;
3166	}
3167	#endif
3168
3169	namespace llvm {
3170	/// Return true if this heuristic determines order.
3171	/// TODO: Consider refactor return type of these functions as integer or enum,
3172	/// as we may need to differentiate whether TryCand is better than Cand.
3173	bool tryLess(int TryVal, int CandVal,
3174	GenericSchedulerBase::SchedCandidate &TryCand,
3175	GenericSchedulerBase::SchedCandidate &Cand,
3176	GenericSchedulerBase::CandReason Reason) {
3177	if (TryVal < CandVal) {
3178	TryCand.Reason = Reason;
3179	return true;
3180	}
3181	if (TryVal > CandVal) {
3182	if (Cand.Reason > Reason)
3183	Cand.Reason = Reason;
3184	return true;
3185	}
3186	return false;
3187	}
3188
3189	bool tryGreater(int TryVal, int CandVal,
3190	GenericSchedulerBase::SchedCandidate &TryCand,
3191	GenericSchedulerBase::SchedCandidate &Cand,
3192	GenericSchedulerBase::CandReason Reason) {
3193	if (TryVal > CandVal) {
3194	TryCand.Reason = Reason;
3195	return true;
3196	}
3197	if (TryVal < CandVal) {
3198	if (Cand.Reason > Reason)
3199	Cand.Reason = Reason;
3200	return true;
3201	}
3202	return false;
3203	}
3204
3205	bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3206	GenericSchedulerBase::SchedCandidate &Cand,
3207	SchedBoundary &Zone) {
3208	if (Zone.isTop()) {
3209	// Prefer the candidate with the lesser depth, but only if one of them has
3210	// depth greater than the total latency scheduled so far, otherwise either
3211	// of them could be scheduled now with no stall.
3212	if (std::max(a: TryCand.SU->getDepth(), b: Cand.SU->getDepth()) >
3213	Zone.getScheduledLatency()) {
3214	if (tryLess(TryVal: TryCand.SU->getDepth(), CandVal: Cand.SU->getDepth(),
3215	TryCand, Cand, Reason: GenericSchedulerBase::TopDepthReduce))
3216	return true;
3217	}
3218	if (tryGreater(TryVal: TryCand.SU->getHeight(), CandVal: Cand.SU->getHeight(),
3219	TryCand, Cand, Reason: GenericSchedulerBase::TopPathReduce))
3220	return true;
3221	} else {
3222	// Prefer the candidate with the lesser height, but only if one of them has
3223	// height greater than the total latency scheduled so far, otherwise either
3224	// of them could be scheduled now with no stall.
3225	if (std::max(a: TryCand.SU->getHeight(), b: Cand.SU->getHeight()) >
3226	Zone.getScheduledLatency()) {
3227	if (tryLess(TryVal: TryCand.SU->getHeight(), CandVal: Cand.SU->getHeight(),
3228	TryCand, Cand, Reason: GenericSchedulerBase::BotHeightReduce))
3229	return true;
3230	}
3231	if (tryGreater(TryVal: TryCand.SU->getDepth(), CandVal: Cand.SU->getDepth(),
3232	TryCand, Cand, Reason: GenericSchedulerBase::BotPathReduce))
3233	return true;
3234	}
3235	return false;
3236	}
3237	} // end namespace llvm
3238
3239	static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3240	LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3241	<< GenericSchedulerBase::getReasonStr(Reason) << `'\n'`);
3242	}
3243
3244	static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
3245	tracePick(Reason: Cand.Reason, IsTop: Cand.AtTop);
3246	}
3247
3248	void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3249	assert(dag->hasVRegLiveness() &&
3250	"(PreRA)GenericScheduler needs vreg liveness");
3251	DAG = static_cast<ScheduleDAGMILive*>(dag);
3252	SchedModel = DAG->getSchedModel();
3253	TRI = DAG->TRI;
3254
3255	if (RegionPolicy.ComputeDFSResult)
3256	DAG->computeDFSResult();
3257
3258	Rem.init(DAG, SchedModel);
3259	Top.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3260	Bot.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3261
3262	// Initialize resource counts.
3263
3264	// Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3265	// are disabled, then these HazardRecs will be disabled.
3266	const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3267	if (!Top.HazardRec) {
3268	Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3269	}
3270	if (!Bot.HazardRec) {
3271	Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3272	}
3273	TopCand.SU = nullptr;
3274	BotCand.SU = nullptr;
3275	}
3276
3277	/// Initialize the per-region scheduling policy.
3278	void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3279	MachineBasicBlock::iterator End,
3280	unsigned NumRegionInstrs) {
3281	const MachineFunction &MF = *Begin ->getMF();
3282	const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3283
3284	// Avoid setting up the register pressure tracker for small regions to save
3285	// compile time. As a rough heuristic, only track pressure when the number of
3286	// schedulable instructions exceeds half the allocatable integer register file
3287	// that is the largest legal integer regiser type.
3288	RegionPolicy.ShouldTrackPressure = true;
3289	for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3290	MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3291	if (TLI->isTypeLegal(VT: LegalIntVT)) {
3292	unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3293	RC: TLI->getRegClassFor(VT: LegalIntVT));
3294	RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / `2`);
3295	break;
3296	}
3297	}
3298
3299	// For generic targets, we default to bottom-up, because it's simpler and more
3300	// compile-time optimizations have been implemented in that direction.
3301	RegionPolicy.OnlyBottomUp = true;
3302
3303	// Allow the subtarget to override default policy.
3304	MF.getSubtarget().overrideSchedPolicy(Policy&: RegionPolicy, NumRegionInstrs);
3305
3306	// After subtarget overrides, apply command line options.
3307	if (!EnableRegPressure) {
3308	RegionPolicy.ShouldTrackPressure = false;
3309	RegionPolicy.ShouldTrackLaneMasks = false;
3310	}
3311
3312	// Check -misched-topdown/bottomup can force or unforce scheduling direction.
3313	// e.g. -misched-bottomup=false allows scheduling in both directions.
3314	assert((!ForceTopDown \|\| !ForceBottomUp) &&
3315	"-misched-topdown incompatible with -misched-bottomup");
3316	if (ForceBottomUp.getNumOccurrences() > `0`) {
3317	RegionPolicy.OnlyBottomUp = ForceBottomUp;
3318	if (RegionPolicy.OnlyBottomUp)
3319	RegionPolicy.OnlyTopDown = false;
3320	}
3321	if (ForceTopDown.getNumOccurrences() > `0`) {
3322	RegionPolicy.OnlyTopDown = ForceTopDown;
3323	if (RegionPolicy.OnlyTopDown)
3324	RegionPolicy.OnlyBottomUp = false;
3325	}
3326	}
3327
3328	void GenericScheduler::dumpPolicy() const {
3329	// Cannot completely remove virtual function even in release mode.
3330	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
3331	dbgs() << "GenericScheduler RegionPolicy: "
3332	<< " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3333	<< " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3334	<< " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3335	<< "\n";
3336	#endif
3337	}
3338
3339	/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3340	/// critical path by more cycles than it takes to drain the instruction buffer.
3341	/// We estimate an upper bounds on in-flight instructions as:
3342	///
3343	/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3344	/// InFlightIterations = AcyclicPath / CyclesPerIteration
3345	/// InFlightResources = InFlightIterations LoopResources*
3346	///
3347	/// TODO: Check execution resources in addition to IssueCount.
3348	void GenericScheduler::checkAcyclicLatency() {
3349	if (Rem.CyclicCritPath == `0` \|\| Rem.CyclicCritPath >= Rem.CriticalPath)
3350	return;
3351
3352	// Scaled number of cycles per loop iteration.
3353	unsigned IterCount =
3354	std::max(a: Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3355	b: Rem.RemIssueCount);
3356	// Scaled acyclic critical path.
3357	unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3358	// InFlightCount = (AcyclicPath / IterCycles) InstrPerLoop*
3359	unsigned InFlightCount =
3360	(AcyclicCount * Rem.RemIssueCount + IterCount-`1`) / IterCount;
3361	unsigned BufferLimit =
3362	SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3363
3364	Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3365
3366	LLVM_DEBUG(
3367	dbgs() << "IssueCycles="
3368	<< Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3369	<< "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3370	<< "c NumIters=" << (AcyclicCount + IterCount - `1`) / IterCount
3371	<< " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3372	<< "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3373	if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3374	}
3375
3376	void GenericScheduler::registerRoots() {
3377	Rem.CriticalPath = DAG->ExitSU.getDepth();
3378
3379	// Some roots may not feed into ExitSU. Check all of them in case.
3380	for (const SUnit *SU : Bot.Available) {
3381	if (SU->getDepth() > Rem.CriticalPath)
3382	Rem.CriticalPath = SU->getDepth();
3383	}
3384	LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << `'\n'`);
3385	if (DumpCriticalPathLength) {
3386	errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3387	}
3388
3389	if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > `0`) {
3390	Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3391	checkAcyclicLatency();
3392	}
3393	}
3394
3395	namespace llvm {
3396	bool tryPressure(const PressureChange &TryP,
3397	const PressureChange &CandP,
3398	GenericSchedulerBase::SchedCandidate &TryCand,
3399	GenericSchedulerBase::SchedCandidate &Cand,
3400	GenericSchedulerBase::CandReason Reason,
3401	const TargetRegisterInfo *TRI,
3402	const MachineFunction &MF) {
3403	// If one candidate decreases and the other increases, go with it.
3404	// Invalid candidates have UnitInc==0.
3405	if (tryGreater(TryVal: TryP.getUnitInc() < `0`, CandVal: CandP.getUnitInc() < `0`, TryCand, Cand,
3406	Reason)) {
3407	return true;
3408	}
3409	// Do not compare the magnitude of pressure changes between top and bottom
3410	// boundary.
3411	if (Cand.AtTop != TryCand.AtTop)
3412	return false;
3413
3414	// If both candidates affect the same set in the same boundary, go with the
3415	// smallest increase.
3416	unsigned TryPSet = TryP.getPSetOrMax();
3417	unsigned CandPSet = CandP.getPSetOrMax();
3418	if (TryPSet == CandPSet) {
3419	return tryLess(TryVal: TryP.getUnitInc(), CandVal: CandP.getUnitInc(), TryCand, Cand,
3420	Reason);
3421	}
3422
3423	int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, PSetID: TryPSet) :
3424	std::numeric_limits<int>::max();
3425
3426	int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, PSetID: CandPSet) :
3427	std::numeric_limits<int>::max();
3428
3429	// If the candidates are decreasing pressure, reverse priority.
3430	if (TryP.getUnitInc() < `0`)
3431	std::swap(a&: TryRank, b&: CandRank);
3432	return tryGreater(TryVal: TryRank, CandVal: CandRank, TryCand, Cand, Reason);
3433	}
3434
3435	unsigned getWeakLeft(const SUnit SU, bool* isTop) {
3436	return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3437	}
3438
3439	/// Minimize physical register live ranges. Regalloc wants them adjacent to
3440	/// their physreg def/use.
3441	///
3442	/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3443	/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3444	/// with the operation that produces or consumes the physreg. We'll do this when
3445	/// regalloc has support for parallel copies.
3446	int biasPhysReg(const SUnit SU, bool* isTop) {
3447	const MachineInstr *MI = SU->getInstr();
3448
3449	if (MI->isCopy()) {
3450	unsigned ScheduledOper = isTop ? `1` : `0`;
3451	unsigned UnscheduledOper = isTop ? `0` : `1`;
3452	// If we have already scheduled the physreg produce/consumer, immediately
3453	// schedule the copy.
3454	if (MI->getOperand(i: ScheduledOper).getReg().isPhysical())
3455	return `1`;
3456	// If the physreg is at the boundary, defer it. Otherwise schedule it
3457	// immediately to free the dependent. We can hoist the copy later.
3458	bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3459	if (MI->getOperand(i: UnscheduledOper).getReg().isPhysical())
3460	return AtBoundary ? -`1` : `1`;
3461	}
3462
3463	if (MI->isMoveImmediate()) {
3464	// If we have a move immediate and all successors have been assigned, bias
3465	// towards scheduling this later. Make sure all register defs are to
3466	// physical registers.
3467	bool DoBias = true;
3468	for (const MachineOperand &Op : MI->defs()) {
3469	if (Op.isReg() && !Op.getReg().isPhysical()) {
3470	DoBias = false;
3471	break;
3472	}
3473	}
3474
3475	if (DoBias)
3476	return isTop ? -`1` : `1`;
3477	}
3478
3479	return `0`;
3480	}
3481	} // end namespace llvm
3482
3483	void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3484	bool AtTop,
3485	const RegPressureTracker &RPTracker,
3486	RegPressureTracker &TempTracker) {
3487	Cand.SU = SU;
3488	Cand.AtTop = AtTop;
3489	if (DAG->isTrackingPressure()) {
3490	if (AtTop) {
3491	TempTracker.getMaxDownwardPressureDelta(
3492	MI: Cand.SU->getInstr(),
3493	Delta&: Cand.RPDelta,
3494	CriticalPSets: DAG->getRegionCriticalPSets(),
3495	MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
3496	} else {
3497	if (VerifyScheduling) {
3498	TempTracker.getMaxUpwardPressureDelta(
3499	MI: Cand.SU->getInstr(),
3500	PDiff: &DAG->getPressureDiff(SU: Cand.SU),
3501	Delta&: Cand.RPDelta,
3502	CriticalPSets: DAG->getRegionCriticalPSets(),
3503	MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
3504	} else {
3505	RPTracker.getUpwardPressureDelta(
3506	MI: Cand.SU->getInstr(),
3507	PDiff&: DAG->getPressureDiff(SU: Cand.SU),
3508	Delta&: Cand.RPDelta,
3509	CriticalPSets: DAG->getRegionCriticalPSets(),
3510	MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
3511	}
3512	}
3513	}
3514	LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3515	<< " Try SU(" << Cand.SU->NodeNum << ") "
3516	<< TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3517	<< Cand.RPDelta.Excess.getUnitInc() << "\n");
3518	}
3519
3520	/// Apply a set of heuristics to a new candidate. Heuristics are currently
3521	/// hierarchical. This may be more efficient than a graduated cost model because
3522	/// we don't need to evaluate all aspects of the model for each node in the
3523	/// queue. But it's really done to make the heuristics easier to debug and
3524	/// statistically analyze.
3525	///
3526	/// \param Cand provides the policy and current best candidate.
3527	/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3528	/// \param Zone describes the scheduled zone that we are extending, or nullptr
3529	/// if Cand is from a different zone than TryCand.
3530	/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3531	bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3532	SchedCandidate &TryCand,
3533	SchedBoundary Zone) const* {
3534	// Initialize the candidate if needed.
3535	if (!Cand.isValid()) {
3536	TryCand.Reason = NodeOrder;
3537	return true;
3538	}
3539
3540	// Bias PhysReg Defs and copies to their uses and defined respectively.
3541	if (tryGreater(TryVal: biasPhysReg(SU: TryCand.SU, isTop: TryCand.AtTop),
3542	CandVal: biasPhysReg(SU: Cand.SU, isTop: Cand.AtTop), TryCand, Cand, Reason: PhysReg))
3543	return TryCand.Reason != NoCand;
3544
3545	// Avoid exceeding the target's limit.
3546	if (DAG->isTrackingPressure() && tryPressure(TryP: TryCand.RPDelta.Excess,
3547	CandP: Cand.RPDelta.Excess,
3548	TryCand, Cand, Reason: RegExcess, TRI,
3549	MF: DAG->MF))
3550	return TryCand.Reason != NoCand;
3551
3552	// Avoid increasing the max critical pressure in the scheduled region.
3553	if (DAG->isTrackingPressure() && tryPressure(TryP: TryCand.RPDelta.CriticalMax,
3554	CandP: Cand.RPDelta.CriticalMax,
3555	TryCand, Cand, Reason: RegCritical, TRI,
3556	MF: DAG->MF))
3557	return TryCand.Reason != NoCand;
3558
3559	// We only compare a subset of features when comparing nodes between
3560	// Top and Bottom boundary. Some properties are simply incomparable, in many
3561	// other instances we should only override the other boundary if something
3562	// is a clear good pick on one boundary. Skip heuristics that are more
3563	// "tie-breaking" in nature.
3564	bool SameBoundary = Zone != nullptr;
3565	if (SameBoundary) {
3566	// For loops that are acyclic path limited, aggressively schedule for
3567	// latency. Within an single cycle, whenever CurrMOps > 0, allow normal
3568	// heuristics to take precedence.
3569	if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3570	tryLatency(TryCand, Cand, Zone&: *Zone))
3571	return TryCand.Reason != NoCand;
3572
3573	// Prioritize instructions that read unbuffered resources by stall cycles.
3574	if (tryLess(TryVal: Zone->getLatencyStallCycles(SU: TryCand.SU),
3575	CandVal: Zone->getLatencyStallCycles(SU: Cand.SU), TryCand, Cand, Reason: Stall))
3576	return TryCand.Reason != NoCand;
3577	}
3578
3579	// Keep clustered nodes together to encourage downstream peephole
3580	// optimizations which may reduce resource requirements.
3581	//
3582	// This is a best effort to set things up for a post-RA pass. Optimizations
3583	// like generating loads of multiple registers should ideally be done within
3584	// the scheduler pass by combining the loads during DAG postprocessing.
3585	const SUnit *CandNextClusterSU =
3586	Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3587	const SUnit *TryCandNextClusterSU =
3588	TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3589	if (tryGreater(TryVal: TryCand.SU == TryCandNextClusterSU,
3590	CandVal: Cand.SU == CandNextClusterSU,
3591	TryCand, Cand, Reason: Cluster))
3592	return TryCand.Reason != NoCand;
3593
3594	if (SameBoundary) {
3595	// Weak edges are for clustering and other constraints.
3596	if (tryLess(TryVal: getWeakLeft(SU: TryCand.SU, isTop: TryCand.AtTop),
3597	CandVal: getWeakLeft(SU: Cand.SU, isTop: Cand.AtTop),
3598	TryCand, Cand, Reason: Weak))
3599	return TryCand.Reason != NoCand;
3600	}
3601
3602	// Avoid increasing the max pressure of the entire region.
3603	if (DAG->isTrackingPressure() && tryPressure(TryP: TryCand.RPDelta.CurrentMax,
3604	CandP: Cand.RPDelta.CurrentMax,
3605	TryCand, Cand, Reason: RegMax, TRI,
3606	MF: DAG->MF))
3607	return TryCand.Reason != NoCand;
3608
3609	if (SameBoundary) {
3610	// Avoid critical resource consumption and balance the schedule.
3611	TryCand.initResourceDelta(DAG, SchedModel);
3612	if (tryLess(TryVal: TryCand.ResDelta.CritResources, CandVal: Cand.ResDelta.CritResources,
3613	TryCand, Cand, Reason: ResourceReduce))
3614	return TryCand.Reason != NoCand;
3615	if (tryGreater(TryVal: TryCand.ResDelta.DemandedResources,
3616	CandVal: Cand.ResDelta.DemandedResources,
3617	TryCand, Cand, Reason: ResourceDemand))
3618	return TryCand.Reason != NoCand;
3619
3620	// Avoid serializing long latency dependence chains.
3621	// For acyclic path limited loops, latency was already checked above.
3622	if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3623	!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone&: *Zone))
3624	return TryCand.Reason != NoCand;
3625
3626	// Fall through to original instruction order.
3627	if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3628	\|\| (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3629	TryCand.Reason = NodeOrder;
3630	return true;
3631	}
3632	}
3633
3634	return false;
3635	}
3636
3637	/// Pick the best candidate from the queue.
3638	///
3639	/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3640	/// DAG building. To adjust for the current scheduling location we need to
3641	/// maintain the number of vreg uses remaining to be top-scheduled.
3642	void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3643	const CandPolicy &ZonePolicy,
3644	const RegPressureTracker &RPTracker,
3645	SchedCandidate &Cand) {
3646	// getMaxPressureDelta temporarily modifies the tracker.
3647	RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3648
3649	ReadyQueue &Q = Zone.Available;
3650	for (SUnit *SU : Q) {
3651
3652	SchedCandidate TryCand(ZonePolicy);
3653	initCandidate(Cand&: TryCand, SU, AtTop: Zone.isTop(), RPTracker, TempTracker);
3654	// Pass SchedBoundary only when comparing nodes from the same boundary.
3655	SchedBoundary ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr*;
3656	if (tryCandidate(Cand, TryCand, Zone: ZoneArg)) {
3657	// Initialize resource delta if needed in case future heuristics query it.
3658	if (TryCand.ResDelta == SchedResourceDelta ())
3659	TryCand.initResourceDelta(DAG, SchedModel);
3660	Cand.setBest(TryCand);
3661	LLVM_DEBUG(traceCandidate(Cand));
3662	}
3663	}
3664	}
3665
3666	/// Pick the best candidate node from either the top or bottom queue.
3667	SUnit GenericScheduler::pickNodeBidirectional(bool* &IsTopNode) {
3668	// Schedule as far as possible in the direction of no choice. This is most
3669	// efficient, but also provides the best heuristics for CriticalPSets.
3670	if (SUnit *SU = Bot.pickOnlyChoice()) {
3671	IsTopNode = false;
3672	tracePick(Reason: Only1, IsTop: false);
3673	return SU;
3674	}
3675	if (SUnit *SU = Top.pickOnlyChoice()) {
3676	IsTopNode = true;
3677	tracePick(Reason: Only1, IsTop: true);
3678	return SU;
3679	}
3680	// Set the bottom-up policy based on the state of the current bottom zone and
3681	// the instructions outside the zone, including the top zone.
3682	CandPolicy BotPolicy;
3683	setPolicy(Policy&: BotPolicy, /IsPostRA=/false, CurrZone&: Bot, OtherZone: &Top);
3684	// Set the top-down policy based on the state of the current top zone and
3685	// the instructions outside the zone, including the bottom zone.
3686	CandPolicy TopPolicy;
3687	setPolicy(Policy&: TopPolicy, /IsPostRA=/false, CurrZone&: Top, OtherZone: &Bot);
3688
3689	// See if BotCand is still valid (because we previously scheduled from Top).
3690	LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3691	if (!BotCand.isValid() \|\| BotCand.SU->isScheduled \|\|
3692	BotCand.Policy != BotPolicy) {
3693	BotCand.reset(NewPolicy: CandPolicy ());
3694	pickNodeFromQueue(Zone&: Bot, ZonePolicy: BotPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: BotCand);
3695	assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3696	} else {
3697	LLVM_DEBUG(traceCandidate(BotCand));
3698	#ifndef NDEBUG
3699	if (VerifyScheduling) {
3700	SchedCandidate TCand;
3701	TCand.reset(CandPolicy());
3702	pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3703	assert(TCand.SU == BotCand.SU &&
3704	"Last pick result should correspond to re-picking right now");
3705	}
3706	#endif
3707	}
3708
3709	// Check if the top Q has a better candidate.
3710	LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3711	if (!TopCand.isValid() \|\| TopCand.SU->isScheduled \|\|
3712	TopCand.Policy != TopPolicy) {
3713	TopCand.reset(NewPolicy: CandPolicy ());
3714	pickNodeFromQueue(Zone&: Top, ZonePolicy: TopPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TopCand);
3715	assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3716	} else {
3717	LLVM_DEBUG(traceCandidate(TopCand));
3718	#ifndef NDEBUG
3719	if (VerifyScheduling) {
3720	SchedCandidate TCand;
3721	TCand.reset(CandPolicy());
3722	pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3723	assert(TCand.SU == TopCand.SU &&
3724	"Last pick result should correspond to re-picking right now");
3725	}
3726	#endif
3727	}
3728
3729	// Pick best from BotCand and TopCand.
3730	assert(BotCand.isValid());
3731	assert(TopCand.isValid());
3732	SchedCandidate Cand = BotCand;
3733	TopCand.Reason = NoCand;
3734	if (tryCandidate(Cand, TryCand&: TopCand, Zone: nullptr)) {
3735	Cand.setBest(TopCand);
3736	LLVM_DEBUG(traceCandidate(Cand));
3737	}
3738
3739	IsTopNode = Cand.AtTop;
3740	tracePick(Cand);
3741	return Cand.SU;
3742	}
3743
3744	/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3745	SUnit GenericScheduler::pickNode(bool* &IsTopNode) {
3746	if (DAG->top() == DAG->bottom()) {
3747	assert(Top.Available.empty() && Top.Pending.empty() &&
3748	Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3749	return nullptr;
3750	}
3751	SUnit *SU;
3752	do {
3753	if (RegionPolicy.OnlyTopDown) {
3754	SU = Top.pickOnlyChoice();
3755	if (!SU) {
3756	CandPolicy NoPolicy;
3757	TopCand.reset(NewPolicy: NoPolicy);
3758	pickNodeFromQueue(Zone&: Top, ZonePolicy: NoPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TopCand);
3759	assert(TopCand.Reason != NoCand && "failed to find a candidate");
3760	tracePick(Cand: TopCand);
3761	SU = TopCand.SU;
3762	}
3763	IsTopNode = true;
3764	} else if (RegionPolicy.OnlyBottomUp) {
3765	SU = Bot.pickOnlyChoice();
3766	if (!SU) {
3767	CandPolicy NoPolicy;
3768	BotCand.reset(NewPolicy: NoPolicy);
3769	pickNodeFromQueue(Zone&: Bot, ZonePolicy: NoPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: BotCand);
3770	assert(BotCand.Reason != NoCand && "failed to find a candidate");
3771	tracePick(Cand: BotCand);
3772	SU = BotCand.SU;
3773	}
3774	IsTopNode = false;
3775	} else {
3776	SU = pickNodeBidirectional(IsTopNode);
3777	}
3778	} while (SU->isScheduled);
3779
3780	// If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,
3781	// if isTopReady(), then SU is in either Top.Available or Top.Pending.
3782	// If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,
3783	// if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.
3784	//
3785	// It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
3786	// isBottomReady. That is, it didn't factor into the decision to choose SU
3787	// because it isTopReady or isBottomReady, respectively. In fact, if the
3788	// RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
3789	// queues respectivley contain the original roots and don't get updated when
3790	// picking a node. So if SU isTopReady on a OnlyBottomUp pick, then it was
3791	// because we schduled everything but the top roots. Conversley, if SU
3792	// isBottomReady on OnlyTopDown, then it was because we scheduled everything
3793	// but the bottom roots. If its in a queue even coincidentally, it should be
3794	// removed so it does not get re-picked in a subsequent pickNode call.
3795	if (SU->isTopReady())
3796	Top.removeReady(SU);
3797	if (SU->isBottomReady())
3798	Bot.removeReady(SU);
3799
3800	LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3801	<< *SU->getInstr());
3802	return SU;
3803	}
3804
3805	void GenericScheduler::reschedulePhysReg(SUnit SU, bool* isTop) {
3806	MachineBasicBlock::iterator InsertPos = SU->getInstr();
3807	if (!isTop)
3808	++InsertPos;
3809	SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3810
3811	// Find already scheduled copies with a single physreg dependence and move
3812	// them just above the scheduled instruction.
3813	for (SDep &Dep : Deps) {
3814	if (Dep.getKind() != SDep::Data \|\|
3815	!Register::isPhysicalRegister(Reg: Dep.getReg()))
3816	continue;
3817	SUnit *DepSU = Dep.getSUnit();
3818	if (isTop ? DepSU->Succs.size() > `1` : DepSU->Preds.size() > `1`)
3819	continue;
3820	MachineInstr *Copy = DepSU->getInstr();
3821	if (!Copy->isCopy() && !Copy->isMoveImmediate())
3822	continue;
3823	LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3824	DAG->dumpNode(*Dep.getSUnit()));
3825	DAG->moveInstruction(MI: Copy, InsertPos);
3826	}
3827	}
3828
3829	/// Update the scheduler's state after scheduling a node. This is the same node
3830	/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3831	/// update it's state based on the current cycle before MachineSchedStrategy
3832	/// does.
3833	///
3834	/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3835	/// them here. See comments in biasPhysReg.
3836	void GenericScheduler::schedNode(SUnit SU, bool* IsTopNode) {
3837	if (IsTopNode) {
3838	SU->TopReadyCycle = std::max(a: SU->TopReadyCycle, b: Top.getCurrCycle());
3839	Top.bumpNode(SU);
3840	if (SU->hasPhysRegUses)
3841	reschedulePhysReg(SU, isTop: true);
3842	} else {
3843	SU->BotReadyCycle = std::max(a: SU->BotReadyCycle, b: Bot.getCurrCycle());
3844	Bot.bumpNode(SU);
3845	if (SU->hasPhysRegDefs)
3846	reschedulePhysReg(SU, isTop: false);
3847	}
3848	}
3849
3850	/// Create the standard converging machine scheduler. This will be used as the
3851	/// default scheduler if the target does not set a default.
3852	ScheduleDAGMILive llvm::createGenericSchedLive(MachineSchedContext C) {
3853	ScheduleDAGMILive *DAG =
3854	new ScheduleDAGMILive (C, std::make_unique<GenericScheduler>(args&: C));
3855	// Register DAG post-processors.
3856	//
3857	// FIXME: extend the mutation API to allow earlier mutations to instantiate
3858	// data and pass it to later mutations. Have a single mutation that gathers
3859	// the interesting nodes in one pass.
3860	DAG->addMutation(Mutation: createCopyConstrainDAGMutation(TII: DAG->TII, TRI: DAG->TRI));
3861
3862	const TargetSubtargetInfo &STI = C->MF->getSubtarget();
3863	// Add MacroFusion mutation if fusions are not empty.
3864	const auto &MacroFusions = STI.getMacroFusions();
3865	if (!MacroFusions.empty())
3866	DAG->addMutation(Mutation: createMacroFusionDAGMutation(Predicates: MacroFusions));
3867	return DAG;
3868	}
3869
3870	static ScheduleDAGInstrs createConvergingSched(MachineSchedContext C) {
3871	return createGenericSchedLive(C);
3872	}
3873
3874	static MachineSchedRegistry
3875	GenericSchedRegistry("converge", "Standard converging scheduler.",
3876	createConvergingSched);
3877
3878	//===----------------------------------------------------------------------===//
3879	// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3880	//===----------------------------------------------------------------------===//
3881
3882	void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3883	DAG = Dag;
3884	SchedModel = DAG->getSchedModel();
3885	TRI = DAG->TRI;
3886
3887	Rem.init(DAG, SchedModel);
3888	Top.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3889	Bot.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3890
3891	// Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3892	// or are disabled, then these HazardRecs will be disabled.
3893	const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3894	if (!Top.HazardRec) {
3895	Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3896	}
3897	if (!Bot.HazardRec) {
3898	Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3899	}
3900	}
3901
3902	void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3903	MachineBasicBlock::iterator End,
3904	unsigned NumRegionInstrs) {
3905	if (PostRADirection == MISchedPostRASched::TopDown) {
3906	RegionPolicy.OnlyTopDown = true;
3907	RegionPolicy.OnlyBottomUp = false;
3908	} else if (PostRADirection == MISchedPostRASched::BottomUp) {
3909	RegionPolicy.OnlyTopDown = false;
3910	RegionPolicy.OnlyBottomUp = true;
3911	} else if (PostRADirection == MISchedPostRASched::Bidirectional) {
3912	RegionPolicy.OnlyBottomUp = false;
3913	RegionPolicy.OnlyTopDown = false;
3914	}
3915	}
3916
3917	void PostGenericScheduler::registerRoots() {
3918	Rem.CriticalPath = DAG->ExitSU.getDepth();
3919
3920	// Some roots may not feed into ExitSU. Check all of them in case.
3921	for (const SUnit *SU : Bot.Available) {
3922	if (SU->getDepth() > Rem.CriticalPath)
3923	Rem.CriticalPath = SU->getDepth();
3924	}
3925	LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << `'\n'`);
3926	if (DumpCriticalPathLength) {
3927	errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3928	}
3929	}
3930
3931	/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3932	///
3933	/// \param Cand provides the policy and current best candidate.
3934	/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3935	/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3936	bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3937	SchedCandidate &TryCand) {
3938	// Initialize the candidate if needed.
3939	if (!Cand.isValid()) {
3940	TryCand.Reason = NodeOrder;
3941	return true;
3942	}
3943
3944	// Prioritize instructions that read unbuffered resources by stall cycles.
3945	if (tryLess(TryVal: Top.getLatencyStallCycles(SU: TryCand.SU),
3946	CandVal: Top.getLatencyStallCycles(SU: Cand.SU), TryCand, Cand, Reason: Stall))
3947	return TryCand.Reason != NoCand;
3948
3949	// Keep clustered nodes together.
3950	if (tryGreater(TryVal: TryCand.SU == DAG->getNextClusterSucc(),
3951	CandVal: Cand.SU == DAG->getNextClusterSucc(),
3952	TryCand, Cand, Reason: Cluster))
3953	return TryCand.Reason != NoCand;
3954
3955	// Avoid critical resource consumption and balance the schedule.
3956	if (tryLess(TryVal: TryCand.ResDelta.CritResources, CandVal: Cand.ResDelta.CritResources,
3957	TryCand, Cand, Reason: ResourceReduce))
3958	return TryCand.Reason != NoCand;
3959	if (tryGreater(TryVal: TryCand.ResDelta.DemandedResources,
3960	CandVal: Cand.ResDelta.DemandedResources,
3961	TryCand, Cand, Reason: ResourceDemand))
3962	return TryCand.Reason != NoCand;
3963
3964	// Avoid serializing long latency dependence chains.
3965	if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Zone&: Top)) {
3966	return TryCand.Reason != NoCand;
3967	}
3968
3969	// Fall through to original instruction order.
3970	if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3971	TryCand.Reason = NodeOrder;
3972	return true;
3973	}
3974
3975	return false;
3976	}
3977
3978	void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3979	SchedCandidate &Cand) {
3980	ReadyQueue &Q = Zone.Available;
3981	for (SUnit *SU : Q) {
3982	SchedCandidate TryCand(Cand.Policy);
3983	TryCand.SU = SU;
3984	TryCand.AtTop = Zone.isTop();
3985	TryCand.initResourceDelta(DAG, SchedModel);
3986	if (tryCandidate(Cand, TryCand)) {
3987	Cand.setBest(TryCand);
3988	LLVM_DEBUG(traceCandidate(Cand));
3989	}
3990	}
3991	}
3992
3993	/// Pick the best candidate node from either the top or bottom queue.
3994	SUnit PostGenericScheduler::pickNodeBidirectional(bool* &IsTopNode) {
3995	// FIXME: This is similiar to GenericScheduler::pickNodeBidirectional. Factor
3996	// out common parts.
3997
3998	// Schedule as far as possible in the direction of no choice. This is most
3999	// efficient, but also provides the best heuristics for CriticalPSets.
4000	if (SUnit *SU = Bot.pickOnlyChoice()) {
4001	IsTopNode = false;
4002	tracePick(Reason: Only1, IsTop: false);
4003	return SU;
4004	}
4005	if (SUnit *SU = Top.pickOnlyChoice()) {
4006	IsTopNode = true;
4007	tracePick(Reason: Only1, IsTop: true);
4008	return SU;
4009	}
4010	// Set the bottom-up policy based on the state of the current bottom zone and
4011	// the instructions outside the zone, including the top zone.
4012	CandPolicy BotPolicy;
4013	setPolicy(Policy&: BotPolicy, /IsPostRA=/true, CurrZone&: Bot, OtherZone: &Top);
4014	// Set the top-down policy based on the state of the current top zone and
4015	// the instructions outside the zone, including the bottom zone.
4016	CandPolicy TopPolicy;
4017	setPolicy(Policy&: TopPolicy, /IsPostRA=/true, CurrZone&: Top, OtherZone: &Bot);
4018
4019	// See if BotCand is still valid (because we previously scheduled from Top).
4020	LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4021	if (!BotCand.isValid() \|\| BotCand.SU->isScheduled \|\|
4022	BotCand.Policy != BotPolicy) {
4023	BotCand.reset(NewPolicy: CandPolicy ());
4024	pickNodeFromQueue(Zone&: Bot, Cand&: BotCand);
4025	assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4026	} else {
4027	LLVM_DEBUG(traceCandidate(BotCand));
4028	#ifndef NDEBUG
4029	if (VerifyScheduling) {
4030	SchedCandidate TCand;
4031	TCand.reset(CandPolicy());
4032	pickNodeFromQueue(Bot, BotCand);
4033	assert(TCand.SU == BotCand.SU &&
4034	"Last pick result should correspond to re-picking right now");
4035	}
4036	#endif
4037	}
4038
4039	// Check if the top Q has a better candidate.
4040	LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4041	if (!TopCand.isValid() \|\| TopCand.SU->isScheduled \|\|
4042	TopCand.Policy != TopPolicy) {
4043	TopCand.reset(NewPolicy: CandPolicy ());
4044	pickNodeFromQueue(Zone&: Top, Cand&: TopCand);
4045	assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4046	} else {
4047	LLVM_DEBUG(traceCandidate(TopCand));
4048	#ifndef NDEBUG
4049	if (VerifyScheduling) {
4050	SchedCandidate TCand;
4051	TCand.reset(CandPolicy());
4052	pickNodeFromQueue(Top, TopCand);
4053	assert(TCand.SU == TopCand.SU &&
4054	"Last pick result should correspond to re-picking right now");
4055	}
4056	#endif
4057	}
4058
4059	// Pick best from BotCand and TopCand.
4060	assert(BotCand.isValid());
4061	assert(TopCand.isValid());
4062	SchedCandidate Cand = BotCand;
4063	TopCand.Reason = NoCand;
4064	if (tryCandidate(Cand, TryCand&: TopCand)) {
4065	Cand.setBest(TopCand);
4066	LLVM_DEBUG(traceCandidate(Cand));
4067	}
4068
4069	IsTopNode = Cand.AtTop;
4070	tracePick(Cand);
4071	return Cand.SU;
4072	}
4073
4074	/// Pick the next node to schedule.
4075	SUnit PostGenericScheduler::pickNode(bool* &IsTopNode) {
4076	if (DAG->top() == DAG->bottom()) {
4077	assert(Top.Available.empty() && Top.Pending.empty() &&
4078	Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4079	return nullptr;
4080	}
4081	SUnit *SU;
4082	do {
4083	if (RegionPolicy.OnlyBottomUp) {
4084	SU = Bot.pickOnlyChoice();
4085	if (SU) {
4086	tracePick(Reason: Only1, IsTop: true);
4087	} else {
4088	CandPolicy NoPolicy;
4089	BotCand.reset(NewPolicy: NoPolicy);
4090	// Set the bottom-up policy based on the state of the current bottom
4091	// zone and the instructions outside the zone, including the top zone.
4092	setPolicy(Policy&: BotCand.Policy, /IsPostRA=/true, CurrZone&: Bot, OtherZone: nullptr);
4093	pickNodeFromQueue(Zone&: Bot, Cand&: BotCand);
4094	assert(BotCand.Reason != NoCand && "failed to find a candidate");
4095	tracePick(Cand: BotCand);
4096	SU = BotCand.SU;
4097	}
4098	IsTopNode = false;
4099	} else if (RegionPolicy.OnlyTopDown) {
4100	SU = Top.pickOnlyChoice();
4101	if (SU) {
4102	tracePick(Reason: Only1, IsTop: true);
4103	} else {
4104	CandPolicy NoPolicy;
4105	TopCand.reset(NewPolicy: NoPolicy);
4106	// Set the top-down policy based on the state of the current top zone
4107	// and the instructions outside the zone, including the bottom zone.
4108	setPolicy(Policy&: TopCand.Policy, /IsPostRA=/true, CurrZone&: Top, OtherZone: nullptr);
4109	pickNodeFromQueue(Zone&: Top, Cand&: TopCand);
4110	assert(TopCand.Reason != NoCand && "failed to find a candidate");
4111	tracePick(Cand: TopCand);
4112	SU = TopCand.SU;
4113	}
4114	IsTopNode = true;
4115	} else {
4116	SU = pickNodeBidirectional(IsTopNode);
4117	}
4118	} while (SU->isScheduled);
4119
4120	if (SU->isTopReady())
4121	Top.removeReady(SU);
4122	if (SU->isBottomReady())
4123	Bot.removeReady(SU);
4124
4125	LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4126	<< *SU->getInstr());
4127	return SU;
4128	}
4129
4130	/// Called after ScheduleDAGMI has scheduled an instruction and updated
4131	/// scheduled/remaining flags in the DAG nodes.
4132	void PostGenericScheduler::schedNode(SUnit SU, bool* IsTopNode) {
4133	if (IsTopNode) {
4134	SU->TopReadyCycle = std::max(a: SU->TopReadyCycle, b: Top.getCurrCycle());
4135	Top.bumpNode(SU);
4136	} else {
4137	SU->BotReadyCycle = std::max(a: SU->BotReadyCycle, b: Bot.getCurrCycle());
4138	Bot.bumpNode(SU);
4139	}
4140	}
4141
4142	ScheduleDAGMI llvm::createGenericSchedPostRA(MachineSchedContext C) {
4143	ScheduleDAGMI *DAG =
4144	new ScheduleDAGMI (C, std::make_unique<PostGenericScheduler>(args&: C),
4145	/RemoveKillFlags=/true);
4146	const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4147	// Add MacroFusion mutation if fusions are not empty.
4148	const auto &MacroFusions = STI.getMacroFusions();
4149	if (!MacroFusions.empty())
4150	DAG->addMutation(Mutation: createMacroFusionDAGMutation(Predicates: MacroFusions));
4151	return DAG;
4152	}
4153
4154	//===----------------------------------------------------------------------===//
4155	// ILP Scheduler. Currently for experimental analysis of heuristics.
4156	//===----------------------------------------------------------------------===//
4157
4158	namespace {
4159
4160	/// Order nodes by the ILP metric.
4161	struct ILPOrder {
4162	const SchedDFSResult DFSResult = nullptr*;
4163	const BitVector ScheduledTrees = nullptr*;
4164	bool MaximizeILP;
4165
4166	ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4167
4168	/// Apply a less-than relation on node priority.
4169	///
4170	/// (Return true if A comes after B in the Q.)
4171	bool operator()(const SUnit A, const* SUnit B) const* {
4172	unsigned SchedTreeA = DFSResult->getSubtreeID(SU: A);
4173	unsigned SchedTreeB = DFSResult->getSubtreeID(SU: B);
4174	if (SchedTreeA != SchedTreeB) {
4175	// Unscheduled trees have lower priority.
4176	if (ScheduledTrees->test(Idx: SchedTreeA) != ScheduledTrees->test(Idx: SchedTreeB))
4177	return ScheduledTrees->test(Idx: SchedTreeB);
4178
4179	// Trees with shallower connections have lower priority.
4180	if (DFSResult->getSubtreeLevel(SubtreeID: SchedTreeA)
4181	!= DFSResult->getSubtreeLevel(SubtreeID: SchedTreeB)) {
4182	return DFSResult->getSubtreeLevel(SubtreeID: SchedTreeA)
4183	< DFSResult->getSubtreeLevel(SubtreeID: SchedTreeB);
4184	}
4185	}
4186	if (MaximizeILP)
4187	return DFSResult->getILP(SU: A) < DFSResult->getILP(SU: B);
4188	else
4189	return DFSResult->getILP(SU: A) > DFSResult->getILP(SU: B);
4190	}
4191	};
4192
4193	/// Schedule based on the ILP metric.
4194	class ILPScheduler : public MachineSchedStrategy {
4195	ScheduleDAGMILive DAG = nullptr*;
4196	ILPOrder Cmp;
4197
4198	std::vector<SUnit*> ReadyQ;
4199
4200	public:
4201	ILPScheduler(bool MaximizeILP) : Cmp (MaximizeILP) {}
4202
4203	void initialize(ScheduleDAGMI *dag) override {
4204	assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4205	DAG = static_cast<ScheduleDAGMILive*>(dag);
4206	DAG->computeDFSResult();
4207	Cmp.DFSResult = DAG->getDFSResult();
4208	Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4209	ReadyQ.clear();
4210	}
4211
4212	void registerRoots() override {
4213	// Restore the heap in ReadyQ with the updated DFS results.
4214	std::make_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4215	}
4216
4217	/// Implement MachineSchedStrategy interface.
4218	/// -----------------------------------------
4219
4220	/// Callback to select the highest priority node from the ready Q.
4221	SUnit pickNode(bool* &IsTopNode) override {
4222	if (ReadyQ.empty()) return nullptr;
4223	std::pop_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4224	SUnit *SU = ReadyQ.back();
4225	ReadyQ.pop_back();
4226	IsTopNode = false;
4227	LLVM_DEBUG(dbgs() << "Pick node "
4228	<< "SU(" << SU->NodeNum << ") "
4229	<< " ILP: " << DAG->getDFSResult()->getILP(SU)
4230	<< " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4231	<< " @"
4232	<< DAG->getDFSResult()->getSubtreeLevel(
4233	DAG->getDFSResult()->getSubtreeID(SU))
4234	<< `'\n'`
4235	<< "Scheduling " << *SU->getInstr());
4236	return SU;
4237	}
4238
4239	/// Scheduler callback to notify that a new subtree is scheduled.
4240	void scheduleTree(unsigned SubtreeID) override {
4241	std::make_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4242	}
4243
4244	/// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4245	/// DFSResults, and resort the priority Q.
4246	void schedNode(SUnit SU, bool* IsTopNode) override {
4247	assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4248	}
4249
4250	void releaseTopNode(SUnit ) override { /only called for top roots/* }
4251
4252	void releaseBottomNode(SUnit *SU) override {
4253	ReadyQ.push_back(x: SU);
4254	std::push_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4255	}
4256	};
4257
4258	} // end anonymous namespace
4259
4260	static ScheduleDAGInstrs createILPMaxScheduler(MachineSchedContext C) {
4261	return new ScheduleDAGMILive (C, std::make_unique<ILPScheduler>(args: true));
4262	}
4263	static ScheduleDAGInstrs createILPMinScheduler(MachineSchedContext C) {
4264	return new ScheduleDAGMILive (C, std::make_unique<ILPScheduler>(args: false));
4265	}
4266
4267	static MachineSchedRegistry ILPMaxRegistry(
4268	"ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4269	static MachineSchedRegistry ILPMinRegistry(
4270	"ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4271
4272	//===----------------------------------------------------------------------===//
4273	// Machine Instruction Shuffler for Correctness Testing
4274	//===----------------------------------------------------------------------===//
4275
4276	#ifndef NDEBUG
4277	namespace {
4278
4279	/// Apply a less-than relation on the node order, which corresponds to the
4280	/// instruction order prior to scheduling. IsReverse implements greater-than.
4281	template<bool IsReverse>
4282	struct SUnitOrder {
4283	bool operator()(SUnit A, SUnit B) const {
4284	if (IsReverse)
4285	return A->NodeNum > B->NodeNum;
4286	else
4287	return A->NodeNum < B->NodeNum;
4288	}
4289	};
4290
4291	/// Reorder instructions as much as possible.
4292	class InstructionShuffler : public MachineSchedStrategy {
4293	bool IsAlternating;
4294	bool IsTopDown;
4295
4296	// Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4297	// gives nodes with a higher number higher priority causing the latest
4298	// instructions to be scheduled first.
4299	PriorityQueue<SUnit, std::vector<SUnit>, SUnitOrder<false>>
4300	TopQ;
4301
4302	// When scheduling bottom-up, use greater-than as the queue priority.
4303	PriorityQueue<SUnit, std::vector<SUnit>, SUnitOrder<true>>
4304	BottomQ;
4305
4306	public:
4307	InstructionShuffler(bool alternate, bool topdown)
4308	: IsAlternating(alternate), IsTopDown(topdown) {}
4309
4310	void initialize(ScheduleDAGMI*) override {
4311	TopQ.clear();
4312	BottomQ.clear();
4313	}
4314
4315	/// Implement MachineSchedStrategy interface.
4316	/// -----------------------------------------
4317
4318	SUnit pickNode(bool* &IsTopNode) override {
4319	SUnit *SU;
4320	if (IsTopDown) {
4321	do {
4322	if (TopQ.empty()) return nullptr;
4323	SU = TopQ.top();
4324	TopQ.pop();
4325	} while (SU->isScheduled);
4326	IsTopNode = true;
4327	} else {
4328	do {
4329	if (BottomQ.empty()) return nullptr;
4330	SU = BottomQ.top();
4331	BottomQ.pop();
4332	} while (SU->isScheduled);
4333	IsTopNode = false;
4334	}
4335	if (IsAlternating)
4336	IsTopDown = !IsTopDown;
4337	return SU;
4338	}
4339
4340	void schedNode(SUnit SU, bool* IsTopNode) override {}
4341
4342	void releaseTopNode(SUnit *SU) override {
4343	TopQ.push(SU);
4344	}
4345	void releaseBottomNode(SUnit *SU) override {
4346	BottomQ.push(SU);
4347	}
4348	};
4349
4350	} // end anonymous namespace
4351
4352	static ScheduleDAGInstrs createInstructionShuffler(MachineSchedContext C) {
4353	bool Alternate = !ForceTopDown && !ForceBottomUp;
4354	bool TopDown = !ForceBottomUp;
4355	assert((TopDown \|\| !ForceTopDown) &&
4356	"-misched-topdown incompatible with -misched-bottomup");
4357	return new ScheduleDAGMILive(
4358	C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4359	}
4360
4361	static MachineSchedRegistry ShufflerRegistry(
4362	"shuffle", "Shuffle machine instructions alternating directions",
4363	createInstructionShuffler);
4364	#endif // !NDEBUG
4365
4366	//===----------------------------------------------------------------------===//
4367	// GraphWriter support for ScheduleDAGMILive.
4368	//===----------------------------------------------------------------------===//
4369
4370	#ifndef NDEBUG
4371	namespace llvm {
4372
4373	template<> struct GraphTraits<
4374	ScheduleDAGMI> : public* GraphTraits<ScheduleDAG*> {};
4375
4376	template<>
4377	struct DOTGraphTraits<ScheduleDAGMI> : public* DefaultDOTGraphTraits {
4378	DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
4379
4380	static std::string getGraphName(const ScheduleDAG *G) {
4381	return std::string(G->MF.getName());
4382	}
4383
4384	static bool renderGraphFromBottomUp() {
4385	return true;
4386	}
4387
4388	static bool isNodeHidden(const SUnit Node, const* ScheduleDAG *G) {
4389	if (ViewMISchedCutoff == `0`)
4390	return false;
4391	return (Node->Preds.size() > ViewMISchedCutoff
4392	\|\| Node->Succs.size() > ViewMISchedCutoff);
4393	}
4394
4395	/// If you want to override the dot attributes printed for a particular
4396	/// edge, override this method.
4397	static std::string getEdgeAttributes(const SUnit *Node,
4398	SUnitIterator EI,
4399	const ScheduleDAG *Graph) {
4400	if (EI.isArtificialDep())
4401	return "color=cyan,style=dashed";
4402	if (EI.isCtrlDep())
4403	return "color=blue,style=dashed";
4404	return "";
4405	}
4406
4407	static std::string getNodeLabel(const SUnit SU, const* ScheduleDAG *G) {
4408	std::string Str;
4409	raw_string_ostream SS(Str);
4410	const ScheduleDAGMI DAG = static_cast<const* ScheduleDAGMI*>(G);
4411	const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4412	static_cast<const ScheduleDAGMILive>(G)->getDFSResult() : nullptr*;
4413	SS << "SU:" << SU->NodeNum;
4414	if (DFS)
4415	SS << " I:" << DFS->getNumInstrs(SU);
4416	return Str;
4417	}
4418
4419	static std::string getNodeDescription(const SUnit SU, const* ScheduleDAG *G) {
4420	return G->getGraphNodeLabel(SU);
4421	}
4422
4423	static std::string getNodeAttributes(const SUnit N, const* ScheduleDAG *G) {
4424	std::string Str("shape=Mrecord");
4425	const ScheduleDAGMI DAG = static_cast<const* ScheduleDAGMI*>(G);
4426	const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4427	static_cast<const ScheduleDAGMILive>(G)->getDFSResult() : nullptr*;
4428	if (DFS) {
4429	Str += ",style=filled,fillcolor=\"#";
4430	Str += DOT::getColorString(DFS->getSubtreeID(N));
4431	Str += `'"'`;
4432	}
4433	return Str;
4434	}
4435	};
4436
4437	} // end namespace llvm
4438	#endif // NDEBUG
4439
4440	/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4441	/// rendered using 'dot'.
4442	void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4443	#ifndef NDEBUG
4444	ViewGraph(this, Name, false, Title);
4445	#else
4446	errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4447	<< "systems with Graphviz or gv!\n";
4448	#endif // NDEBUG
4449	}
4450
4451	/// Out-of-line implementation with no arguments is handy for gdb.
4452	void ScheduleDAGMI::viewGraph() {
4453	viewGraph(Name: getDAGName(), Title: "Scheduling-Units Graph for " + getDAGName());
4454	}
4455
4456	/// Sort predicate for the intervals stored in an instance of
4457	/// ResourceSegments. Intervals are always disjoint (no intersection
4458	/// for any pairs of intervals), therefore we can sort the totality of
4459	/// the intervals by looking only at the left boundary.
4460	static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4461	const ResourceSegments::IntervalTy &B) {
4462	return A.first < B.first;
4463	}
4464
4465	unsigned ResourceSegments::getFirstAvailableAt(
4466	unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4467	std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4468	IntervalBuilder) const {
4469	assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4470	sortIntervals) &&
4471	"Cannot execute on an un-sorted set of intervals.");
4472
4473	// Zero resource usage is allowed by TargetSchedule.td but we do not construct
4474	// a ResourceSegment interval for that situation.
4475	if (AcquireAtCycle == ReleaseAtCycle)
4476	return CurrCycle;
4477
4478	unsigned RetCycle = CurrCycle;
4479	ResourceSegments::IntervalTy NewInterval =
4480	IntervalBuilder (RetCycle, AcquireAtCycle, ReleaseAtCycle);
4481	for (auto &Interval : _Intervals) {
4482	if (!intersects(A: NewInterval, B: Interval))
4483	continue;
4484
4485	// Move the interval right next to the top of the one it
4486	// intersects.
4487	assert(Interval.second > NewInterval.first &&
4488	"Invalid intervals configuration.");
4489	RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4490	NewInterval = IntervalBuilder (RetCycle, AcquireAtCycle, ReleaseAtCycle);
4491	}
4492	return RetCycle;
4493	}
4494
4495	void ResourceSegments::add(ResourceSegments::IntervalTy A,
4496	const unsigned CutOff) {
4497	assert(A.first <= A.second && "Cannot add negative resource usage");
4498	assert(CutOff > `0` && "0-size interval history has no use.");
4499	// Zero resource usage is allowed by TargetSchedule.td, in the case that the
4500	// instruction needed the resource to be available but does not use it.
4501	// However, ResourceSegment represents an interval that is closed on the left
4502	// and open on the right. It is impossible to represent an empty interval when
4503	// the left is closed. Do not add it to Intervals.
4504	if (A.first == A.second)
4505	return;
4506
4507	assert(all_of(_Intervals,
4508	[&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4509	return !intersects(A, Interval);
4510	}) &&
4511	"A resource is being overwritten");
4512	_Intervals.push_back(x: A);
4513
4514	sortAndMerge();
4515
4516	// Do not keep the full history of the intervals, just the
4517	// latest #CutOff.
4518	while (_Intervals.size() > CutOff)
4519	_Intervals.pop_front();
4520	}
4521
4522	bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4523	ResourceSegments::IntervalTy B) {
4524	assert(A.first <= A.second && "Invalid interval");
4525	assert(B.first <= B.second && "Invalid interval");
4526
4527	// Share one boundary.
4528	if ((A.first == B.first) \|\| (A.second == B.second))
4529	return true;
4530
4531	// full intersersect: [ ** ) B*
4532	// [*) A
4533	if ((A.first > B.first) && (A.second < B.second))
4534	return true;
4535
4536	// right intersect: [ *) B
4537	// [** ) A*
4538	if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4539	return true;
4540
4541	// left intersect: [** ) B*
4542	// [ *) A
4543	if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4544	return true;
4545
4546	return false;
4547	}
4548
4549	void ResourceSegments::sortAndMerge() {
4550	if (_Intervals.size() <= `1`)
4551	return;
4552
4553	// First sort the collection.
4554	_Intervals.sort(sortIntervals);
4555
4556	// can use next because I have at least 2 elements in the list
4557	auto next = std::next(x: std::begin(cont&: _Intervals));
4558	auto E = std::end(cont&: _Intervals);
4559	for (; next != E; ++next) {
4560	if (std::prev(x: next)->second >= next ->first) {
4561	next ->first = std::prev(x: next)->first;
4562	_Intervals.erase(position: std::prev(x: next));
4563	continue;
4564	}
4565	}
4566	}
4567

Browse the source code of llvm_projects/llvm/lib/CodeGen/MachineScheduler.cpp