//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"
STATISTIC(NumInstrsInSourceOrderPreRA,
          "Number of instructions in source order after pre-RA scheduling");
STATISTIC(NumInstrsInSourceOrderPostRA,
          "Number of instructions in source order after post-RA scheduling");
STATISTIC(NumInstrsScheduledPreRA,
          "Number of instructions scheduled by pre-RA scheduler");
STATISTIC(NumInstrsScheduledPostRA,
          "Number of instructions scheduled by post-RA scheduler");
STATISTIC(NumClustered, "Number of load/store pairs clustered");

STATISTIC(NumTopPreRA,
          "Number of scheduling units chosen from top queue pre-RA");
STATISTIC(NumBotPreRA,
          "Number of scheduling units chosen from bottom queue pre-RA");
STATISTIC(NumNoCandPreRA,
          "Number of scheduling units chosen for NoCand heuristic pre-RA");
STATISTIC(NumOnly1PreRA,
          "Number of scheduling units chosen for Only1 heuristic pre-RA");
STATISTIC(NumPhysRegPreRA,
          "Number of scheduling units chosen for PhysReg heuristic pre-RA");
STATISTIC(NumRegExcessPreRA,
          "Number of scheduling units chosen for RegExcess heuristic pre-RA");
STATISTIC(NumRegCriticalPreRA,
          "Number of scheduling units chosen for RegCritical heuristic pre-RA");
STATISTIC(NumStallPreRA,
          "Number of scheduling units chosen for Stall heuristic pre-RA");
STATISTIC(NumClusterPreRA,
          "Number of scheduling units chosen for Cluster heuristic pre-RA");
STATISTIC(NumWeakPreRA,
          "Number of scheduling units chosen for Weak heuristic pre-RA");
STATISTIC(NumRegMaxPreRA,
          "Number of scheduling units chosen for RegMax heuristic pre-RA");
STATISTIC(
    NumResourceReducePreRA,
    "Number of scheduling units chosen for ResourceReduce heuristic pre-RA");
STATISTIC(
    NumResourceDemandPreRA,
    "Number of scheduling units chosen for ResourceDemand heuristic pre-RA");
STATISTIC(
    NumTopDepthReducePreRA,
    "Number of scheduling units chosen for TopDepthReduce heuristic pre-RA");
STATISTIC(
    NumTopPathReducePreRA,
    "Number of scheduling units chosen for TopPathReduce heuristic pre-RA");
STATISTIC(
    NumBotHeightReducePreRA,
    "Number of scheduling units chosen for BotHeightReduce heuristic pre-RA");
STATISTIC(
    NumBotPathReducePreRA,
    "Number of scheduling units chosen for BotPathReduce heuristic pre-RA");
STATISTIC(NumNodeOrderPreRA,
          "Number of scheduling units chosen for NodeOrder heuristic pre-RA");
STATISTIC(NumFirstValidPreRA,
          "Number of scheduling units chosen for FirstValid heuristic pre-RA");

STATISTIC(NumTopPostRA,
          "Number of scheduling units chosen from top queue post-RA");
STATISTIC(NumBotPostRA,
          "Number of scheduling units chosen from bottom queue post-RA");
STATISTIC(NumNoCandPostRA,
          "Number of scheduling units chosen for NoCand heuristic post-RA");
STATISTIC(NumOnly1PostRA,
          "Number of scheduling units chosen for Only1 heuristic post-RA");
STATISTIC(NumPhysRegPostRA,
          "Number of scheduling units chosen for PhysReg heuristic post-RA");
STATISTIC(NumRegExcessPostRA,
          "Number of scheduling units chosen for RegExcess heuristic post-RA");
STATISTIC(
    NumRegCriticalPostRA,
    "Number of scheduling units chosen for RegCritical heuristic post-RA");
STATISTIC(NumStallPostRA,
          "Number of scheduling units chosen for Stall heuristic post-RA");
STATISTIC(NumClusterPostRA,
          "Number of scheduling units chosen for Cluster heuristic post-RA");
STATISTIC(NumWeakPostRA,
          "Number of scheduling units chosen for Weak heuristic post-RA");
STATISTIC(NumRegMaxPostRA,
          "Number of scheduling units chosen for RegMax heuristic post-RA");
STATISTIC(
    NumResourceReducePostRA,
    "Number of scheduling units chosen for ResourceReduce heuristic post-RA");
STATISTIC(
    NumResourceDemandPostRA,
    "Number of scheduling units chosen for ResourceDemand heuristic post-RA");
STATISTIC(
    NumTopDepthReducePostRA,
    "Number of scheduling units chosen for TopDepthReduce heuristic post-RA");
STATISTIC(
    NumTopPathReducePostRA,
    "Number of scheduling units chosen for TopPathReduce heuristic post-RA");
STATISTIC(
    NumBotHeightReducePostRA,
    "Number of scheduling units chosen for BotHeightReduce heuristic post-RA");
STATISTIC(
    NumBotPathReducePostRA,
    "Number of scheduling units chosen for BotPathReduce heuristic post-RA");
STATISTIC(NumNodeOrderPostRA,
          "Number of scheduling units chosen for NodeOrder heuristic post-RA");
STATISTIC(NumFirstValidPostRA,
          "Number of scheduling units chosen for FirstValid heuristic post-RA");

namespace llvm {

cl::opt<MISched::Direction> PreRADirection(
    "misched-prera-direction", cl::Hidden,
    cl::desc("Pre reg-alloc list scheduling direction"),
    cl::init(MISched::Unspecified),
    cl::values(
        clEnumValN(MISched::TopDown, "topdown",
                   "Force top-down pre reg-alloc list scheduling"),
        clEnumValN(MISched::BottomUp, "bottomup",
                   "Force bottom-up pre reg-alloc list scheduling"),
        clEnumValN(MISched::Bidirectional, "bidirectional",
                   "Force bidirectional pre reg-alloc list scheduling")));

static cl::opt<MISched::Direction> PostRADirection(
    "misched-postra-direction", cl::Hidden,
    cl::desc("Post reg-alloc list scheduling direction"),
    cl::init(MISched::Unspecified),
    cl::values(
        clEnumValN(MISched::TopDown, "topdown",
                   "Force top-down post reg-alloc list scheduling"),
        clEnumValN(MISched::BottomUp, "bottomup",
                   "Force bottom-up post reg-alloc list scheduling"),
        clEnumValN(MISched::Bidirectional, "bidirectional",
                   "Force bidirectional post reg-alloc list scheduling")));

static cl::opt<bool>
    DumpCriticalPathLength("misched-dcpl", cl::Hidden,
                           cl::desc("Print critical path length to stdout"));

cl::opt<bool> VerifyScheduling(
    "verify-misched", cl::Hidden,
    cl::desc("Verify machine instrs before and after machine scheduling"));

#ifndef NDEBUG
cl::opt<bool> ViewMISchedDAGs(
    "view-misched-dags", cl::Hidden,
    cl::desc("Pop up a window to show MISched dags after they are processed"));
cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
                        cl::desc("Print schedule DAGs"));
cl::opt<bool> MISchedDumpReservedCycles(
    "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
    cl::desc("Dump resource usage at schedule boundary."));
cl::opt<bool> MischedDetailResourceBooking(
    "misched-detail-resource-booking", cl::Hidden, cl::init(false),
    cl::desc("Show details of invoking getNextResourceCycle."));
#else
const bool ViewMISchedDAGs = false;
const bool PrintDAGs = false;
const bool MischedDetailResourceBooking = false;
#ifdef LLVM_ENABLE_DUMP
const bool MISchedDumpReservedCycles = false;
#endif // LLVM_ENABLE_DUMP
#endif // NDEBUG

} // end namespace llvm

#ifndef NDEBUG
/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph; provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  cl::desc("Hide nodes with more predecessors/successors than the cutoff"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  cl::desc("Only schedule this MBB#"));
#endif // NDEBUG

/// Avoid quadratic complexity in unusually large basic blocks by limiting the
/// size of the ready lists.
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
  cl::desc("Limit ready list to N instructions"), cl::init(256));

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
                                        cl::desc("Enable memop clustering."),
                                        cl::init(true));
static cl::opt<bool>
    ForceFastCluster("force-fast-cluster", cl::Hidden,
                     cl::desc("Switch to the fast cluster algorithm at the "
                              "cost of some fusion opportunities"),
                     cl::init(false));
static cl::opt<unsigned>
    FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
                         cl::desc("The threshold for fast cluster"),
                         cl::init(1000));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static cl::opt<bool> MISchedDumpScheduleTrace(
    "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
    cl::desc("Dump a cycle-by-cycle trace of the schedule."));
static cl::opt<unsigned>
    HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
                   cl::desc("Set width of the columns with "
                            "the resources and schedule units"),
                   cl::init(19));
static cl::opt<unsigned>
    ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
             cl::desc("Set width of the columns showing resource booking."),
             cl::init(5));
static cl::opt<bool> MISchedSortResourcesInTrace(
    "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
    cl::desc("Sort the resources printed in the dump trace"));
#endif

static cl::opt<unsigned>
    MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
                     cl::desc("Number of intervals to track"), cl::init(10));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext() {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace llvm {
namespace impl_detail {

/// Base class for the machine scheduler classes.
class MachineSchedulerBase : public MachineSchedContext {
protected:
  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};

/// Impl class for MachineScheduler.
class MachineSchedulerImpl : public MachineSchedulerBase {
  // These are only used for MF.verify(); remove them once verify() supports
  // passing in all analyses.
  MachineFunctionPass *P = nullptr;
  MachineFunctionAnalysisManager *MFAM = nullptr;

public:
  struct RequiredAnalyses {
    MachineLoopInfo &MLI;
    MachineDominatorTree &MDT;
    AAResults &AA;
    LiveIntervals &LIS;
  };

  MachineSchedulerImpl() {}
  // Migration only
  void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
  void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }

  bool run(MachineFunction &MF, const TargetMachine &TM,
           const RequiredAnalyses &Analyses);

protected:
  ScheduleDAGInstrs *createMachineScheduler();
};

/// Impl class for PostMachineScheduler.
class PostMachineSchedulerImpl : public MachineSchedulerBase {
  // These are only used for MF.verify(); remove them once verify() supports
  // passing in all analyses.
  MachineFunctionPass *P = nullptr;
  MachineFunctionAnalysisManager *MFAM = nullptr;

public:
  struct RequiredAnalyses {
    MachineLoopInfo &MLI;
    AAResults &AA;
  };
  PostMachineSchedulerImpl() {}
  // Migration only
  void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
  void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }

  bool run(MachineFunction &Func, const TargetMachine &TM,
           const RequiredAnalyses &Analyses);

protected:
  ScheduleDAGInstrs *createPostMachineScheduler();
};

} // namespace impl_detail
} // namespace llvm

using impl_detail::MachineSchedulerBase;
using impl_detail::MachineSchedulerImpl;
using impl_detail::PostMachineSchedulerImpl;

namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineSchedulerLegacy : public MachineFunctionPass {
  MachineSchedulerImpl Impl;

public:
  MachineSchedulerLegacy();
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &) override;

  static char ID; // Class identification, replacement for typeinfo
};

/// PostMachineScheduler runs shortly before code emission.
class PostMachineSchedulerLegacy : public MachineFunctionPass {
  PostMachineSchedulerImpl Impl;

public:
  PostMachineSchedulerLegacy();
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &) override;

  static char ID; // Class identification, replacement for typeinfo
};

} // end anonymous namespace

char MachineSchedulerLegacy::ID = 0;

char &llvm::MachineSchedulerID = MachineSchedulerLegacy::ID;

INITIALIZE_PASS_BEGIN(MachineSchedulerLegacy, DEBUG_TYPE,
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(MachineSchedulerLegacy, DEBUG_TYPE,
                    "Machine Instruction Scheduler", false, false)

MachineSchedulerLegacy::MachineSchedulerLegacy() : MachineFunctionPass(ID) {
  initializeMachineSchedulerLegacyPass(*PassRegistry::getPassRegistry());
}

void MachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineDominatorTreeWrapperPass>();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexesWrapperPass>();
  AU.addPreserved<SlotIndexesWrapperPass>();
  AU.addRequired<LiveIntervalsWrapperPass>();
  AU.addPreserved<LiveIntervalsWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

char PostMachineSchedulerLegacy::ID = 0;

char &llvm::PostMachineSchedulerID = PostMachineSchedulerLegacy::ID;

INITIALIZE_PASS_BEGIN(PostMachineSchedulerLegacy, "postmisched",
                      "PostRA Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(PostMachineSchedulerLegacy, "postmisched",
                    "PostRA Machine Instruction Scheduler", false, false)

PostMachineSchedulerLegacy::PostMachineSchedulerLegacy()
    : MachineFunctionPass(ID) {
  initializePostMachineSchedulerLegacyPass(*PassRegistry::getPassRegistry());
}

void PostMachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineDominatorTreeWrapperPass>();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
    MachineSchedRegistry::Registry;

/// A dummy default scheduler factory, used to detect whether the scheduler
/// has been overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return nullptr;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry>>
    MachineSchedOpt("misched",
                    cl::init(&useDefaultMachineSched), cl::Hidden,
                    cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
    DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                         useDefaultMachineSched);
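
// As an illustrative sketch (MyStrategy and the factory name below are
// hypothetical; any MachineSchedStrategy subclass works), a target or plugin
// can make its own scheduler selectable via -misched=<name> by registering a
// factory with this registry:
//
//   static ScheduleDAGInstrs *createMyScheduler(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<MyStrategy>(C));
//   }
//   static MachineSchedRegistry
//       MySchedRegistry("my-sched", "Illustrative custom scheduler",
//                       createMyScheduler);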

static cl::opt<bool> EnableMachineSched(
    "enable-misched",
    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnablePostRAMachineSched(
    "enable-post-misched",
    cl::desc("Enable the post-ra machine instruction scheduling pass."),
    cl::init(true), cl::Hidden);

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
              MachineBasicBlock::const_iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugOrPseudoInstr())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
              MachineBasicBlock::const_iterator Beg) {
  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
      .getNonConstIterator();
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
            MachineBasicBlock::const_iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugOrPseudoInstr())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
      .getNonConstIterator();
}

/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
ScheduleDAGInstrs *MachineSchedulerImpl::createMachineScheduler() {
  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = TM->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createSchedLive(this);
}

bool MachineSchedulerImpl::run(MachineFunction &Func, const TargetMachine &TM,
                               const RequiredAnalyses &Analyses) {
  MF = &Func;
  MLI = &Analyses.MLI;
  MDT = &Analyses.MDT;
  this->TM = &TM;
  AA = &Analyses.AA;
  LIS = &Analyses.LIS;

  if (VerifyScheduling) {
    LLVM_DEBUG(LIS->dump());
    const char *MSchedBanner = "Before machine scheduling.";
    if (P)
      MF->verify(P, MSchedBanner, &errs());
    else
      MF->verify(*MFAM, MSchedBanner, &errs());
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler, false);

  LLVM_DEBUG(LIS->dump());
  if (VerifyScheduling) {
    const char *MSchedBanner = "After machine scheduling.";
    if (P)
      MF->verify(P, MSchedBanner, &errs());
    else
      MF->verify(*MFAM, MSchedBanner, &errs());
  }
  return true;
}

/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
ScheduleDAGInstrs *PostMachineSchedulerImpl::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = TM->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createSchedPostRA(this);
}

bool PostMachineSchedulerImpl::run(MachineFunction &Func,
                                   const TargetMachine &TM,
                                   const RequiredAnalyses &Analyses) {
  MF = &Func;
  MLI = &Analyses.MLI;
  this->TM = &TM;
  AA = &Analyses.AA;

  if (VerifyScheduling) {
    const char *PostMSchedBanner = "Before post machine scheduling.";
    if (P)
      MF->verify(P, PostMSchedBanner, &errs());
    else
      MF->verify(*MFAM, PostMSchedBanner, &errs());
  }

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler, true);

  if (VerifyScheduling) {
    const char *PostMSchedBanner = "After post machine scheduling.";
    if (P)
      MF->verify(P, PostMSchedBanner, &errs());
    else
      MF->verify(*MFAM, PostMSchedBanner, &errs());
  }
  return true;
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return false;
  } else if (!MF.getSubtarget().enableMachineScheduler()) {
    return false;
  }

  LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));

  auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  Impl.setLegacyPass(this);
  return Impl.run(MF, TM, {MLI, MDT, AA, LIS});
}

MachineSchedulerPass::MachineSchedulerPass(const TargetMachine *TM)
    : Impl(std::make_unique<MachineSchedulerImpl>()), TM(TM) {}
MachineSchedulerPass::~MachineSchedulerPass() = default;
MachineSchedulerPass::MachineSchedulerPass(MachineSchedulerPass &&Other) =
    default;

PostMachineSchedulerPass::PostMachineSchedulerPass(const TargetMachine *TM)
    : Impl(std::make_unique<PostMachineSchedulerImpl>()), TM(TM) {}
PostMachineSchedulerPass::PostMachineSchedulerPass(
    PostMachineSchedulerPass &&Other) = default;
PostMachineSchedulerPass::~PostMachineSchedulerPass() = default;

PreservedAnalyses
MachineSchedulerPass::run(MachineFunction &MF,
                          MachineFunctionAnalysisManager &MFAM) {
  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return PreservedAnalyses::all();
  } else if (!MF.getSubtarget().enableMachineScheduler()) {
    return PreservedAnalyses::all();
  }

  LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
  auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
  auto &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
                  .getManager();
  auto &AA = FAM.getResult<AAManager>(MF.getFunction());
  auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
  Impl->setMFAM(&MFAM);
  bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS});
  if (!Changed)
    return PreservedAnalyses::all();

  return getMachineFunctionPassPreservedAnalyses()
      .preserveSet<CFGAnalyses>()
      .preserve<SlotIndexesAnalysis>()
      .preserve<LiveIntervalsAnalysis>();
}

bool PostMachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return false;
  } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
  auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  Impl.setLegacyPass(this);
  return Impl.run(MF, TM, {MLI, AA});
}

PreservedAnalyses
PostMachineSchedulerPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &MFAM) {
  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return PreservedAnalyses::all();
  } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return PreservedAnalyses::all();
  }
  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
  auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
  auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
                  .getManager();
  auto &AA = FAM.getResult<AAManager>(MF.getFunction());

  Impl->setMFAM(&MFAM);
  bool Changed = Impl->run(MF, *TM, {MLI, AA});
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, we need the isCall check to
/// enforce the boundary, but there would be no benefit to postRA scheduling
/// across calls this late anyway.
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                            MachineBasicBlock *MBB,
                            MachineFunction *MF,
                            const TargetInstrInfo *TII) {
  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
         MI->isFakeUse();
}
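
// For example (a sketch; @f is a placeholder callee), a block of the form
//
//   %1 = LOAD ...
//   CALL @f          ; boundary (isCall)
//   %2 = ADD ...
//
// is split into two scheduling regions, and no instruction is reordered
// across the CALL.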

/// A region of an MBB for scheduling.
namespace {
struct SchedRegion {
  /// RegionBegin is the first instruction in the scheduling region, and
  /// RegionEnd is either MBB->end() or the scheduling boundary after the
  /// last instruction in the scheduling region. These iterators cannot refer
  /// to instructions outside of the identified scheduling region because
  /// those may be reordered before scheduling this region.
  MachineBasicBlock::iterator RegionBegin;
  MachineBasicBlock::iterator RegionEnd;
  unsigned NumRegionInstrs;

  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
              unsigned N) :
    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
};
} // end anonymous namespace

using MBBRegionsVector = SmallVector<SchedRegion, 16>;

static void
getSchedRegions(MachineBasicBlock *MBB,
                MBBRegionsVector &Regions,
                bool RegionsTopDown) {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineBasicBlock::iterator I = nullptr;
  for (MachineBasicBlock::iterator RegionEnd = MBB->end();
       RegionEnd != MBB->begin(); RegionEnd = I) {

    // Avoid decrementing RegionEnd for blocks with no terminator.
    if (RegionEnd != MBB->end() ||
        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
      --RegionEnd;
    }

    // The next region starts above the previous region. Look backward in the
    // instruction stream until we find the nearest boundary.
    unsigned NumRegionInstrs = 0;
    I = RegionEnd;
    for (; I != MBB->begin(); --I) {
      MachineInstr &MI = *std::prev(I);
      if (isSchedBoundary(&MI, &*MBB, MF, TII))
        break;
      if (!MI.isDebugOrPseudoInstr()) {
        // MBB::size() uses instr_iterator to count. Here we need a bundle to
        // count as a single instruction.
        ++NumRegionInstrs;
      }
    }

    // It's possible we found a scheduling region that only has debug
    // instructions. Don't bother scheduling these.
    if (NumRegionInstrs != 0)
      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
  }

  if (RegionsTopDown)
    std::reverse(Regions.begin(), Regions.end());
}

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                           bool FixKillFlags) {
  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler.startBlock(&*MBB);

#ifndef NDEBUG
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
      continue;
    if (SchedOnlyBlock.getNumOccurrences()
        && (int)SchedOnlyBlock != MBB->getNumber())
      continue;
#endif

    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
    // points to the scheduling boundary at the bottom of the region. The DAG
    // does not include RegionEnd, but the region does (i.e. the next
    // RegionEnd is above the previous RegionBegin). If the current block has
    // no terminator then RegionEnd == MBB->end() for the bottom region.
    //
    // All the regions of MBB are first collected in MBBRegions; they are
    // processed top-down within MBB when the scheduler's
    // doMBBSchedRegionsTopDown() hook returns true.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls. Instructions must not be
    // added to other regions than the current one without updating MBBRegions.

    MBBRegionsVector MBBRegions;
    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
    bool ScheduleSingleMI = Scheduler.shouldScheduleSingleMIRegions();
    for (const SchedRegion &R : MBBRegions) {
      MachineBasicBlock::iterator I = R.RegionBegin;
      MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
      unsigned NumRegionInstrs = R.NumRegionInstrs;

      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions and, conditionally, regions with a single
      // MI.
      if (I == RegionEnd || (!ScheduleSingleMI && I == std::prev(RegionEnd))) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler.exitRegion();
        continue;
      }
      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
                        << " " << MBB->getName() << "\n  From: " << *I
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End\n";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
      if (DumpCriticalPathLength) {
        errs() << MF->getName();
        errs() << ":%bb. " << MBB->getNumber();
        errs() << " " << MBB->getName() << " \n";
      }

      // Schedule a region: possibly reorder instructions.
      // This invalidates the original region iterators.
      Scheduler.schedule();

      // Close the current region.
      Scheduler.exitRegion();
    }
    Scheduler.finishBlock();
    // FIXME: Ideally, no further passes should rely on kill flags. However,
    // thumb2 size reduction is currently an exception, so the PostMIScheduler
    // needs to do this.
    if (FixKillFlags)
      Scheduler.fixupKills(*MBB);
  }
  Scheduler.finalizeSchedule();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
//===----------------------------------------------------------------------===//

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*SuccSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SDep &Succ : SU->Succs)
    releaseSucc(SU, &Succ);
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SDep &Pred : SU->Preds)
    releasePred(SU, &Pred);
}

void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
  ScheduleDAGInstrs::startBlock(bb);
  SchedImpl->enterMBB(bb);
}

void ScheduleDAGMI::finishBlock() {
  SchedImpl->leaveMBB();
  ScheduleDAGInstrs::finishBlock();
}

/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
/// after crossing a scheduling boundary. [begin, end) includes all instructions
/// in the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned regioninstrs)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  SchedImpl->initPolicy(begin, end, regioninstrs);

  // Set dump direction after initializing sched policy.
  ScheduleDAGMI::DumpDirection D;
  if (SchedImpl->getPolicy().OnlyTopDown)
    D = ScheduleDAGMI::DumpDirection::TopDown;
  else if (SchedImpl->getPolicy().OnlyBottomUp)
    D = ScheduleDAGMI::DumpDirection::BottomUp;
  else
    D = ScheduleDAGMI::DumpDirection::Bidirectional;
  setDumpDirection(D);
}

/// This is normally called from the main scheduler loop but may also be invoked
/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(
    MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals.
  if (LIS)
    LIS->handleMove(*MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}
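
// A sketch of how the cutoff is typically used (builds with ABI-breaking
// checks only): bisect a suspected scheduling miscompile by capping the
// number of scheduled instructions, e.g.
//
//   llc -misched-cutoff=10 ...
//
// Once the limit is hit, CurrentTop is snapped to CurrentBottom above, so the
// remaining instructions in the region keep their original order.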

/// Per-region scheduling driver, called back from
/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
/// that does not consider liveness or register pressure. It is useful for
/// PostRA scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
  LLVM_DEBUG(SchedImpl->dumpPolicy());

  // Build the DAG.
  buildSchedGraph(AA);

  postProcessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  LLVM_DEBUG(dump());
  if (PrintDAGs) dump();
  if (ViewMISchedDAGs) viewGraph();

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    if (!checkSchedLimit())
      break;

    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");

    MachineInstr *MI = SU->getInstr();
    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else
        moveInstruction(MI, CurrentTop);
    } else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
          priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI)
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
    }
    // Notify the scheduling strategy before updating the DAG.
    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
    // runs, it can then use the accurate ReadyCycle time to determine whether
    // newly released nodes can move to the readyQ.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postProcessDAG() {
  for (auto &m : Mutations)
    m->apply(this);
}

void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                      SmallVectorImpl<SUnit*> &BotRoots) {
  for (SUnit &SU : SUnits) {
    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU.biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!SU.NumPredsLeft)
      TopRoots.push_back(&SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!SU.NumSuccsLeft)
      BotRoots.push_back(&SU);
  }
  ExitSU.biasCriticalPath();
}

/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit *> TopRoots,
                               ArrayRef<SUnit *> BotRoots) {
  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  //
  // Nodes with unreleased weak edges can still be roots.
  // Release top roots in forward order.
  for (SUnit *SU : TopRoots)
    SchedImpl->releaseTopNode(SU);

  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    SchedImpl->releaseBottomNode(*I);
  }

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(std::next(OrigPrevMI), BB, DbgValue);
    if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
      RegionEnd = DbgValue;
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static const char *scheduleTableLegend = "  i: issue\n  x: resource booked";

LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
  // Bail out early when there is no schedule model to query.
  if (!SchedModel.hasInstrSchedModel())
    return;

  // Nothing to show if there are fewer than two instructions.
  if (BB->size() < 2)
    return;

  dbgs() << "  * Schedule table (TopDown):\n";
  dbgs() << scheduleTableLegend << "\n";
  const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
  unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU)
      continue;
    const MCSchedClassDesc *SC = getSchedClass(SU);
    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
                                       PE = SchedModel.getWriteProcResEnd(SC);
         PI != PE; ++PI) {
      if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
        LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
    }
  }
  // Print the header with the cycles.
  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
  for (unsigned C = FirstCycle; C <= LastCycle; ++C)
    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
  dbgs() << "|\n";

  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU) {
      dbgs() << "Missing SUnit\n";
      continue;
    }
    std::string NodeName("SU(");
    NodeName += std::to_string(SU->NodeNum) + ")";
    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
    unsigned C = FirstCycle;
    for (; C <= LastCycle; ++C) {
      if (C == SU->TopReadyCycle)
        dbgs() << llvm::left_justify("| i", ColWidth);
      else
        dbgs() << llvm::left_justify("|", ColWidth);
    }
    dbgs() << "|\n";
    const MCSchedClassDesc *SC = getSchedClass(SU);

    SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
        make_range(SchedModel.getWriteProcResBegin(SC),
                   SchedModel.getWriteProcResEnd(SC)));

    if (MISchedSortResourcesInTrace)
      llvm::stable_sort(
          ResourcesIt,
          [](const MCWriteProcResEntry &LHS,
             const MCWriteProcResEntry &RHS) -> bool {
            return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) <
                   std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle);
          });
    for (const MCWriteProcResEntry &PI : ResourcesIt) {
      C = FirstCycle;
      const std::string ResName =
          SchedModel.getResourceName(PI.ProcResourceIdx);
      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
      for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
        dbgs() << llvm::left_justify("|", ColWidth);
      }
      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
           ++I, ++C)
        dbgs() << llvm::left_justify("| x", ColWidth);
      while (C++ <= LastCycle)
        dbgs() << llvm::left_justify("|", ColWidth);
      // Place end char.
      dbgs() << "| \n";
    }
  }
}
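
// The resulting trace looks roughly like the sketch below (column widths and
// resource names depend on the target's schedule model; "MyUnit" is a
// placeholder resource):
//
//   Cycle              | 0  | 1  | 2  |
//   SU(0)              | i  |    |    |
//            MyUnit    | x  |    |    |
//   SU(1)              |    | i  |    |
//            MyUnit    |    | x  |    |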

LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
  // Bail out early when there is no schedule model to query.
  if (!SchedModel.hasInstrSchedModel())
    return;

  // Nothing to show if there are fewer than two instructions.
  if (BB->size() < 2)
    return;
1327 | |
1328 | dbgs() << " * Schedule table (BottomUp):\n" ; |
1329 | dbgs() << scheduleTableLegend << "\n" ; |
1330 | |
1331 | const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle; |
1332 | int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle; |
1333 | for (MachineInstr &MI : *this) { |
1334 | SUnit *SU = getSUnit(&MI); |
1335 | if (!SU) |
1336 | continue; |
1337 | const MCSchedClassDesc *SC = getSchedClass(SU); |
1338 | for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC), |
1339 | PE = SchedModel.getWriteProcResEnd(SC); |
1340 | PI != PE; ++PI) { |
1341 | if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle) |
1342 | LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1; |
1343 | } |
1344 | } |
1345 | // Print the header with the cycles |
1346 | dbgs() << llvm::left_justify("Cycle" , HeaderColWidth); |
1347 | for (int C = FirstCycle; C >= LastCycle; --C) |
1348 | dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth); |
1349 | dbgs() << "|\n" ; |
1350 | |
1351 | for (MachineInstr &MI : *this) { |
1352 | SUnit *SU = getSUnit(&MI); |
1353 | if (!SU) { |
1354 | dbgs() << "Missing SUnit\n" ; |
1355 | continue; |
1356 | } |
1357 | std::string NodeName("SU(" ); |
1358 | NodeName += std::to_string(SU->NodeNum) + ")" ; |
1359 | dbgs() << llvm::left_justify(NodeName, HeaderColWidth); |
1360 | int C = FirstCycle; |
1361 | for (; C >= LastCycle; --C) { |
1362 | if (C == (int)SU->BotReadyCycle) |
1363 | dbgs() << llvm::left_justify("| i" , ColWidth); |
1364 | else |
1365 | dbgs() << llvm::left_justify("|" , ColWidth); |
1366 | } |
1367 | dbgs() << "|\n" ; |
1368 | const MCSchedClassDesc *SC = getSchedClass(SU); |
1369 | SmallVector<MCWriteProcResEntry, 4> ResourcesIt( |
1370 | make_range(SchedModel.getWriteProcResBegin(SC), |
1371 | SchedModel.getWriteProcResEnd(SC))); |
1372 | |
1373 | if (MISchedSortResourcesInTrace) |
1374 | llvm::stable_sort( |
1375 | ResourcesIt, |
1376 | [](const MCWriteProcResEntry &LHS, |
1377 | const MCWriteProcResEntry &RHS) -> bool { |
1378 | return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) < |
1379 | std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle); |
1380 | }); |
1381 | for (const MCWriteProcResEntry &PI : ResourcesIt) { |
1382 | C = FirstCycle; |
1383 | const std::string ResName = |
1384 | SchedModel.getResourceName(PI.ProcResourceIdx); |
      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
      for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
        dbgs() << llvm::left_justify("|", ColWidth);
      }
      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
           ++I, --C)
        dbgs() << llvm::left_justify("| x", ColWidth);
      while (C-- >= LastCycle)
        dbgs() << llvm::left_justify("|", ColWidth);
      // Print the row's trailing border character.
      dbgs() << "| \n";
1396 | } |
1397 | } |
1398 | } |
1399 | #endif |
1400 | |
1401 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1402 | LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const { |
1403 | if (MISchedDumpScheduleTrace) { |
1404 | if (DumpDir == DumpDirection::TopDown) |
1405 | dumpScheduleTraceTopDown(); |
1406 | else if (DumpDir == DumpDirection::BottomUp) |
1407 | dumpScheduleTraceBottomUp(); |
1408 | else if (DumpDir == DumpDirection::Bidirectional) { |
1409 | dbgs() << "* Schedule table (Bidirectional): not implemented\n" ; |
1410 | } else { |
1411 | dbgs() << "* Schedule table: DumpDirection not set.\n" ; |
1412 | } |
1413 | } |
1414 | |
1415 | for (MachineInstr &MI : *this) { |
1416 | if (SUnit *SU = getSUnit(&MI)) |
1417 | dumpNode(*SU); |
1418 | else |
1419 | dbgs() << "Missing SUnit\n" ; |
1420 | } |
1421 | } |
1422 | #endif |
1423 | |
1424 | //===----------------------------------------------------------------------===// |
1425 | // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals |
1426 | // preservation. |
1427 | //===----------------------------------------------------------------------===// |
1428 | |
1429 | ScheduleDAGMILive::~ScheduleDAGMILive() { |
1430 | delete DFSResult; |
1431 | } |
1432 | |
1433 | void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { |
1434 | const MachineInstr &MI = *SU.getInstr(); |
1435 | for (const MachineOperand &MO : MI.operands()) { |
1436 | if (!MO.isReg()) |
1437 | continue; |
1438 | if (!MO.readsReg()) |
1439 | continue; |
1440 | if (TrackLaneMasks && !MO.isUse()) |
1441 | continue; |
1442 | |
1443 | Register Reg = MO.getReg(); |
1444 | if (!Reg.isVirtual()) |
1445 | continue; |
1446 | |
1447 | // Ignore re-defs. |
1448 | if (TrackLaneMasks) { |
1449 | bool FoundDef = false; |
1450 | for (const MachineOperand &MO2 : MI.all_defs()) { |
1451 | if (MO2.getReg() == Reg && !MO2.isDead()) { |
1452 | FoundDef = true; |
1453 | break; |
1454 | } |
1455 | } |
1456 | if (FoundDef) |
1457 | continue; |
1458 | } |
1459 | |
1460 | // Record this local VReg use. |
    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
    for (; UI != VRegUses.end(); ++UI) {
      if (UI->SU == &SU)
        break;
    }
    if (UI == VRegUses.end())
      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
1468 | } |
1469 | } |
1470 | |
1471 | /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after |
1472 | /// crossing a scheduling boundary. [begin, end) includes all instructions in |
1473 | /// the region, including the boundary itself and single-instruction regions |
1474 | /// that don't get scheduled. |
1475 | void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, |
1476 | MachineBasicBlock::iterator begin, |
1477 | MachineBasicBlock::iterator end, |
1478 | unsigned regioninstrs) |
1479 | { |
1480 | // ScheduleDAGMI initializes SchedImpl's per-region policy. |
1481 | ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs); |
1482 | |
1483 | // For convenience remember the end of the liveness region. |
  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1485 | |
1486 | SUPressureDiffs.clear(); |
1487 | |
1488 | ShouldTrackPressure = SchedImpl->shouldTrackPressure(); |
1489 | ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks(); |
1490 | |
1491 | assert((!ShouldTrackLaneMasks || ShouldTrackPressure) && |
1492 | "ShouldTrackLaneMasks requires ShouldTrackPressure" ); |
1493 | } |
1494 | |
// Set up the register pressure trackers for the top scheduled and bottom
1496 | // scheduled regions. |
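// For example (hypothetical numbers): if a target's GPR pressure set has a
// limit of 14 and this region's max pressure reaches 17, the GPR set is
// recorded in RegionCriticalPSets below and its max scheduled pressure is
// tracked for the rest of the region.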
1497 | void ScheduleDAGMILive::initRegPressure() { |
1498 | VRegUses.clear(); |
1499 | VRegUses.setUniverse(MRI.getNumVirtRegs()); |
1500 | for (SUnit &SU : SUnits) |
1501 | collectVRegUses(SU); |
1502 | |
1503 | TopRPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: RegionBegin, |
1504 | TrackLaneMasks: ShouldTrackLaneMasks, TrackUntiedDefs: false); |
1505 | BotRPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: LiveRegionEnd, |
1506 | TrackLaneMasks: ShouldTrackLaneMasks, TrackUntiedDefs: false); |
1507 | |
1508 | // Close the RPTracker to finalize live ins. |
1509 | RPTracker.closeRegion(); |
1510 | |
1511 | LLVM_DEBUG(RPTracker.dump()); |
1512 | |
1513 | // Initialize the live ins and live outs. |
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1516 | |
1517 | // Close one end of the tracker so we can call |
1518 | // getMaxUpward/DownwardPressureDelta before advancing across any |
1519 | // instructions. This converts currently live regs into live ins/outs. |
1520 | TopRPTracker.closeTop(); |
1521 | BotRPTracker.closeBottom(); |
1522 | |
1523 | BotRPTracker.initLiveThru(RPTracker); |
1524 | if (!BotRPTracker.getLiveThru().empty()) { |
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    LLVM_DEBUG(dbgs() << "Live Thru: ";
               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  }
1529 | |
1530 | // For each live out vreg reduce the pressure change associated with other |
1531 | // uses of the same vreg below the live-out reaching def. |
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1533 | |
1534 | // Account for liveness generated by the region boundary. |
1535 | if (LiveRegionEnd != RegionEnd) { |
1536 | SmallVector<VRegMaskOrUnit, 8> LiveUses; |
    BotRPTracker.recede(&LiveUses);
1538 | updatePressureDiffs(LiveUses); |
1539 | } |
1540 | |
1541 | LLVM_DEBUG(dbgs() << "Top Pressure: " ; |
1542 | dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); |
1543 | dbgs() << "Bottom Pressure: " ; |
1544 | dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);); |
1545 | |
1546 | assert((BotRPTracker.getPos() == RegionEnd || |
1547 | (RegionEnd->isDebugInstr() && |
1548 | BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) && |
1549 | "Can't find the region bottom" ); |
1550 | |
1551 | // Cache the list of excess pressure sets in this region. This will also track |
1552 | // the max pressure in the scheduled code for these sets. |
1553 | RegionCriticalPSets.clear(); |
1554 | const std::vector<unsigned> &RegionPressure = |
1555 | RPTracker.getPressure().MaxSetPressure; |
1556 | for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { |
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    if (RegionPressure[i] > Limit) {
      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
                        << " Actual " << RegionPressure[i] << "\n");
      RegionCriticalPSets.push_back(PressureChange(i));
1562 | } |
1563 | } |
1564 | LLVM_DEBUG({ |
1565 | if (RegionCriticalPSets.size() > 0) { |
1566 | dbgs() << "Excess PSets: " ; |
1567 | for (const PressureChange &RCPS : RegionCriticalPSets) |
1568 | dbgs() << TRI->getRegPressureSetName(RCPS.getPSet()) << " " ; |
1569 | dbgs() << "\n" ; |
1570 | } |
1571 | }); |
1572 | } |
1573 | |
1574 | void ScheduleDAGMILive:: |
1575 | updateScheduledPressure(const SUnit *SU, |
1576 | const std::vector<unsigned> &NewMaxPressure) { |
1577 | const PressureDiff &PDiff = getPressureDiff(SU); |
1578 | unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); |
1579 | for (const PressureChange &PC : PDiff) { |
1580 | if (!PC.isValid()) |
1581 | break; |
1582 | unsigned ID = PC.getPSet(); |
1583 | while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) |
1584 | ++CritIdx; |
1585 | if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { |
1586 | if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() |
1587 | && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max()) |
1588 | RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); |
1589 | } |
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    if (NewMaxPressure[ID] >= Limit - 2) {
      LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
                        << NewMaxPressure[ID]
                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
                        << " livethru)\n");
1597 | } |
1598 | } |
1599 | } |
1600 | |
1601 | /// Update the PressureDiff array for liveness after scheduling this |
1602 | /// instruction. |
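///
/// For illustration (hypothetical vreg): if %v just became live at the bottom
/// of the zone (its LaneMask is non-empty), later uses of %v can no longer be
/// its last use, so each unscheduled user's PressureDiff is decremented; if
/// %v just became dead, a remaining use would bring it back to life, so the
/// diffs are incremented instead.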
1603 | void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<VRegMaskOrUnit> LiveUses) { |
1604 | for (const VRegMaskOrUnit &P : LiveUses) { |
1605 | Register Reg = P.RegUnit; |
1606 | /// FIXME: Currently assuming single-use physregs. |
1607 | if (!Reg.isVirtual()) |
1608 | continue; |
1609 | |
1610 | if (ShouldTrackLaneMasks) { |
1611 | // If the register has just become live then other uses won't change |
1612 | // this fact anymore => decrement pressure. |
1613 | // If the register has just become dead then other uses make it come |
1614 | // back to life => increment pressure. |
1615 | bool Decrement = P.LaneMask.any(); |
1616 | |
1617 | for (const VReg2SUnit &V2SU |
1618 | : make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) { |
1619 | SUnit &SU = *V2SU.SU; |
1620 | if (SU.isScheduled || &SU == &ExitSU) |
1621 | continue; |
1622 | |
1623 | PressureDiff &PDiff = getPressureDiff(SU: &SU); |
1624 | PDiff.addPressureChange(RegUnit: Reg, IsDec: Decrement, MRI: &MRI); |
1625 | if (llvm::any_of(Range&: PDiff, P: [](const PressureChange &Change) { |
1626 | return Change.isValid(); |
1627 | })) |
1628 | LLVM_DEBUG(dbgs() |
1629 | << " UpdateRegPressure: SU(" << SU.NodeNum << ") " |
1630 | << printReg(Reg, TRI) << ':' |
1631 | << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr(); |
1632 | dbgs() << " to " ; PDiff.dump(*TRI);); |
1633 | } |
1634 | } else { |
1635 | assert(P.LaneMask.any()); |
1636 | LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n" ); |
1637 | // This may be called before CurrentBottom has been initialized. However, |
1638 | // BotRPTracker must have a valid position. We want the value live into the |
1639 | // instruction or live out of the block, so ask for the previous |
1640 | // instruction's live-out. |
1641 | const LiveInterval &LI = LIS->getInterval(Reg); |
1642 | VNInfo *VNI; |
      MachineBasicBlock::const_iterator I =
          nextIfDebug(BotRPTracker.getPos(), BB->end());
      if (I == BB->end())
        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
      else {
        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1649 | VNI = LRQ.valueIn(); |
1650 | } |
1651 | // RegisterPressureTracker guarantees that readsReg is true for LiveUses. |
1652 | assert(VNI && "No live value at use." ); |
1653 | for (const VReg2SUnit &V2SU |
1654 | : make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) { |
1655 | SUnit *SU = V2SU.SU; |
1656 | // If this use comes before the reaching def, it cannot be a last use, |
1657 | // so decrease its pressure change. |
1658 | if (!SU->isScheduled && SU != &ExitSU) { |
          LiveQueryResult LRQ =
              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
          if (LRQ.valueIn() == VNI) {
            PressureDiff &PDiff = getPressureDiff(SU);
            PDiff.addPressureChange(Reg, true, &MRI);
            if (llvm::any_of(PDiff, [](const PressureChange &Change) {
                  return Change.isValid();
                }))
              LLVM_DEBUG(dbgs() << " UpdateRegPressure: SU(" << SU->NodeNum
                                << ") " << *SU->getInstr();
                         dbgs() << " to ";
                         PDiff.dump(*TRI););
1671 | } |
1672 | } |
1673 | } |
1674 | } |
1675 | } |
1676 | } |
1677 | |
1678 | void ScheduleDAGMILive::dump() const { |
1679 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1680 | if (EntrySU.getInstr() != nullptr) |
1681 | dumpNodeAll(EntrySU); |
1682 | for (const SUnit &SU : SUnits) { |
1683 | dumpNodeAll(SU); |
1684 | if (ShouldTrackPressure) { |
1685 | dbgs() << " Pressure Diff : " ; |
1686 | getPressureDiff(&SU).dump(*TRI); |
1687 | } |
1688 | dbgs() << " Single Issue : " ; |
1689 | if (SchedModel.mustBeginGroup(SU.getInstr()) && |
1690 | SchedModel.mustEndGroup(SU.getInstr())) |
1691 | dbgs() << "true;" ; |
1692 | else |
1693 | dbgs() << "false;" ; |
1694 | dbgs() << '\n'; |
1695 | } |
1696 | if (ExitSU.getInstr() != nullptr) |
1697 | dumpNodeAll(ExitSU); |
1698 | #endif |
1699 | } |
1700 | |
1701 | /// schedule - Called back from MachineScheduler::runOnMachineFunction |
1702 | /// after setting up the current scheduling region. [RegionBegin, RegionEnd) |
1703 | /// only includes instructions that have DAG nodes, not scheduling boundaries. |
1704 | /// |
1705 | /// This is a skeletal driver, with all the functionality pushed into helpers, |
1706 | /// so that it can be easily extended by experimental schedulers. Generally, |
1707 | /// implementing MachineSchedStrategy should be sufficient to implement a new |
1708 | /// scheduling algorithm. However, if a scheduler further subclasses |
1709 | /// ScheduleDAGMILive then it will want to override this virtual method in order |
1710 | /// to update any specialized state. |
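///
/// For illustration only, a hypothetical minimal strategy (names invented,
/// not part of this file):
///
///   struct TopOnlyStrategy : MachineSchedStrategy {
///     SUnit *pickNode(bool &IsTopNode) override {
///       IsTopNode = true;                   // always schedule top-down
///       return ReadyQ.empty() ? nullptr : ReadyQ.pop();
///     }
///     // ... plus initialize(), schedNode(), releaseTopNode(), etc.
///   };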
1711 | void ScheduleDAGMILive::schedule() { |
1712 | LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n" ); |
1713 | LLVM_DEBUG(SchedImpl->dumpPolicy()); |
1714 | buildDAGWithRegPressure(); |
1715 | |
1716 | postProcessDAG(); |
1717 | |
1718 | SmallVector<SUnit*, 8> TopRoots, BotRoots; |
1719 | findRootsAndBiasEdges(TopRoots, BotRoots); |
1720 | |
1721 | // Initialize the strategy before modifying the DAG. |
1722 | // This may initialize a DFSResult to be used for queue priority. |
  SchedImpl->initialize(this);
1724 | |
1725 | LLVM_DEBUG(dump()); |
1726 | if (PrintDAGs) dump(); |
1727 | if (ViewMISchedDAGs) viewGraph(); |
1728 | |
1729 | // Initialize ready queues now that the DAG and priority data are finalized. |
1730 | initQueues(TopRoots, BotRoots); |
1731 | |
1732 | bool IsTopNode = false; |
1733 | while (true) { |
1734 | if (!checkSchedLimit()) |
1735 | break; |
1736 | |
1737 | LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n" ); |
1738 | SUnit *SU = SchedImpl->pickNode(IsTopNode); |
1739 | if (!SU) break; |
1740 | |
1741 | assert(!SU->isScheduled && "Node already scheduled" ); |
1742 | |
1743 | scheduleMI(SU, IsTopNode); |
1744 | |
1745 | if (DFSResult) { |
1746 | unsigned SubtreeID = DFSResult->getSubtreeID(SU); |
1747 | if (!ScheduledTrees.test(Idx: SubtreeID)) { |
1748 | ScheduledTrees.set(SubtreeID); |
1749 | DFSResult->scheduleTree(SubtreeID); |
1750 | SchedImpl->scheduleTree(SubtreeID); |
1751 | } |
1752 | } |
1753 | |
1754 | // Notify the scheduling strategy after updating the DAG. |
1755 | SchedImpl->schedNode(SU, IsTopNode); |
1756 | |
1757 | updateQueues(SU, IsTopNode); |
1758 | } |
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1760 | |
1761 | placeDebugValues(); |
1762 | |
1763 | LLVM_DEBUG({ |
1764 | dbgs() << "*** Final schedule for " |
1765 | << printMBBReference(*begin()->getParent()) << " ***\n" ; |
1766 | dumpSchedule(); |
1767 | dbgs() << '\n'; |
1768 | }); |
1769 | } |
1770 | |
/// Build the DAG and set up three register pressure trackers.
1772 | void ScheduleDAGMILive::buildDAGWithRegPressure() { |
1773 | if (!ShouldTrackPressure) { |
1774 | RPTracker.reset(); |
1775 | RegionCriticalPSets.clear(); |
1776 | buildSchedGraph(AA); |
1777 | return; |
1778 | } |
1779 | |
1780 | // Initialize the register pressure tracker used by buildSchedGraph. |
1781 | RPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: LiveRegionEnd, |
1782 | TrackLaneMasks: ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true); |
1783 | |
1784 | // Account for liveness generate by the region boundary. |
1785 | if (LiveRegionEnd != RegionEnd) |
1786 | RPTracker.recede(); |
1787 | |
1788 | // Build the DAG, and compute current register pressure. |
1789 | buildSchedGraph(AA, RPTracker: &RPTracker, PDiffs: &SUPressureDiffs, LIS, TrackLaneMasks: ShouldTrackLaneMasks); |
1790 | |
1791 | // Initialize top/bottom trackers after computing region pressure. |
1792 | initRegPressure(); |
1793 | } |
1794 | |
1795 | void ScheduleDAGMILive::computeDFSResult() { |
1796 | if (!DFSResult) |
    DFSResult = new SchedDFSResult(/*BottomUp=*/true, MinSubtreeSize);
  DFSResult->clear();
  ScheduledTrees.clear();
  DFSResult->resize(SUnits.size());
  DFSResult->compute(SUnits);
  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1803 | } |
1804 | |
1805 | /// Compute the max cyclic critical path through the DAG. The scheduling DAG |
1806 | /// only provides the critical path for single block loops. To handle loops that |
1807 | /// span blocks, we could use the vreg path latencies provided by |
1808 | /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently |
1809 | /// available for use in the scheduler. |
1810 | /// |
1811 | /// The cyclic path estimation identifies a def-use pair that crosses the back |
1812 | /// edge and considers the depth and height of the nodes. For example, consider |
1813 | /// the following instruction sequence where each instruction has unit latency |
1814 | /// and defines an eponymous virtual register: |
1815 | /// |
1816 | /// a->b(a,c)->c(b)->d(c)->exit |
1817 | /// |
/// The cyclic critical path is two cycles: b->c->b
1819 | /// The acyclic critical path is four cycles: a->b->c->d->exit |
1820 | /// LiveOutHeight = height(c) = len(c->d->exit) = 2 |
1821 | /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3 |
1822 | /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4 |
1823 | /// LiveInDepth = depth(b) = len(a->b) = 1 |
1824 | /// |
1825 | /// LiveOutDepth - LiveInDepth = 3 - 1 = 2 |
1826 | /// LiveInHeight - LiveOutHeight = 4 - 2 = 2 |
1827 | /// CyclicCriticalPath = min(2, 2) = 2 |
1828 | /// |
1829 | /// This could be relevant to PostRA scheduling, but is currently implemented |
1830 | /// assuming LiveIntervals. |
1831 | unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { |
  // This only applies to single-block loops.
  if (!BB->isSuccessor(BB))
1834 | return 0; |
1835 | |
1836 | unsigned MaxCyclicLatency = 0; |
1837 | // Visit each live out vreg def to find def/use pairs that cross iterations. |
1838 | for (const VRegMaskOrUnit &P : RPTracker.getPressure().LiveOutRegs) { |
1839 | Register Reg = P.RegUnit; |
1840 | if (!Reg.isVirtual()) |
1841 | continue; |
1842 | const LiveInterval &LI = LIS->getInterval(Reg); |
    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1844 | if (!DefVNI) |
1845 | continue; |
1846 | |
    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    const SUnit *DefSU = getSUnit(DefMI);
1849 | if (!DefSU) |
1850 | continue; |
1851 | |
1852 | unsigned LiveOutHeight = DefSU->getHeight(); |
1853 | unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; |
1854 | // Visit all local users of the vreg def. |
1855 | for (const VReg2SUnit &V2SU |
1856 | : make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) { |
1857 | SUnit *SU = V2SU.SU; |
1858 | if (SU == &ExitSU) |
1859 | continue; |
1860 | |
1861 | // Only consider uses of the phi. |
      LiveQueryResult LRQ =
          LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1863 | if (!LRQ.valueIn()->isPHIDef()) |
1864 | continue; |
1865 | |
1866 | // Assume that a path spanning two iterations is a cycle, which could |
1867 | // overestimate in strange cases. This allows cyclic latency to be |
1868 | // estimated as the minimum slack of the vreg's depth or height. |
1869 | unsigned CyclicLatency = 0; |
1870 | if (LiveOutDepth > SU->getDepth()) |
1871 | CyclicLatency = LiveOutDepth - SU->getDepth(); |
1872 | |
1873 | unsigned LiveInHeight = SU->getHeight() + DefSU->Latency; |
1874 | if (LiveInHeight > LiveOutHeight) { |
1875 | if (LiveInHeight - LiveOutHeight < CyclicLatency) |
1876 | CyclicLatency = LiveInHeight - LiveOutHeight; |
1877 | } else |
1878 | CyclicLatency = 0; |
1879 | |
1880 | LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" |
1881 | << SU->NodeNum << ") = " << CyclicLatency << "c\n" ); |
1882 | if (CyclicLatency > MaxCyclicLatency) |
1883 | MaxCyclicLatency = CyclicLatency; |
1884 | } |
1885 | } |
1886 | LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n" ); |
1887 | return MaxCyclicLatency; |
1888 | } |
1889 | |
1890 | /// Release ExitSU predecessors and setup scheduler queues. Re-position |
1891 | /// the Top RP tracker in case the region beginning has changed. |
1892 | void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots, |
1893 | ArrayRef<SUnit*> BotRoots) { |
1894 | ScheduleDAGMI::initQueues(TopRoots, BotRoots); |
1895 | if (ShouldTrackPressure) { |
    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1897 | TopRPTracker.setPos(CurrentTop); |
1898 | } |
1899 | } |
1900 | |
1901 | /// Move an instruction and update register pressure. |
1902 | void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { |
1903 | // Move the instruction to its new location in the instruction stream. |
1904 | MachineInstr *MI = SU->getInstr(); |
1905 | |
1906 | if (IsTopNode) { |
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    if (ShouldTrackPressure) {
      // Update top scheduled pressure.
      RegisterOperands RegOpers;
      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
                       /*IgnoreDead=*/false);
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }

      TopRPTracker.advance(RegOpers);
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
      LLVM_DEBUG(dbgs() << "Top Pressure: "; dumpRegSetPressure(
                     TopRPTracker.getRegSetPressureAtPos(), TRI););

      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1935 | } |
1936 | } else { |
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
      BotRPTracker.setPos(CurrentBottom);
    }
    if (ShouldTrackPressure) {
      RegisterOperands RegOpers;
      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
                       /*IgnoreDead=*/false);
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }

      if (BotRPTracker.getPos() != CurrentBottom)
        BotRPTracker.recedeSkipDebugValues();
      SmallVector<VRegMaskOrUnit, 8> LiveUses;
      BotRPTracker.recede(RegOpers, &LiveUses);
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
      LLVM_DEBUG(dbgs() << "Bottom Pressure: "; dumpRegSetPressure(
                     BotRPTracker.getRegSetPressureAtPos(), TRI););

      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
      updatePressureDiffs(LiveUses);
1974 | } |
1975 | } |
1976 | } |
1977 | |
1978 | //===----------------------------------------------------------------------===// |
1979 | // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores. |
1980 | //===----------------------------------------------------------------------===// |
1981 | |
1982 | namespace { |
1983 | |
1984 | /// Post-process the DAG to create cluster edges between neighboring |
1985 | /// loads or between neighboring stores. |
1986 | class BaseMemOpClusterMutation : public ScheduleDAGMutation { |
1987 | struct MemOpInfo { |
1988 | SUnit *SU; |
1989 | SmallVector<const MachineOperand *, 4> BaseOps; |
1990 | int64_t Offset; |
1991 | LocationSize Width; |
1992 | bool OffsetIsScalable; |
1993 | |
1994 | MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps, |
1995 | int64_t Offset, bool OffsetIsScalable, LocationSize Width) |
1996 | : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width), |
1997 | OffsetIsScalable(OffsetIsScalable) {} |
1998 | |
1999 | static bool Compare(const MachineOperand *const &A, |
2000 | const MachineOperand *const &B) { |
2001 | if (A->getType() != B->getType()) |
2002 | return A->getType() < B->getType(); |
2003 | if (A->isReg()) |
2004 | return A->getReg() < B->getReg(); |
2005 | if (A->isFI()) { |
2006 | const MachineFunction &MF = *A->getParent()->getParent()->getParent(); |
2007 | const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); |
2008 | bool StackGrowsDown = TFI.getStackGrowthDirection() == |
2009 | TargetFrameLowering::StackGrowsDown; |
2010 | return StackGrowsDown ? A->getIndex() > B->getIndex() |
2011 | : A->getIndex() < B->getIndex(); |
2012 | } |
2013 | |
2014 | llvm_unreachable("MemOpClusterMutation only supports register or frame " |
2015 | "index bases." ); |
2016 | } |
2017 | |
2018 | bool operator<(const MemOpInfo &RHS) const { |
2019 | // FIXME: Don't compare everything twice. Maybe use C++20 three way |
2020 | // comparison instead when it's available. |
    if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
                                     RHS.BaseOps.begin(), RHS.BaseOps.end(),
                                     Compare))
      return true;
    if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
                                     BaseOps.begin(), BaseOps.end(), Compare))
2027 | return false; |
2028 | if (Offset != RHS.Offset) |
2029 | return Offset < RHS.Offset; |
2030 | return SU->NodeNum < RHS.SU->NodeNum; |
2031 | } |
2032 | }; |
2033 | |
2034 | const TargetInstrInfo *TII; |
2035 | const TargetRegisterInfo *TRI; |
2036 | bool IsLoad; |
2037 | bool ReorderWhileClustering; |
2038 | |
2039 | public: |
2040 | BaseMemOpClusterMutation(const TargetInstrInfo *tii, |
2041 | const TargetRegisterInfo *tri, bool IsLoad, |
2042 | bool ReorderWhileClustering) |
2043 | : TII(tii), TRI(tri), IsLoad(IsLoad), |
2044 | ReorderWhileClustering(ReorderWhileClustering) {} |
2045 | |
2046 | void apply(ScheduleDAGInstrs *DAGInstrs) override; |
2047 | |
2048 | protected: |
2049 | void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster, |
2050 | ScheduleDAGInstrs *DAG); |
2051 | void collectMemOpRecords(std::vector<SUnit> &SUnits, |
2052 | SmallVectorImpl<MemOpInfo> &MemOpRecords); |
2053 | bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG, |
2054 | DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups); |
2055 | }; |
2056 | |
2057 | class StoreClusterMutation : public BaseMemOpClusterMutation { |
2058 | public: |
2059 | StoreClusterMutation(const TargetInstrInfo *tii, |
2060 | const TargetRegisterInfo *tri, |
2061 | bool ReorderWhileClustering) |
2062 | : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {} |
2063 | }; |
2064 | |
2065 | class LoadClusterMutation : public BaseMemOpClusterMutation { |
2066 | public: |
2067 | LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri, |
2068 | bool ReorderWhileClustering) |
2069 | : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {} |
2070 | }; |
2071 | |
2072 | } // end anonymous namespace |
2073 | |
2074 | namespace llvm { |
2075 | |
2076 | std::unique_ptr<ScheduleDAGMutation> |
2077 | createLoadClusterDAGMutation(const TargetInstrInfo *TII, |
2078 | const TargetRegisterInfo *TRI, |
2079 | bool ReorderWhileClustering) { |
  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
                                  TII, TRI, ReorderWhileClustering)
                            : nullptr;
2083 | } |
2084 | |
2085 | std::unique_ptr<ScheduleDAGMutation> |
2086 | createStoreClusterDAGMutation(const TargetInstrInfo *TII, |
2087 | const TargetRegisterInfo *TRI, |
2088 | bool ReorderWhileClustering) { |
  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
                                  TII, TRI, ReorderWhileClustering)
                            : nullptr;
2092 | } |
2093 | |
2094 | } // end namespace llvm |
2095 | |
// Sort all the loads/stores first; then, for each load/store, check the
// following loads/stores one by one until reaching the first non-dependent
// one, and call the target hook to see if they can be clustered.
// If FastCluster is enabled, we assume that all the loads/stores have been
// preprocessed and have no dependencies on each other.
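//
// For illustration only (made-up operands): given independent 8-byte loads
// from [base], [base+8] and [base+16], this mutation would query the target's
// shouldClusterMemOps(...) with ClusterSize=2/NumBytes=16 for the first pair
// and ClusterSize=3/NumBytes=24 when growing the cluster, adding an
// SDep::Cluster edge each time the target agrees.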
2101 | void BaseMemOpClusterMutation::clusterNeighboringMemOps( |
2102 | ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster, |
2103 | ScheduleDAGInstrs *DAG) { |
2104 | // Keep track of the current cluster length and bytes for each SUnit. |
2105 | DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo; |
2106 | EquivalenceClasses<SUnit *> Clusters; |
2107 | |
  // At this point, the `MemOpRecords` array must hold at least two mem ops.
  // Try to cluster the mem ops collected within the `MemOpRecords` array.
  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
    // The decision to cluster mem ops is made based on target-dependent
    // logic.
    auto MemOpa = MemOpRecords[Idx];

    // Seek the next load/store to cluster with.
    unsigned NextIdx = Idx + 1;
    for (; NextIdx < End; ++NextIdx)
      // Skip if MemOpb has been clustered already or has a dependency with
      // MemOpa.
      if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
          (FastCluster ||
           (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
            !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
        break;
2124 | if (NextIdx == End) |
2125 | continue; |
2126 | |
2127 | auto MemOpb = MemOpRecords[NextIdx]; |
2128 | unsigned ClusterLength = 2; |
2129 | unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() + |
2130 | MemOpb.Width.getValue().getKnownMinValue(); |
    auto It = SUnit2ClusterInfo.find(MemOpa.SU->NodeNum);
2132 | if (It != SUnit2ClusterInfo.end()) { |
2133 | const auto &[Len, Bytes] = It->second; |
2134 | ClusterLength = Len + 1; |
2135 | CurrentClusterBytes = Bytes + MemOpb.Width.getValue().getKnownMinValue(); |
2136 | } |
2137 | |
    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
                                  MemOpa.OffsetIsScalable, MemOpb.BaseOps,
                                  MemOpb.Offset, MemOpb.OffsetIsScalable,
                                  ClusterLength, CurrentClusterBytes))
2142 | continue; |
2143 | |
2144 | SUnit *SUa = MemOpa.SU; |
2145 | SUnit *SUb = MemOpb.SU; |
2146 | |
2147 | if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum) |
2148 | std::swap(a&: SUa, b&: SUb); |
2149 | |
2150 | // FIXME: Is this check really required? |
    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
2152 | continue; |
2153 | |
    Clusters.unionSets(SUa, SUb);
    LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
                      << SUb->NodeNum << ")\n");
2157 | ++NumClustered; |
2158 | |
2159 | if (IsLoad) { |
2160 | // Copy successor edges from SUa to SUb. Interleaving computation |
2161 | // dependent on SUa can prevent load combining due to register reuse. |
2162 | // Predecessor edges do not need to be copied from SUb to SUa since |
2163 | // nearby loads should have effectively the same inputs. |
2164 | for (const SDep &Succ : SUa->Succs) { |
2165 | if (Succ.getSUnit() == SUb) |
2166 | continue; |
2167 | LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum |
2168 | << ")\n" ); |
2169 | DAG->addEdge(SuccSU: Succ.getSUnit(), PredDep: SDep(SUb, SDep::Artificial)); |
2170 | } |
2171 | } else { |
      // Copy predecessor edges from SUb to SUa so that the SUnits that SUb
      // depends on cannot be scheduled in between SUb and SUa. Successor
      // edges do not need to be copied from SUa to SUb since nothing will
      // depend on stores.
      // Note that we don't need to worry about memory dependencies here:
      // mem ops are never clustered if they have any memory dependency on
      // each other.
2178 | for (const SDep &Pred : SUb->Preds) { |
2179 | if (Pred.getSUnit() == SUa) |
2180 | continue; |
2181 | LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum |
2182 | << ")\n" ); |
2183 | DAG->addEdge(SuccSU: SUa, PredDep: SDep(Pred.getSUnit(), SDep::Artificial)); |
2184 | } |
2185 | } |
2186 | |
2187 | SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength, |
2188 | CurrentClusterBytes}; |
2189 | |
2190 | LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength |
2191 | << ", Curr cluster bytes: " << CurrentClusterBytes |
2192 | << "\n" ); |
2193 | } |
2194 | |
2195 | // Add cluster group information. |
2196 | // Iterate over all of the equivalence sets. |
2197 | auto &AllClusters = DAG->getClusters(); |
2198 | for (const EquivalenceClasses<SUnit *>::ECValue *I : Clusters) { |
2199 | if (!I->isLeader()) |
2200 | continue; |
2201 | ClusterInfo Group; |
2202 | unsigned ClusterIdx = AllClusters.size(); |
    for (SUnit *MemberI : Clusters.members(*I)) {
      MemberI->ParentClusterIdx = ClusterIdx;
      Group.insert(MemberI);
    }
    AllClusters.push_back(Group);
2208 | } |
2209 | } |
2210 | |
2211 | void BaseMemOpClusterMutation::collectMemOpRecords( |
2212 | std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) { |
2213 | for (auto &SU : SUnits) { |
2214 | if ((IsLoad && !SU.getInstr()->mayLoad()) || |
2215 | (!IsLoad && !SU.getInstr()->mayStore())) |
2216 | continue; |
2217 | |
2218 | const MachineInstr &MI = *SU.getInstr(); |
2219 | SmallVector<const MachineOperand *, 4> BaseOps; |
2220 | int64_t Offset; |
2221 | bool OffsetIsScalable; |
    LocationSize Width = LocationSize::precise(0);
2223 | if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, |
2224 | OffsetIsScalable, Width, TRI)) { |
2225 | if (!Width.hasValue()) |
2226 | continue; |
2227 | |
      MemOpRecords.push_back(
          MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));

      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable
                        << ", Width: " << Width << "\n");
2234 | } |
2235 | #ifndef NDEBUG |
2236 | for (const auto *Op : BaseOps) |
2237 | assert(Op); |
2238 | #endif |
2239 | } |
2240 | } |
2241 | |
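// A note on the FastCluster heuristic below, with invented numbers for
// illustration: with 64 clusterable mem ops in a region of 500 SUnits,
// 64 * 500 / 1000 = 32; if that exceeds FastClusterThreshold, the quadratic
// DAG reachability checks are skipped and mem ops are instead grouped by
// their control-flow predecessor before clustering.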
2242 | bool BaseMemOpClusterMutation::groupMemOps( |
2243 | ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG, |
2244 | DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) { |
2245 | bool FastCluster = |
2246 | ForceFastCluster || |
2247 | MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold; |
2248 | |
2249 | for (const auto &MemOp : MemOps) { |
2250 | unsigned ChainPredID = DAG->SUnits.size(); |
2251 | if (FastCluster) { |
2252 | for (const SDep &Pred : MemOp.SU->Preds) { |
        // We only want to cluster mem ops that have the same ctrl (non-data)
        // pred, so that they have no ctrl dependency on each other. But for
        // store instrs, we can still cluster them if the pred is a load
        // instr.
2256 | if ((Pred.isCtrl() && |
2257 | (IsLoad || |
2258 | (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) && |
2259 | !Pred.isArtificial()) { |
2260 | ChainPredID = Pred.getSUnit()->NodeNum; |
2261 | break; |
2262 | } |
2263 | } |
2264 | } else |
2265 | ChainPredID = 0; |
2266 | |
    Groups[ChainPredID].push_back(MemOp);
2268 | } |
2269 | return FastCluster; |
2270 | } |
2271 | |
2272 | /// Callback from DAG postProcessing to create cluster edges for loads/stores. |
2273 | void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { |
2274 | // Collect all the clusterable loads/stores |
2275 | SmallVector<MemOpInfo, 32> MemOpRecords; |
2276 | collectMemOpRecords(SUnits&: DAG->SUnits, MemOpRecords); |
2277 | |
2278 | if (MemOpRecords.size() < 2) |
2279 | return; |
2280 | |
  // Put the loads/stores without dependencies into the same group with some
  // heuristic if the DAG is too complex, to avoid a compile-time blowup.
  // Note that some fusion pairs may be lost this way.
  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);

  for (auto &Group : Groups) {
    // Sort the loads/stores so that clustering can stop as early as
    // possible.
    llvm::sort(Group.second);

    // Try to cluster all the neighboring loads/stores.
    clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2294 | } |
2295 | } |
2296 | |
2297 | //===----------------------------------------------------------------------===// |
2298 | // CopyConstrain - DAG post-processing to encourage copy elimination. |
2299 | //===----------------------------------------------------------------------===// |
2300 | |
2301 | namespace { |
2302 | |
2303 | /// Post-process the DAG to create weak edges from all uses of a copy to |
2304 | /// the one use that defines the copy's source vreg, most likely an induction |
2305 | /// variable increment. |
2306 | class CopyConstrain : public ScheduleDAGMutation { |
2307 | // Transient state. |
2308 | SlotIndex RegionBeginIdx; |
2309 | |
2310 | // RegionEndIdx is the slot index of the last non-debug instruction in the |
2311 | // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. |
2312 | SlotIndex RegionEndIdx; |
2313 | |
2314 | public: |
2315 | CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} |
2316 | |
2317 | void apply(ScheduleDAGInstrs *DAGInstrs) override; |
2318 | |
2319 | protected: |
2320 | void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG); |
2321 | }; |
2322 | |
2323 | } // end anonymous namespace |
2324 | |
2325 | namespace llvm { |
2326 | |
2327 | std::unique_ptr<ScheduleDAGMutation> |
2328 | createCopyConstrainDAGMutation(const TargetInstrInfo *TII, |
2329 | const TargetRegisterInfo *TRI) { |
2330 | return std::make_unique<CopyConstrain>(args&: TII, args&: TRI); |
2331 | } |
2332 | |
2333 | } // end namespace llvm |
2334 | |
2335 | /// constrainLocalCopy handles two possibilities: |
2336 | /// 1) Local src: |
2337 | /// I0: = dst |
2338 | /// I1: src = ... |
2339 | /// I2: = dst |
2340 | /// I3: dst = src (copy) |
2341 | /// (create pred->succ edges I0->I1, I2->I1) |
2342 | /// |
2343 | /// 2) Local copy: |
2344 | /// I0: dst = src (copy) |
2345 | /// I1: = dst |
2346 | /// I2: src = ... |
2347 | /// I3: = dst |
2348 | /// (create pred->succ edges I1->I2, I3->I2) |
2349 | /// |
2350 | /// Although the MachineScheduler is currently constrained to single blocks, |
2351 | /// this algorithm should handle extended blocks. An EBB is a set of |
2352 | /// contiguously numbered blocks such that the previous block in the EBB is |
2353 | /// always the single predecessor. |
2354 | void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { |
2355 | LiveIntervals *LIS = DAG->getLIS(); |
2356 | MachineInstr *Copy = CopySU->getInstr(); |
2357 | |
2358 | // Check for pure vreg copies. |
2359 | const MachineOperand &SrcOp = Copy->getOperand(i: 1); |
2360 | Register SrcReg = SrcOp.getReg(); |
2361 | if (!SrcReg.isVirtual() || !SrcOp.readsReg()) |
2362 | return; |
2363 | |
2364 | const MachineOperand &DstOp = Copy->getOperand(i: 0); |
2365 | Register DstReg = DstOp.getReg(); |
2366 | if (!DstReg.isVirtual() || DstOp.isDead()) |
2367 | return; |
2368 | |
  // Check if either the dest or source is local. If it's live across a back
  // edge, it's not local. Note that if both vregs are live across the back
  // edge, we cannot successfully constrain the copy without cyclic scheduling.
2372 | // If both the copy's source and dest are local live intervals, then we |
2373 | // should treat the dest as the global for the purpose of adding |
2374 | // constraints. This adds edges from source's other uses to the copy. |
2375 | unsigned LocalReg = SrcReg; |
2376 | unsigned GlobalReg = DstReg; |
2377 | LiveInterval *LocalLI = &LIS->getInterval(Reg: LocalReg); |
2378 | if (!LocalLI->isLocal(Start: RegionBeginIdx, End: RegionEndIdx)) { |
2379 | LocalReg = DstReg; |
2380 | GlobalReg = SrcReg; |
2381 | LocalLI = &LIS->getInterval(Reg: LocalReg); |
2382 | if (!LocalLI->isLocal(Start: RegionBeginIdx, End: RegionEndIdx)) |
2383 | return; |
2384 | } |
2385 | LiveInterval *GlobalLI = &LIS->getInterval(Reg: GlobalReg); |
2386 | |
2387 | // Find the global segment after the start of the local LI. |
  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2389 | // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a |
2390 | // local live range. We could create edges from other global uses to the local |
2391 | // start, but the coalescer should have already eliminated these cases, so |
2392 | // don't bother dealing with it. |
2393 | if (GlobalSegment == GlobalLI->end()) |
2394 | return; |
2395 | |
2396 | // If GlobalSegment is killed at the LocalLI->start, the call to find() |
2397 | // returned the next global segment. But if GlobalSegment overlaps with |
2398 | // LocalLI->start, then advance to the next segment. If a hole in GlobalLI |
2399 | // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. |
  if (GlobalSegment->contains(LocalLI->beginIndex()))
2401 | ++GlobalSegment; |
2402 | |
2403 | if (GlobalSegment == GlobalLI->end()) |
2404 | return; |
2405 | |
2406 | // Check if GlobalLI contains a hole in the vicinity of LocalLI. |
2407 | if (GlobalSegment != GlobalLI->begin()) { |
2408 | // Two address defs have no hole. |
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
                               GlobalSegment->start)) {
      return;
    }
    // If the prior global segment may be defined by the same two-address
    // instruction that also defines LocalLI, then we can't make a hole here.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
                               LocalLI->beginIndex())) {
2417 | return; |
2418 | } |
2419 | // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise |
2420 | // it would be a disconnected component in the live range. |
2421 | assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() && |
2422 | "Disconnected LRG within the scheduling region." ); |
2423 | } |
  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2425 | if (!GlobalDef) |
2426 | return; |
2427 | |
  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2429 | if (!GlobalSU) |
2430 | return; |
2431 | |
2432 | // GlobalDef is the bottom of the GlobalLI hole. Open the hole by |
2433 | // constraining the uses of the last local def to precede GlobalDef. |
2434 | SmallVector<SUnit*,8> LocalUses; |
  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2438 | for (const SDep &Succ : LastLocalSU->Succs) { |
2439 | if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg) |
2440 | continue; |
2441 | if (Succ.getSUnit() == GlobalSU) |
2442 | continue; |
    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
      return;
    LocalUses.push_back(Succ.getSUnit());
2446 | } |
2447 | // Open the top of the GlobalLI hole by constraining any earlier global uses |
2448 | // to precede the start of LocalLI. |
2449 | SmallVector<SUnit*,8> GlobalUses; |
  MachineInstr *FirstLocalDef =
      LIS->getInstructionFromIndex(LocalLI->beginIndex());
  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2453 | for (const SDep &Pred : GlobalSU->Preds) { |
2454 | if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg) |
2455 | continue; |
2456 | if (Pred.getSUnit() == FirstLocalSU) |
2457 | continue; |
    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
      return;
    GlobalUses.push_back(Pred.getSUnit());
2461 | } |
2462 | LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n" ); |
2463 | // Add the weak edges. |
2464 | for (SUnit *LU : LocalUses) { |
2465 | LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU(" |
2466 | << GlobalSU->NodeNum << ")\n" ); |
2467 | DAG->addEdge(SuccSU: GlobalSU, PredDep: SDep(LU, SDep::Weak)); |
2468 | } |
2469 | for (SUnit *GU : GlobalUses) { |
2470 | LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU(" |
2471 | << FirstLocalSU->NodeNum << ")\n" ); |
2472 | DAG->addEdge(SuccSU: FirstLocalSU, PredDep: SDep(GU, SDep::Weak)); |
2473 | } |
2474 | } |
2475 | |
2476 | /// Callback from DAG postProcessing to create weak edges to encourage |
2477 | /// copy elimination. |
2478 | void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) { |
2479 | ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); |
  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2481 | |
  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
  if (FirstPos == DAG->end())
    return;
  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
      *priorNonDebug(DAG->end(), DAG->begin()));
2488 | |
2489 | for (SUnit &SU : DAG->SUnits) { |
2490 | if (!SU.getInstr()->isCopy()) |
2491 | continue; |
2492 | |
2493 | constrainLocalCopy(CopySU: &SU, DAG: static_cast<ScheduleDAGMILive*>(DAG)); |
2494 | } |
2495 | } |
2496 | |
2497 | //===----------------------------------------------------------------------===// |
2498 | // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler |
2499 | // and possibly other custom schedulers. |
2500 | //===----------------------------------------------------------------------===// |
2501 | |
2502 | static const unsigned InvalidCycle = ~0U; |
2503 | |
2504 | SchedBoundary::~SchedBoundary() { delete HazardRec; } |
2505 | |
2506 | /// Given a Count of resource usage and a Latency value, return true if a |
2507 | /// SchedBoundary becomes resource limited. |
2508 | /// If we are checking after scheduling a node, we should return true when |
2509 | /// we just reach the resource limit. |
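/// For example, with invented numbers: if LFactor == 2, Count == 10 and
/// Latency == 4, then ResCntFactor = 10 - 4 * 2 = 2 == LFactor, so the zone
/// counts as resource limited when checking after scheduling a node, but not
/// before.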
2510 | static bool checkResourceLimit(unsigned LFactor, unsigned Count, |
2511 | unsigned Latency, bool AfterSchedNode) { |
2512 | int ResCntFactor = (int)(Count - (Latency * LFactor)); |
2513 | if (AfterSchedNode) |
2514 | return ResCntFactor >= (int)LFactor; |
2515 | else |
2516 | return ResCntFactor > (int)LFactor; |
2517 | } |
2518 | |
2519 | void SchedBoundary::reset() { |
2520 | // A new HazardRec is created for each DAG and owned by SchedBoundary. |
2521 | // Destroying and reconstructing it is very expensive though. So keep |
2522 | // invalid, placeholder HazardRecs. |
2523 | if (HazardRec && HazardRec->isEnabled()) { |
2524 | delete HazardRec; |
2525 | HazardRec = nullptr; |
2526 | } |
2527 | Available.clear(); |
2528 | Pending.clear(); |
2529 | CheckPending = false; |
2530 | CurrCycle = 0; |
2531 | CurrMOps = 0; |
2532 | MinReadyCycle = std::numeric_limits<unsigned>::max(); |
2533 | ExpectedLatency = 0; |
2534 | DependentLatency = 0; |
2535 | RetiredMOps = 0; |
2536 | MaxExecutedResCount = 0; |
2537 | ZoneCritResIdx = 0; |
2538 | IsResourceLimited = false; |
2539 | ReservedCycles.clear(); |
2540 | ReservedResourceSegments.clear(); |
2541 | ReservedCyclesIndex.clear(); |
2542 | ResourceGroupSubUnitMasks.clear(); |
2543 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
2544 | // Track the maximum number of stall cycles that could arise either from the |
2545 | // latency of a DAG edge or the number of cycles that a processor resource is |
2546 | // reserved (SchedBoundary::ReservedCycles). |
2547 | MaxObservedStall = 0; |
2548 | #endif |
2549 | // Reserve a zero-count for invalid CritResIdx. |
  ExecutedResCounts.resize(1);
  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2552 | } |
2553 | |
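// Illustrative arithmetic for the remainder counts computed below (made-up
// values): an SU whose sched class acquires a resource at cycle 1 and
// releases it at cycle 3 contributes (3 - 1) * getResourceFactor(PIdx) to
// RemainingCounts[PIdx], and its micro-op count scaled by getMicroOpFactor()
// to RemIssueCount.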
2554 | void SchedRemainder:: |
2555 | init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { |
2556 | reset(); |
2557 | if (!SchedModel->hasInstrSchedModel()) |
2558 | return; |
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (SUnit &SU : DAG->SUnits) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
                     * SchedModel->getMicroOpFactor();
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
2569 | assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle); |
2570 | RemainingCounts[PIdx] += |
2571 | (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle)); |
2572 | } |
2573 | } |
2574 | } |
2575 | |
2576 | void SchedBoundary:: |
2577 | init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { |
2578 | reset(); |
2579 | DAG = dag; |
2580 | SchedModel = smodel; |
2581 | Rem = rem; |
2582 | if (SchedModel->hasInstrSchedModel()) { |
2583 | unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); |
    ReservedCyclesIndex.resize(ResourceCount);
    ExecutedResCounts.resize(ResourceCount);
    ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
    unsigned NumUnits = 0;

    for (unsigned i = 0; i < ResourceCount; ++i) {
      ReservedCyclesIndex[i] = NumUnits;
      NumUnits += SchedModel->getProcResource(i)->NumUnits;
      if (isUnbufferedGroup(i)) {
        auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
        for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
             U != UE; ++U)
          ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
      }
    }

    ReservedCycles.resize(NumUnits, InvalidCycle);
2601 | } |
2602 | } |
2603 | |
2604 | /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat |
2605 | /// these "soft stalls" differently than the hard stall cycles based on CPU |
2606 | /// resources and computed by checkHazard(). A fully in-order model |
2607 | /// (MicroOpBufferSize==0) will not make use of this since instructions are not |
2608 | /// available for scheduling until they are ready. However, a weaker in-order |
2609 | /// model may use this for heuristics. For example, if a processor has in-order |
2610 | /// behavior when reading certain resources, this may come into play. |
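/// For example (hypothetical cycles): an unbuffered SU whose TopReadyCycle is
/// 7 while CurrCycle is 5 reports a soft stall of 2 cycles; a buffered SU
/// always reports 0.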
2611 | unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { |
2612 | if (!SU->isUnbuffered) |
2613 | return 0; |
2614 | |
2615 | unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); |
2616 | if (ReadyCycle > CurrCycle) |
2617 | return ReadyCycle - CurrCycle; |
2618 | return 0; |
2619 | } |
2620 | |
2621 | /// Compute the next cycle at which the given processor resource unit |
2622 | /// can be scheduled. |
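/// For example (invented values, with resource intervals disabled): if
/// ReservedCycles[InstanceIdx] == 12, a top-down boundary reports the unit
/// free at cycle 12, while a bottom-up boundary with ReleaseAtCycle == 3
/// reports max(CurrCycle, 12 + 3) == 15.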
2623 | unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx, |
2624 | unsigned ReleaseAtCycle, |
2625 | unsigned AcquireAtCycle) { |
2626 | if (SchedModel && SchedModel->enableIntervals()) { |
2627 | if (isTop()) |
2628 | return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop( |
2629 | CurrCycle, AcquireAtCycle, ReleaseAtCycle); |
2630 | |
2631 | return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom( |
2632 | CurrCycle, AcquireAtCycle, ReleaseAtCycle); |
2633 | } |
2634 | |
2635 | unsigned NextUnreserved = ReservedCycles[InstanceIdx]; |
2636 | // If this resource has never been used, always return cycle zero. |
2637 | if (NextUnreserved == InvalidCycle) |
2638 | return CurrCycle; |
2639 | // For bottom-up scheduling add the cycles needed for the current operation. |
2640 | if (!isTop()) |
    NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2642 | return NextUnreserved; |
2643 | } |
2644 | |
2645 | /// Compute the next cycle at which the given processor resource can be |
2646 | /// scheduled. Returns the next cycle and the index of the processor resource |
2647 | /// instance in the reserved cycles vector. |
2648 | std::pair<unsigned, unsigned> |
2649 | SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, |
2650 | unsigned ReleaseAtCycle, |
2651 | unsigned AcquireAtCycle) { |
2652 | if (MischedDetailResourceBooking) { |
2653 | LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n" ); |
2654 | LLVM_DEBUG(dumpReservedCycles()); |
2655 | LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n" ); |
2656 | } |
2657 | unsigned MinNextUnreserved = InvalidCycle; |
2658 | unsigned InstanceIdx = 0; |
2659 | unsigned StartIndex = ReservedCyclesIndex[PIdx]; |
2660 | unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; |
  assert(NumberOfInstances > 0 &&
         "Cannot have zero instances of a ProcResource");
2663 | |
2664 | if (isUnbufferedGroup(PIdx)) { |
2665 | // If any subunits are used by the instruction, report that the |
2666 | // subunits of the resource group are available at the first cycle |
2667 | // in which the unit is available, effectively removing the group |
2668 | // record from hazarding and basing the hazarding decisions on the |
2669 | // subunit records. Otherwise, choose the first available instance |
2670 | // from among the subunits. Specifications which assign cycles to |
2671 | // both the subunits and the group or which use an unbuffered |
2672 | // group with buffered subunits will appear to schedule |
2673 | // strangely. In the first case, the additional cycles for the |
2674 | // group will be ignored. In the second, the group will be |
2675 | // ignored entirely. |
    for (const MCWriteProcResEntry &PE :
         make_range(SchedModel->getWriteProcResBegin(SC),
                    SchedModel->getWriteProcResEnd(SC)))
      if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
        return std::make_pair(getNextResourceCycleByInstance(
                                  StartIndex, ReleaseAtCycle, AcquireAtCycle),
                              StartIndex);
2683 | |
2684 | auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin; |
2685 | for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) { |
2686 | unsigned NextUnreserved, NextInstanceIdx; |
      std::tie(NextUnreserved, NextInstanceIdx) =
          getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2689 | if (MinNextUnreserved > NextUnreserved) { |
2690 | InstanceIdx = NextInstanceIdx; |
2691 | MinNextUnreserved = NextUnreserved; |
2692 | } |
2693 | } |
    return std::make_pair(MinNextUnreserved, InstanceIdx);
2695 | } |
2696 | |
2697 | for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; |
2698 | ++I) { |
    unsigned NextUnreserved =
        getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
    if (MischedDetailResourceBooking)
      LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
                        << NextUnreserved << "c\n");
2704 | if (MinNextUnreserved > NextUnreserved) { |
2705 | InstanceIdx = I; |
2706 | MinNextUnreserved = NextUnreserved; |
2707 | } |
2708 | } |
  if (MischedDetailResourceBooking)
    LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
                      << "[" << InstanceIdx - StartIndex << "]"
                      << " available @" << MinNextUnreserved << "c"
                      << "\n");
  return std::make_pair(MinNextUnreserved, InstanceIdx);
2715 | } |
2716 | |
2717 | /// Does this SU have a hazard within the current instruction group. |
2718 | /// |
2719 | /// The scheduler supports two modes of hazard recognition. The first is the |
2720 | /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that |
2721 | /// supports highly complicated in-order reservation tables |
2722 | /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. |
2723 | /// |
2724 | /// The second is a streamlined mechanism that checks for hazards based on |
2725 | /// simple counters that the scheduler itself maintains. It explicitly checks |
2726 | /// for instruction dispatch limitations, including the number of micro-ops that |
2727 | /// can dispatch per cycle. |
2728 | /// |
2729 | /// TODO: Also check whether the SU must start a new group. |
2730 | bool SchedBoundary::checkHazard(SUnit *SU) { |
2731 | if (HazardRec->isEnabled() |
2732 | && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) { |
    LLVM_DEBUG(dbgs().indent(2)
               << "hazard: SU(" << SU->NodeNum << ") reported by HazardRec\n");
2735 | return true; |
2736 | } |
2737 | |
  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2739 | if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { |
2740 | LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") uops=" |
2741 | << uops << ", CurrMOps = " << CurrMOps << ", " |
2742 | << "CurrMOps + uops > issue width of " |
                                << SchedModel->getIssueWidth() << "\n");
2744 | return true; |
2745 | } |
2746 | |
2747 | if (CurrMOps > 0 && |
      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2750 | LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") must " |
2751 | << (isTop() ? "begin" : "end" ) << " group\n" ); |
2752 | return true; |
2753 | } |
2754 | |
2755 | if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { |
2756 | const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
    for (const MCWriteProcResEntry &PE :
         make_range(SchedModel->getWriteProcResBegin(SC),
                    SchedModel->getWriteProcResEnd(SC))) {
2760 | unsigned ResIdx = PE.ProcResourceIdx; |
2761 | unsigned ReleaseAtCycle = PE.ReleaseAtCycle; |
2762 | unsigned AcquireAtCycle = PE.AcquireAtCycle; |
2763 | unsigned NRCycle, InstanceIdx; |
      std::tie(NRCycle, InstanceIdx) =
          getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2766 | if (NRCycle > CurrCycle) { |
2767 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
2768 | MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall); |
2769 | #endif |
2770 | LLVM_DEBUG(dbgs().indent(2) |
2771 | << "hazard: SU(" << SU->NodeNum << ") " |
2772 | << SchedModel->getResourceName(ResIdx) << '[' |
2773 | << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' << "=" |
2774 | << NRCycle << "c, is later than " |
2775 | << "CurrCycle = " << CurrCycle << "c\n" ); |
2776 | return true; |
2777 | } |
2778 | } |
2779 | } |
2780 | return false; |
2781 | } |
2782 | |
2783 | // Find the unscheduled node in ReadySUs with the highest latency. |
2784 | unsigned SchedBoundary:: |
2785 | findMaxLatency(ArrayRef<SUnit*> ReadySUs) { |
2786 | SUnit *LateSU = nullptr; |
2787 | unsigned RemLatency = 0; |
2788 | for (SUnit *SU : ReadySUs) { |
2789 | unsigned L = getUnscheduledLatency(SU); |
2790 | if (L > RemLatency) { |
2791 | RemLatency = L; |
2792 | LateSU = SU; |
2793 | } |
2794 | } |
2795 | if (LateSU) { |
    LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
                      << LateSU->NodeNum << ") " << RemLatency << "c\n");
2798 | } |
2799 | return RemLatency; |
2800 | } |
2801 | |
2802 | // Count resources in this zone and the remaining unscheduled |
2803 | // instruction. Return the max count, scaled. Set OtherCritIdx to the critical |
2804 | // resource index, or zero if the zone is issue limited. |
2805 | unsigned SchedBoundary:: |
2806 | getOtherResourceCount(unsigned &OtherCritIdx) { |
2807 | OtherCritIdx = 0; |
2808 | if (!SchedModel->hasInstrSchedModel()) |
2809 | return 0; |
2810 | |
2811 | unsigned OtherCritCount = Rem->RemIssueCount |
2812 | + (RetiredMOps * SchedModel->getMicroOpFactor()); |
2813 | LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " |
2814 | << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); |
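  // Resource index zero does not name a real processor resource; it doubles as
  // the "issue limited" marker for OtherCritIdx, so the scan starts at 1.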
2815 | for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); |
2816 | PIdx != PEnd; ++PIdx) { |
    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2818 | if (OtherCount > OtherCritCount) { |
2819 | OtherCritCount = OtherCount; |
2820 | OtherCritIdx = PIdx; |
2821 | } |
2822 | } |
2823 | if (OtherCritIdx) { |
2824 | LLVM_DEBUG( |
2825 | dbgs() << " " << Available.getName() << " + Remain CritRes: " |
2826 | << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) |
2827 | << " " << SchedModel->getResourceName(OtherCritIdx) << "\n" ); |
2828 | } |
2829 | return OtherCritCount; |
2830 | } |
2831 | |
2832 | void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, |
2833 | unsigned Idx) { |
  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2835 | |
2836 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
  // ReadyCycle had been bumped up to the CurrCycle when this node was
2838 | // scheduled, but CurrCycle may have been eagerly advanced immediately after |
2839 | // scheduling, so may now be greater than ReadyCycle. |
2840 | if (ReadyCycle > CurrCycle) |
2841 | MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); |
2842 | #endif |
2843 | |
2844 | if (ReadyCycle < MinReadyCycle) |
2845 | MinReadyCycle = ReadyCycle; |
2846 | |
2847 | // Check for interlocks first. For the purpose of other heuristics, an |
2848 | // instruction that cannot issue appears as if it's not in the ReadyQueue. |
2849 | bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; |
2850 | bool HazardDetected = !IsBuffered && ReadyCycle > CurrCycle; |
2851 | if (HazardDetected) |
2852 | LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum |
2853 | << ") ReadyCycle = " << ReadyCycle |
2854 | << " is later than CurrCycle = " << CurrCycle |
2855 | << " on an unbuffered resource" << "\n" ); |
2856 | else |
2857 | HazardDetected = checkHazard(SU); |
2858 | |
2859 | if (!HazardDetected && Available.size() >= ReadyListLimit) { |
2860 | HazardDetected = true; |
2861 | LLVM_DEBUG(dbgs().indent(2) << "hazard: Available Q is full (size: " |
2862 | << Available.size() << ")\n" ); |
2863 | } |
2864 | |
2865 | if (!HazardDetected) { |
2866 | Available.push(SU); |
2867 | LLVM_DEBUG(dbgs().indent(2) |
2868 | << "Move SU(" << SU->NodeNum << ") into Available Q\n" ); |
2869 | |
2870 | if (InPQueue) |
      Pending.remove(Pending.begin() + Idx);
2872 | return; |
2873 | } |
2874 | |
2875 | if (!InPQueue) |
2876 | Pending.push(SU); |
2877 | } |
2878 | |
2879 | /// Move the boundary of scheduled code by one cycle. |
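/// For example, with an issue width of 4, advancing from cycle 10 to cycle 12
/// frees up to 4 * 2 = 8 of the in-flight micro-op slots counted in CurrMOps.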
2880 | void SchedBoundary::bumpCycle(unsigned NextCycle) { |
2881 | if (SchedModel->getMicroOpBufferSize() == 0) { |
2882 | assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && |
2883 | "MinReadyCycle uninitialized" ); |
2884 | if (MinReadyCycle > NextCycle) |
2885 | NextCycle = MinReadyCycle; |
2886 | } |
2887 | // Update the current micro-ops, which will issue in the next cycle. |
2888 | unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); |
2889 | CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; |
2890 | |
2891 | // Decrement DependentLatency based on the next cycle. |
2892 | if ((NextCycle - CurrCycle) > DependentLatency) |
2893 | DependentLatency = 0; |
2894 | else |
2895 | DependentLatency -= (NextCycle - CurrCycle); |
2896 | |
2897 | if (!HazardRec->isEnabled()) { |
2898 | // Bypass HazardRec virtual calls. |
2899 | CurrCycle = NextCycle; |
2900 | } else { |
2901 | // Bypass getHazardType calls in case of long latency. |
2902 | for (; CurrCycle != NextCycle; ++CurrCycle) { |
2903 | if (isTop()) |
2904 | HazardRec->AdvanceCycle(); |
2905 | else |
2906 | HazardRec->RecedeCycle(); |
2907 | } |
2908 | } |
2909 | CheckPending = true; |
2910 | IsResourceLimited = |
      checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
                         getScheduledLatency(), true);
2913 | |
2914 | LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() |
2915 | << '\n'); |
2916 | } |
2917 | |
2918 | void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { |
2919 | ExecutedResCounts[PIdx] += Count; |
2920 | if (ExecutedResCounts[PIdx] > MaxExecutedResCount) |
2921 | MaxExecutedResCount = ExecutedResCounts[PIdx]; |
2922 | } |
2923 | |
2924 | /// Add the given processor resource to this scheduled zone. |
2925 | /// |
2926 | /// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined) |
2927 | /// cycles during which this resource is released. |
2928 | /// |
2929 | /// \param AcquireAtCycle indicates the number of consecutive (non-pipelined) |
/// cycles at which the resource is acquired after issue (assuming no stalls).
2931 | /// |
2932 | /// \return the next cycle at which the instruction may execute without |
2933 | /// oversubscribing resources. |
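///
/// For example, a resource with factor 2 that is held from AcquireAtCycle 1 to
/// ReleaseAtCycle 3 adds a scaled count of 2 * (3 - 1) = 4 to this zone.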
2934 | unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx, |
2935 | unsigned ReleaseAtCycle, |
2936 | unsigned NextCycle, |
2937 | unsigned AcquireAtCycle) { |
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
                    << ReleaseAtCycle << "x" << Factor << "u\n");
2942 | |
2943 | // Update Executed resources counts. |
2944 | incExecutedResources(PIdx, Count); |
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2946 | Rem->RemainingCounts[PIdx] -= Count; |
2947 | |
2948 | // Check if this resource exceeds the current critical resource. If so, it |
2949 | // becomes the critical resource. |
  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
    ZoneCritResIdx = PIdx;
    LLVM_DEBUG(dbgs() << " *** Critical resource "
                      << SchedModel->getResourceName(PIdx) << ": "
                      << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
                      << "c\n");
2956 | } |
2957 | // For reserved resources, record the highest cycle using the resource. |
2958 | unsigned NextAvailable, InstanceIdx; |
  std::tie(NextAvailable, InstanceIdx) =
      getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2961 | if (NextAvailable > CurrCycle) { |
2962 | LLVM_DEBUG(dbgs() << " Resource conflict: " |
2963 | << SchedModel->getResourceName(PIdx) |
2964 | << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']' |
2965 | << " reserved until @" << NextAvailable << "\n" ); |
2966 | } |
2967 | return NextAvailable; |
2968 | } |
2969 | |
2970 | /// Move the boundary of scheduled code by one SUnit. |
2971 | void SchedBoundary::bumpNode(SUnit *SU) { |
2972 | // Update the reservation table. |
2973 | if (HazardRec->isEnabled()) { |
2974 | if (!isTop() && SU->isCall) { |
2975 | // Calls are scheduled with their preceding instructions. For bottom-up |
2976 | // scheduling, clear the pipeline state before emitting. |
2977 | HazardRec->Reset(); |
2978 | } |
2979 | HazardRec->EmitInstruction(SU); |
2980 | // Scheduling an instruction may have made pending instructions available. |
2981 | CheckPending = true; |
2982 | } |
2983 | // checkHazard should prevent scheduling multiple instructions per cycle that |
2984 | // exceed the issue width. |
2985 | const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2987 | assert( |
2988 | (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && |
2989 | "Cannot schedule this instruction's MicroOps in the current cycle." ); |
2990 | |
2991 | unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); |
2992 | LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n" ); |
2993 | |
2994 | unsigned NextCycle = CurrCycle; |
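  // MicroOpBufferSize selects the stall model: 0 is fully in-order (pending
  // nodes only become available once ready, so no stall can occur here), 1
  // stalls to the ready cycle, and larger values approximate an out-of-order
  // buffer in which only unbuffered resources introduce stalls.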
2995 | switch (SchedModel->getMicroOpBufferSize()) { |
2996 | case 0: |
    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2998 | break; |
2999 | case 1: |
3000 | if (ReadyCycle > NextCycle) { |
3001 | NextCycle = ReadyCycle; |
3002 | LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n" ); |
3003 | } |
3004 | break; |
3005 | default: |
3006 | // We don't currently model the OOO reorder buffer, so consider all |
3007 | // scheduled MOps to be "retired". We do loosely model in-order resource |
3008 | // latency. If this instruction uses an in-order resource, account for any |
3009 | // likely stall cycles. |
3010 | if (SU->isUnbuffered && ReadyCycle > NextCycle) |
3011 | NextCycle = ReadyCycle; |
3012 | break; |
3013 | } |
3014 | RetiredMOps += IncMOps; |
3015 | |
3016 | // Update resource counts and critical resource. |
3017 | if (SchedModel->hasInstrSchedModel()) { |
3018 | unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); |
    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
3020 | Rem->RemIssueCount -= DecRemIssue; |
3021 | if (ZoneCritResIdx) { |
3022 | // Scale scheduled micro-ops for comparing with the critical resource. |
3023 | unsigned ScaledMOps = |
3024 | RetiredMOps * SchedModel->getMicroOpFactor(); |
3025 | |
3026 | // If scaled micro-ops are now more than the previous critical resource by |
3027 | // a full cycle, then micro-ops issue becomes critical. |
      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
          >= (int)SchedModel->getLatencyFactor()) {
3030 | ZoneCritResIdx = 0; |
3031 | LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: " |
3032 | << ScaledMOps / SchedModel->getLatencyFactor() |
3033 | << "c\n" ); |
3034 | } |
3035 | } |
3036 | for (TargetSchedModel::ProcResIter |
3037 | PI = SchedModel->getWriteProcResBegin(SC), |
3038 | PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { |
      unsigned RCycle =
          countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
                        PI->AcquireAtCycle);
3042 | if (RCycle > NextCycle) |
3043 | NextCycle = RCycle; |
3044 | } |
3045 | if (SU->hasReservedResource) { |
3046 | // For reserved resources, record the highest cycle using the resource. |
3047 | // For top-down scheduling, this is the cycle in which we schedule this |
      // instruction plus the number of cycles the operation reserves the
      // resource. For bottom-up, it is simply the instruction's cycle.
3050 | for (TargetSchedModel::ProcResIter |
3051 | PI = SchedModel->getWriteProcResBegin(SC), |
3052 | PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { |
3053 | unsigned PIdx = PI->ProcResourceIdx; |
3054 | if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { |
3055 | |
3056 | if (SchedModel && SchedModel->enableIntervals()) { |
3057 | unsigned ReservedUntil, InstanceIdx; |
            std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
                SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3060 | if (isTop()) { |
              ReservedResourceSegments[InstanceIdx].add(
                  ResourceSegments::getResourceIntervalTop(
                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
                  MIResourceCutOff);
3065 | } else { |
              ReservedResourceSegments[InstanceIdx].add(
                  ResourceSegments::getResourceIntervalBottom(
                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
                  MIResourceCutOff);
3070 | } |
3071 | } else { |
3072 | |
3073 | unsigned ReservedUntil, InstanceIdx; |
            std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
                SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3076 | if (isTop()) { |
3077 | ReservedCycles[InstanceIdx] = |
                  std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
3079 | } else |
3080 | ReservedCycles[InstanceIdx] = NextCycle; |
3081 | } |
3082 | } |
3083 | } |
3084 | } |
3085 | } |
3086 | // Update ExpectedLatency and DependentLatency. |
3087 | unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; |
3088 | unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; |
3089 | if (SU->getDepth() > TopLatency) { |
3090 | TopLatency = SU->getDepth(); |
3091 | LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU(" |
3092 | << SU->NodeNum << ") " << TopLatency << "c\n" ); |
3093 | } |
3094 | if (SU->getHeight() > BotLatency) { |
3095 | BotLatency = SU->getHeight(); |
3096 | LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU(" |
3097 | << SU->NodeNum << ") " << BotLatency << "c\n" ); |
3098 | } |
3099 | // If we stall for any reason, bump the cycle. |
3100 | if (NextCycle > CurrCycle) |
3101 | bumpCycle(NextCycle); |
3102 | else |
3103 | // After updating ZoneCritResIdx and ExpectedLatency, check if we're |
3104 | // resource limited. If a stall occurred, bumpCycle does this. |
3105 | IsResourceLimited = |
        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
                           getScheduledLatency(), true);
3108 | |
3109 | // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle |
3110 | // resets CurrMOps. Loop to handle instructions with more MOps than issue in |
3111 | // one cycle. Since we commonly reach the max MOps here, opportunistically |
3112 | // bump the cycle to avoid uselessly checking everything in the readyQ. |
3113 | CurrMOps += IncMOps; |
3114 | |
3115 | // Bump the cycle count for issue group constraints. |
  // This must be done after NextCycle has been adjusted for all other stalls.
  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
  // CurrCycle to X.
  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
3121 | LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin" ) |
3122 | << " group\n" ); |
3123 | bumpCycle(NextCycle: ++NextCycle); |
3124 | } |
3125 | |
3126 | while (CurrMOps >= SchedModel->getIssueWidth()) { |
3127 | LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle " |
3128 | << CurrCycle << '\n'); |
    bumpCycle(++NextCycle);
3130 | } |
3131 | LLVM_DEBUG(dumpScheduledState()); |
3132 | } |
3133 | |
/// Release pending ready nodes into the available queue. This makes them
3135 | /// visible to heuristics. |
3136 | void SchedBoundary::releasePending() { |
3137 | // If the available queue is empty, it is safe to reset MinReadyCycle. |
3138 | if (Available.empty()) |
3139 | MinReadyCycle = std::numeric_limits<unsigned>::max(); |
3140 | |
3141 | // Check to see if any of the pending instructions are ready to issue. If |
3142 | // so, add them to the available queue. |
3143 | for (unsigned I = 0, E = Pending.size(); I < E; ++I) { |
3144 | SUnit *SU = *(Pending.begin() + I); |
3145 | unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; |
3146 | |
3147 | LLVM_DEBUG(dbgs() << "Checking pending node SU(" << SU->NodeNum << ")\n" ); |
3148 | |
3149 | if (ReadyCycle < MinReadyCycle) |
3150 | MinReadyCycle = ReadyCycle; |
3151 | |
3152 | if (Available.size() >= ReadyListLimit) |
3153 | break; |
3154 | |
    releaseNode(SU, ReadyCycle, true, I);
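    // releaseNode may have moved this entry from Pending to Available; if the
    // queue shrank, compensate the index and the bound.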
3156 | if (E != Pending.size()) { |
3157 | --I; |
3158 | --E; |
3159 | } |
3160 | } |
3161 | CheckPending = false; |
3162 | } |
3163 | |
3164 | /// Remove SU from the ready set for this boundary. |
3165 | void SchedBoundary::removeReady(SUnit *SU) { |
3166 | if (Available.isInQueue(SU)) |
    Available.remove(Available.find(SU));
3168 | else { |
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
3171 | } |
3172 | } |
3173 | |
3174 | /// If this queue only has one ready candidate, return it. As a side effect, |
3175 | /// defer any nodes that now hit a hazard, and advance the cycle until at least |
3176 | /// one node is ready. If multiple instructions are ready, return NULL. |
3177 | SUnit *SchedBoundary::pickOnlyChoice() { |
3178 | if (CheckPending) |
3179 | releasePending(); |
3180 | |
3181 | // Defer any ready instrs that now have a hazard. |
3182 | for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { |
    if (checkHazard(*I)) {
      Pending.push(*I);
3185 | I = Available.remove(I); |
3186 | continue; |
3187 | } |
3188 | ++I; |
3189 | } |
3190 | for (unsigned i = 0; Available.empty(); ++i) { |
3191 | // FIXME: Re-enable assert once PR20057 is resolved. |
3192 | // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && |
3193 | // "permanent hazard"); |
3194 | (void)i; |
    bumpCycle(CurrCycle + 1);
3196 | releasePending(); |
3197 | } |
3198 | |
3199 | LLVM_DEBUG(Pending.dump()); |
3200 | LLVM_DEBUG(Available.dump()); |
3201 | |
3202 | if (Available.size() == 1) |
3203 | return *Available.begin(); |
3204 | return nullptr; |
3205 | } |
3206 | |
3207 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3208 | |
3209 | /// Dump the content of the \ref ReservedCycles vector for the |
3210 | /// resources that are used in the basic block. |
3211 | /// |
3212 | LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const { |
3213 | if (!SchedModel->hasInstrSchedModel()) |
3214 | return; |
3215 | |
3216 | unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); |
3217 | unsigned StartIdx = 0; |
3218 | |
3219 | for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) { |
3220 | const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits; |
3221 | std::string ResName = SchedModel->getResourceName(ResIdx); |
3222 | for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) { |
      dbgs() << ResName << "(" << UnitIdx << ") = ";
3224 | if (SchedModel && SchedModel->enableIntervals()) { |
3225 | if (ReservedResourceSegments.count(StartIdx + UnitIdx)) |
3226 | dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx); |
3227 | else |
3228 | dbgs() << "{ }\n" ; |
3229 | } else |
3230 | dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n" ; |
3231 | } |
3232 | StartIdx += NumUnits; |
3233 | } |
3234 | } |
3235 | |
3236 | // This is useful information to dump after bumpNode. |
3237 | // Note that the Queue contents are more useful before pickNodeFromQueue. |
3238 | LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { |
3239 | unsigned ResFactor; |
3240 | unsigned ResCount; |
3241 | if (ZoneCritResIdx) { |
3242 | ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); |
3243 | ResCount = getResourceCount(ZoneCritResIdx); |
3244 | } else { |
3245 | ResFactor = SchedModel->getMicroOpFactor(); |
3246 | ResCount = RetiredMOps * ResFactor; |
3247 | } |
3248 | unsigned LFactor = SchedModel->getLatencyFactor(); |
3249 | dbgs() << Available.getName() << " @" << CurrCycle << "c\n" |
3250 | << " Retired: " << RetiredMOps; |
3251 | dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c" ; |
3252 | dbgs() << "\n Critical: " << ResCount / LFactor << "c, " |
3253 | << ResCount / ResFactor << " " |
3254 | << SchedModel->getResourceName(ZoneCritResIdx) |
3255 | << "\n ExpectedLatency: " << ExpectedLatency << "c\n" |
         << (IsResourceLimited ? " - Resource" : " - Latency")
         << " limited.\n";
3258 | if (MISchedDumpReservedCycles) |
3259 | dumpReservedCycles(); |
3260 | } |
3261 | #endif |
3262 | |
3263 | //===----------------------------------------------------------------------===// |
3264 | // GenericScheduler - Generic implementation of MachineSchedStrategy. |
3265 | //===----------------------------------------------------------------------===// |
3266 | |
3267 | void GenericSchedulerBase::SchedCandidate:: |
3268 | initResourceDelta(const ScheduleDAGMI *DAG, |
3269 | const TargetSchedModel *SchedModel) { |
3270 | if (!Policy.ReduceResIdx && !Policy.DemandResIdx) |
3271 | return; |
3272 | |
3273 | const MCSchedClassDesc *SC = DAG->getSchedClass(SU); |
3274 | for (TargetSchedModel::ProcResIter |
3275 | PI = SchedModel->getWriteProcResBegin(SC), |
3276 | PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { |
3277 | if (PI->ProcResourceIdx == Policy.ReduceResIdx) |
3278 | ResDelta.CritResources += PI->ReleaseAtCycle; |
3279 | if (PI->ProcResourceIdx == Policy.DemandResIdx) |
3280 | ResDelta.DemandedResources += PI->ReleaseAtCycle; |
3281 | } |
3282 | } |
3283 | |
3284 | /// Compute remaining latency. We need this both to determine whether the |
3285 | /// overall schedule has become latency-limited and whether the instructions |
3286 | /// outside this zone are resource or latency limited. |
3287 | /// |
3288 | /// The "dependent" latency is updated incrementally during scheduling as the |
3289 | /// max height/depth of scheduled nodes minus the cycles since it was |
3290 | /// scheduled: |
/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
3292 | /// |
3293 | /// The "independent" latency is the max ready queue depth: |
3294 | /// ILat = max N.depth for N in Available|Pending |
3295 | /// |
3296 | /// RemainingLatency is the greater of independent and dependent latency. |
3297 | /// |
3298 | /// These computations are expensive, especially in DAGs with many edges, so |
3299 | /// only do them if necessary. |
3300 | static unsigned computeRemLatency(SchedBoundary &CurrZone) { |
3301 | unsigned RemLatency = CurrZone.getDependentLatency(); |
  RemLatency = std::max(RemLatency,
                        CurrZone.findMaxLatency(CurrZone.Available.elements()));
  RemLatency = std::max(RemLatency,
                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3306 | return RemLatency; |
3307 | } |
3308 | |
/// Returns true if the current cycle plus remaining latency is greater than
3310 | /// the critical path in the scheduling region. |
3311 | bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy, |
3312 | SchedBoundary &CurrZone, |
3313 | bool ComputeRemLatency, |
3314 | unsigned &RemLatency) const { |
3315 | // The current cycle is already greater than the critical path, so we are |
3316 | // already latency limited and don't need to compute the remaining latency. |
3317 | if (CurrZone.getCurrCycle() > Rem.CriticalPath) |
3318 | return true; |
3319 | |
3320 | // If we haven't scheduled anything yet, then we aren't latency limited. |
3321 | if (CurrZone.getCurrCycle() == 0) |
3322 | return false; |
3323 | |
3324 | if (ComputeRemLatency) |
3325 | RemLatency = computeRemLatency(CurrZone); |
3326 | |
3327 | return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath; |
3328 | } |
3329 | |
/// Set the CandPolicy for a scheduling zone, given the current resources and
3331 | /// latencies inside and outside the zone. |
3332 | void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, |
3333 | SchedBoundary &CurrZone, |
3334 | SchedBoundary *OtherZone) { |
3335 | // Apply preemptive heuristics based on the total latency and resources |
3336 | // inside and outside this zone. Potential stalls should be considered before |
3337 | // following this policy. |
3338 | |
3339 | // Compute the critical resource outside the zone. |
3340 | unsigned OtherCritIdx = 0; |
3341 | unsigned OtherCount = |
3342 | OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; |
3343 | |
3344 | bool OtherResLimited = false; |
3345 | unsigned RemLatency = 0; |
3346 | bool RemLatencyComputed = false; |
3347 | if (SchedModel->hasInstrSchedModel() && OtherCount != 0) { |
3348 | RemLatency = computeRemLatency(CurrZone); |
3349 | RemLatencyComputed = true; |
    OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
                                         OtherCount, RemLatency, false);
3352 | } |
3353 | |
3354 | // Schedule aggressively for latency in PostRA mode. We don't check for |
3355 | // acyclic latency during PostRA, and highly out-of-order processors will |
3356 | // skip PostRA scheduling. |
3357 | if (!OtherResLimited && |
      (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3359 | RemLatency))) { |
3360 | Policy.ReduceLatency |= true; |
3361 | LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName() |
3362 | << " RemainingLatency " << RemLatency << " + " |
3363 | << CurrZone.getCurrCycle() << "c > CritPath " |
                      << Rem.CriticalPath << "\n");
3365 | } |
3366 | // If the same resource is limiting inside and outside the zone, do nothing. |
3367 | if (CurrZone.getZoneCritResIdx() == OtherCritIdx) |
3368 | return; |
3369 | |
  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
    dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
           << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
  } if (OtherResLimited) dbgs()
        << " RemainingLimit: "
        << SchedModel->getResourceName(OtherCritIdx) << "\n";
    if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
        << " Latency limited both directions.\n");
3378 | |
3379 | if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) |
3380 | Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); |
3381 | |
3382 | if (OtherResLimited) |
3383 | Policy.DemandResIdx = OtherCritIdx; |
3384 | } |
3385 | |
3386 | #ifndef NDEBUG |
3387 | const char *GenericSchedulerBase::getReasonStr( |
3388 | GenericSchedulerBase::CandReason Reason) { |
3389 | // clang-format off |
3390 | switch (Reason) { |
  case NoCand:         return "NOCAND    ";
  case Only1:          return "ONLY1     ";
  case PhysReg:        return "PHYS-REG  ";
  case RegExcess:      return "REG-EXCESS";
  case RegCritical:    return "REG-CRIT  ";
  case Stall:          return "STALL     ";
  case Cluster:        return "CLUSTER   ";
  case Weak:           return "WEAK      ";
  case RegMax:         return "REG-MAX   ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NodeOrder:      return "ORDER     ";
  case FirstValid:     return "FIRST     ";
3408 | }; |
3409 | // clang-format on |
3410 | llvm_unreachable("Unknown reason!" ); |
3411 | } |
3412 | |
3413 | void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { |
3414 | PressureChange P; |
3415 | unsigned ResIdx = 0; |
3416 | unsigned Latency = 0; |
3417 | switch (Cand.Reason) { |
3418 | default: |
3419 | break; |
3420 | case RegExcess: |
3421 | P = Cand.RPDelta.Excess; |
3422 | break; |
3423 | case RegCritical: |
3424 | P = Cand.RPDelta.CriticalMax; |
3425 | break; |
3426 | case RegMax: |
3427 | P = Cand.RPDelta.CurrentMax; |
3428 | break; |
3429 | case ResourceReduce: |
3430 | ResIdx = Cand.Policy.ReduceResIdx; |
3431 | break; |
3432 | case ResourceDemand: |
3433 | ResIdx = Cand.Policy.DemandResIdx; |
3434 | break; |
3435 | case TopDepthReduce: |
3436 | Latency = Cand.SU->getDepth(); |
3437 | break; |
3438 | case TopPathReduce: |
3439 | Latency = Cand.SU->getHeight(); |
3440 | break; |
3441 | case BotHeightReduce: |
3442 | Latency = Cand.SU->getHeight(); |
3443 | break; |
3444 | case BotPathReduce: |
3445 | Latency = Cand.SU->getDepth(); |
3446 | break; |
3447 | } |
3448 | dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); |
  if (P.isValid())
    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
           << ":" << P.getUnitInc() << " ";
  else
    dbgs() << " ";
  if (ResIdx)
    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << " ";
  if (Latency)
    dbgs() << " " << Latency << " cycles ";
  else
    dbgs() << " ";
3462 | dbgs() << '\n'; |
3463 | } |
3464 | #endif |
3465 | |
3466 | namespace llvm { |
3467 | /// Return true if this heuristic determines order. |
3468 | /// TODO: Consider refactor return type of these functions as integer or enum, |
3469 | /// as we may need to differentiate whether TryCand is better than Cand. |
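/// Both helpers return true whenever the comparison is decisive in either
/// direction: on a TryCand win TryCand.Reason is set to \p Reason, while on a
/// Cand win Cand.Reason is upgraded to \p Reason if that reason ranks higher.
/// A false return means a tie, leaving the decision to later heuristics.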
3470 | bool tryLess(int TryVal, int CandVal, |
3471 | GenericSchedulerBase::SchedCandidate &TryCand, |
3472 | GenericSchedulerBase::SchedCandidate &Cand, |
3473 | GenericSchedulerBase::CandReason Reason) { |
3474 | if (TryVal < CandVal) { |
3475 | TryCand.Reason = Reason; |
3476 | return true; |
3477 | } |
3478 | if (TryVal > CandVal) { |
3479 | if (Cand.Reason > Reason) |
3480 | Cand.Reason = Reason; |
3481 | return true; |
3482 | } |
3483 | return false; |
3484 | } |
3485 | |
3486 | bool tryGreater(int TryVal, int CandVal, |
3487 | GenericSchedulerBase::SchedCandidate &TryCand, |
3488 | GenericSchedulerBase::SchedCandidate &Cand, |
3489 | GenericSchedulerBase::CandReason Reason) { |
3490 | if (TryVal > CandVal) { |
3491 | TryCand.Reason = Reason; |
3492 | return true; |
3493 | } |
3494 | if (TryVal < CandVal) { |
3495 | if (Cand.Reason > Reason) |
3496 | Cand.Reason = Reason; |
3497 | return true; |
3498 | } |
3499 | return false; |
3500 | } |
3501 | |
3502 | bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, |
3503 | GenericSchedulerBase::SchedCandidate &Cand, |
3504 | SchedBoundary &Zone) { |
3505 | if (Zone.isTop()) { |
3506 | // Prefer the candidate with the lesser depth, but only if one of them has |
3507 | // depth greater than the total latency scheduled so far, otherwise either |
3508 | // of them could be scheduled now with no stall. |
    if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
        Zone.getScheduledLatency()) {
      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
3513 | return true; |
3514 | } |
    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
3517 | return true; |
3518 | } else { |
3519 | // Prefer the candidate with the lesser height, but only if one of them has |
3520 | // height greater than the total latency scheduled so far, otherwise either |
3521 | // of them could be scheduled now with no stall. |
    if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
        Zone.getScheduledLatency()) {
      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
3526 | return true; |
3527 | } |
    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
3530 | return true; |
3531 | } |
3532 | return false; |
3533 | } |
3534 | } // end namespace llvm |
3535 | |
3536 | static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop, |
3537 | bool IsPostRA = false) { |
3538 | LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot " ) |
3539 | << GenericSchedulerBase::getReasonStr(Reason) << " [" |
3540 | << (IsPostRA ? "post-RA" : "pre-RA" ) << "]\n" ); |
3541 | |
3542 | if (IsPostRA) { |
3543 | if (IsTop) |
3544 | NumTopPostRA++; |
3545 | else |
3546 | NumBotPostRA++; |
3547 | |
3548 | switch (Reason) { |
3549 | case GenericScheduler::NoCand: |
3550 | NumNoCandPostRA++; |
3551 | return; |
3552 | case GenericScheduler::Only1: |
3553 | NumOnly1PostRA++; |
3554 | return; |
3555 | case GenericScheduler::PhysReg: |
3556 | NumPhysRegPostRA++; |
3557 | return; |
3558 | case GenericScheduler::RegExcess: |
3559 | NumRegExcessPostRA++; |
3560 | return; |
3561 | case GenericScheduler::RegCritical: |
3562 | NumRegCriticalPostRA++; |
3563 | return; |
3564 | case GenericScheduler::Stall: |
3565 | NumStallPostRA++; |
3566 | return; |
3567 | case GenericScheduler::Cluster: |
3568 | NumClusterPostRA++; |
3569 | return; |
3570 | case GenericScheduler::Weak: |
3571 | NumWeakPostRA++; |
3572 | return; |
3573 | case GenericScheduler::RegMax: |
3574 | NumRegMaxPostRA++; |
3575 | return; |
3576 | case GenericScheduler::ResourceReduce: |
3577 | NumResourceReducePostRA++; |
3578 | return; |
3579 | case GenericScheduler::ResourceDemand: |
3580 | NumResourceDemandPostRA++; |
3581 | return; |
3582 | case GenericScheduler::TopDepthReduce: |
3583 | NumTopDepthReducePostRA++; |
3584 | return; |
3585 | case GenericScheduler::TopPathReduce: |
3586 | NumTopPathReducePostRA++; |
3587 | return; |
3588 | case GenericScheduler::BotHeightReduce: |
3589 | NumBotHeightReducePostRA++; |
3590 | return; |
3591 | case GenericScheduler::BotPathReduce: |
3592 | NumBotPathReducePostRA++; |
3593 | return; |
3594 | case GenericScheduler::NodeOrder: |
3595 | NumNodeOrderPostRA++; |
3596 | return; |
3597 | case GenericScheduler::FirstValid: |
3598 | NumFirstValidPostRA++; |
3599 | return; |
3600 | }; |
3601 | } else { |
3602 | if (IsTop) |
3603 | NumTopPreRA++; |
3604 | else |
3605 | NumBotPreRA++; |
3606 | |
3607 | switch (Reason) { |
3608 | case GenericScheduler::NoCand: |
3609 | NumNoCandPreRA++; |
3610 | return; |
3611 | case GenericScheduler::Only1: |
3612 | NumOnly1PreRA++; |
3613 | return; |
3614 | case GenericScheduler::PhysReg: |
3615 | NumPhysRegPreRA++; |
3616 | return; |
3617 | case GenericScheduler::RegExcess: |
3618 | NumRegExcessPreRA++; |
3619 | return; |
3620 | case GenericScheduler::RegCritical: |
3621 | NumRegCriticalPreRA++; |
3622 | return; |
3623 | case GenericScheduler::Stall: |
3624 | NumStallPreRA++; |
3625 | return; |
3626 | case GenericScheduler::Cluster: |
3627 | NumClusterPreRA++; |
3628 | return; |
3629 | case GenericScheduler::Weak: |
3630 | NumWeakPreRA++; |
3631 | return; |
3632 | case GenericScheduler::RegMax: |
3633 | NumRegMaxPreRA++; |
3634 | return; |
3635 | case GenericScheduler::ResourceReduce: |
3636 | NumResourceReducePreRA++; |
3637 | return; |
3638 | case GenericScheduler::ResourceDemand: |
3639 | NumResourceDemandPreRA++; |
3640 | return; |
3641 | case GenericScheduler::TopDepthReduce: |
3642 | NumTopDepthReducePreRA++; |
3643 | return; |
3644 | case GenericScheduler::TopPathReduce: |
3645 | NumTopPathReducePreRA++; |
3646 | return; |
3647 | case GenericScheduler::BotHeightReduce: |
3648 | NumBotHeightReducePreRA++; |
3649 | return; |
3650 | case GenericScheduler::BotPathReduce: |
3651 | NumBotPathReducePreRA++; |
3652 | return; |
3653 | case GenericScheduler::NodeOrder: |
3654 | NumNodeOrderPreRA++; |
3655 | return; |
3656 | case GenericScheduler::FirstValid: |
3657 | NumFirstValidPreRA++; |
3658 | return; |
3659 | }; |
3660 | } |
3661 | llvm_unreachable("Unknown reason!" ); |
3662 | } |
3663 | |
3664 | static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, |
3665 | bool IsPostRA = false) { |
  tracePick(Cand.Reason, Cand.AtTop, IsPostRA);
3667 | } |
3668 | |
3669 | void GenericScheduler::initialize(ScheduleDAGMI *dag) { |
3670 | assert(dag->hasVRegLiveness() && |
3671 | "(PreRA)GenericScheduler needs vreg liveness" ); |
3672 | DAG = static_cast<ScheduleDAGMILive*>(dag); |
3673 | SchedModel = DAG->getSchedModel(); |
3674 | TRI = DAG->TRI; |
3675 | |
3676 | if (RegionPolicy.ComputeDFSResult) |
3677 | DAG->computeDFSResult(); |
3678 | |
3679 | Rem.init(DAG, SchedModel); |
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);
3682 | |
3683 | // Initialize resource counts. |
3684 | |
3685 | // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or |
3686 | // are disabled, then these HazardRecs will be disabled. |
3687 | const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); |
3688 | if (!Top.HazardRec) { |
3689 | Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); |
3690 | } |
3691 | if (!Bot.HazardRec) { |
3692 | Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); |
3693 | } |
3694 | TopCand.SU = nullptr; |
3695 | BotCand.SU = nullptr; |
3696 | |
3697 | TopCluster = nullptr; |
3698 | BotCluster = nullptr; |
3699 | } |
3700 | |
3701 | /// Initialize the per-region scheduling policy. |
3702 | void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, |
3703 | MachineBasicBlock::iterator End, |
3704 | unsigned NumRegionInstrs) { |
3705 | const MachineFunction &MF = *Begin->getMF(); |
3706 | const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); |
3707 | |
  // Avoid setting up the register pressure tracker for small regions to save
  // compile time. As a rough heuristic, only track pressure when the number of
  // schedulable instructions exceeds half the allocatable integer register
  // file for the largest legal integer register type.
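  // For example, on a target with 32 allocatable registers of that type,
  // pressure is only tracked for regions of more than 16 instructions.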
3712 | RegionPolicy.ShouldTrackPressure = true; |
3713 | for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) { |
3714 | MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; |
    if (TLI->isTypeLegal(LegalIntVT)) {
      unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
          TLI->getRegClassFor(LegalIntVT));
      RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3719 | break; |
3720 | } |
3721 | } |
3722 | |
3723 | // For generic targets, we default to bottom-up, because it's simpler and more |
3724 | // compile-time optimizations have been implemented in that direction. |
3725 | RegionPolicy.OnlyBottomUp = true; |
3726 | |
3727 | // Allow the subtarget to override default policy. |
  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
3729 | |
3730 | // After subtarget overrides, apply command line options. |
3731 | if (!EnableRegPressure) { |
3732 | RegionPolicy.ShouldTrackPressure = false; |
3733 | RegionPolicy.ShouldTrackLaneMasks = false; |
3734 | } |
3735 | |
3736 | if (PreRADirection == MISched::TopDown) { |
3737 | RegionPolicy.OnlyTopDown = true; |
3738 | RegionPolicy.OnlyBottomUp = false; |
3739 | } else if (PreRADirection == MISched::BottomUp) { |
3740 | RegionPolicy.OnlyTopDown = false; |
3741 | RegionPolicy.OnlyBottomUp = true; |
3742 | } else if (PreRADirection == MISched::Bidirectional) { |
3743 | RegionPolicy.OnlyBottomUp = false; |
3744 | RegionPolicy.OnlyTopDown = false; |
3745 | } |
3746 | |
3747 | BotIdx = NumRegionInstrs - 1; |
3748 | this->NumRegionInstrs = NumRegionInstrs; |
3749 | } |
3750 | |
3751 | void GenericScheduler::dumpPolicy() const { |
3752 | // Cannot completely remove virtual function even in release mode. |
3753 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3754 | dbgs() << "GenericScheduler RegionPolicy: " |
3755 | << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure |
3756 | << " OnlyTopDown=" << RegionPolicy.OnlyTopDown |
3757 | << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp |
3758 | << "\n" ; |
3759 | #endif |
3760 | } |
3761 | |
3762 | /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic |
3763 | /// critical path by more cycles than it takes to drain the instruction buffer. |
/// We estimate an upper bound on in-flight instructions as:
3765 | /// |
3766 | /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) |
3767 | /// InFlightIterations = AcyclicPath / CyclesPerIteration |
3768 | /// InFlightResources = InFlightIterations * LoopResources |
3769 | /// |
3770 | /// TODO: Check execution resources in addition to IssueCount. |
3771 | void GenericScheduler::checkAcyclicLatency() { |
3772 | if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) |
3773 | return; |
3774 | |
3775 | // Scaled number of cycles per loop iteration. |
3776 | unsigned IterCount = |
      std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
               Rem.RemIssueCount);
3779 | // Scaled acyclic critical path. |
3780 | unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); |
3781 | // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop |
3782 | unsigned InFlightCount = |
3783 | (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; |
3784 | unsigned BufferLimit = |
3785 | SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); |
3786 | |
3787 | Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; |
3788 | |
3789 | LLVM_DEBUG( |
3790 | dbgs() << "IssueCycles=" |
3791 | << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " |
3792 | << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() |
3793 | << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount |
3794 | << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() |
3795 | << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n" ; |
3796 | if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n" ); |
3797 | } |
3798 | |
3799 | void GenericScheduler::registerRoots() { |
3800 | Rem.CriticalPath = DAG->ExitSU.getDepth(); |
3801 | |
3802 | // Some roots may not feed into ExitSU. Check all of them in case. |
3803 | for (const SUnit *SU : Bot.Available) { |
3804 | if (SU->getDepth() > Rem.CriticalPath) |
3805 | Rem.CriticalPath = SU->getDepth(); |
3806 | } |
3807 | LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); |
3808 | if (DumpCriticalPathLength) { |
3809 | errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n" ; |
3810 | } |
3811 | |
3812 | if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) { |
3813 | Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); |
3814 | checkAcyclicLatency(); |
3815 | } |
3816 | } |
3817 | |
3818 | namespace llvm { |
3819 | bool tryPressure(const PressureChange &TryP, |
3820 | const PressureChange &CandP, |
3821 | GenericSchedulerBase::SchedCandidate &TryCand, |
3822 | GenericSchedulerBase::SchedCandidate &Cand, |
3823 | GenericSchedulerBase::CandReason Reason, |
3824 | const TargetRegisterInfo *TRI, |
3825 | const MachineFunction &MF) { |
3826 | // If one candidate decreases and the other increases, go with it. |
3827 | // Invalid candidates have UnitInc==0. |
  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3829 | Reason)) { |
3830 | return true; |
3831 | } |
3832 | // Do not compare the magnitude of pressure changes between top and bottom |
3833 | // boundary. |
3834 | if (Cand.AtTop != TryCand.AtTop) |
3835 | return false; |
3836 | |
3837 | // If both candidates affect the same set in the same boundary, go with the |
3838 | // smallest increase. |
3839 | unsigned TryPSet = TryP.getPSetOrMax(); |
3840 | unsigned CandPSet = CandP.getPSetOrMax(); |
3841 | if (TryPSet == CandPSet) { |
    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3843 | Reason); |
3844 | } |
3845 | |
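  // The candidates affect different pressure sets. Rank the sets with the
  // target's pressure-set score; a candidate that changes no set at all
  // (an invalid PressureChange) ranks highest.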
  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
                                 std::numeric_limits<int>::max();

  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
                                   std::numeric_limits<int>::max();
3851 | |
3852 | // If the candidates are decreasing pressure, reverse priority. |
3853 | if (TryP.getUnitInc() < 0) |
    std::swap(TryRank, CandRank);
3855 | return tryGreater(TryVal: TryRank, CandVal: CandRank, TryCand, Cand, Reason); |
3856 | } |
3857 | |
3858 | unsigned getWeakLeft(const SUnit *SU, bool isTop) { |
3859 | return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; |
3860 | } |
3861 | |
3862 | /// Minimize physical register live ranges. Regalloc wants them adjacent to |
3863 | /// their physreg def/use. |
3864 | /// |
3865 | /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf |
3866 | /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled |
3867 | /// with the operation that produces or consumes the physreg. We'll do this when |
3868 | /// regalloc has support for parallel copies. |
3869 | int biasPhysReg(const SUnit *SU, bool isTop) { |
3870 | const MachineInstr *MI = SU->getInstr(); |
3871 | |
3872 | if (MI->isCopy()) { |
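    // Operand 0 of a COPY is the def and operand 1 is the use; the operand on
    // the already-scheduled side is the use when scheduling top-down and the
    // def when scheduling bottom-up.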
3873 | unsigned ScheduledOper = isTop ? 1 : 0; |
3874 | unsigned UnscheduledOper = isTop ? 0 : 1; |
3875 | // If we have already scheduled the physreg produce/consumer, immediately |
3876 | // schedule the copy. |
    if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3878 | return 1; |
3879 | // If the physreg is at the boundary, defer it. Otherwise schedule it |
3880 | // immediately to free the dependent. We can hoist the copy later. |
3881 | bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; |
    if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3883 | return AtBoundary ? -1 : 1; |
3884 | } |
3885 | |
3886 | if (MI->isMoveImmediate()) { |
3887 | // If we have a move immediate and all successors have been assigned, bias |
3888 | // towards scheduling this later. Make sure all register defs are to |
3889 | // physical registers. |
3890 | bool DoBias = true; |
3891 | for (const MachineOperand &Op : MI->defs()) { |
3892 | if (Op.isReg() && !Op.getReg().isPhysical()) { |
3893 | DoBias = false; |
3894 | break; |
3895 | } |
3896 | } |
3897 | |
3898 | if (DoBias) |
3899 | return isTop ? -1 : 1; |
3900 | } |
3901 | |
3902 | return 0; |
3903 | } |
3904 | } // end namespace llvm |
3905 | |
3906 | void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, |
3907 | bool AtTop, |
3908 | const RegPressureTracker &RPTracker, |
3909 | RegPressureTracker &TempTracker) { |
3910 | Cand.SU = SU; |
3911 | Cand.AtTop = AtTop; |
3912 | if (DAG->isTrackingPressure()) { |
3913 | if (AtTop) { |
      TempTracker.getMaxDownwardPressureDelta(
          Cand.SU->getInstr(),
          Cand.RPDelta,
          DAG->getRegionCriticalPSets(),
          DAG->getRegPressure().MaxSetPressure);
3919 | } else { |
3920 | if (VerifyScheduling) { |
        TempTracker.getMaxUpwardPressureDelta(
            Cand.SU->getInstr(),
            &DAG->getPressureDiff(Cand.SU),
            Cand.RPDelta,
            DAG->getRegionCriticalPSets(),
            DAG->getRegPressure().MaxSetPressure);
3927 | } else { |
        RPTracker.getUpwardPressureDelta(
            Cand.SU->getInstr(),
            DAG->getPressureDiff(Cand.SU),
            Cand.RPDelta,
            DAG->getRegionCriticalPSets(),
            DAG->getRegPressure().MaxSetPressure);
3934 | } |
3935 | } |
3936 | } |
3937 | LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs() |
3938 | << " Try SU(" << Cand.SU->NodeNum << ") " |
3939 | << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":" |
             << Cand.RPDelta.Excess.getUnitInc() << "\n");
3941 | } |
3942 | |
3943 | /// Apply a set of heuristics to a new candidate. Heuristics are currently |
3944 | /// hierarchical. This may be more efficient than a graduated cost model because |
3945 | /// we don't need to evaluate all aspects of the model for each node in the |
3946 | /// queue. But it's really done to make the heuristics easier to debug and |
3947 | /// statistically analyze. |
3948 | /// |
3949 | /// \param Cand provides the policy and current best candidate. |
3950 | /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. |
3951 | /// \param Zone describes the scheduled zone that we are extending, or nullptr |
3952 | /// if Cand is from a different zone than TryCand. |
3953 | /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) |
3954 | bool GenericScheduler::tryCandidate(SchedCandidate &Cand, |
3955 | SchedCandidate &TryCand, |
3956 | SchedBoundary *Zone) const { |
3957 | // Initialize the candidate if needed. |
3958 | if (!Cand.isValid()) { |
3959 | TryCand.Reason = FirstValid; |
3960 | return true; |
3961 | } |
3962 | |
  // Bias PhysReg defs and copies toward their uses and definitions,
  // respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3966 | return TryCand.Reason != NoCand; |
3967 | |
3968 | // Avoid exceeding the target's limit. |
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
                                               Cand.RPDelta.Excess,
                                               TryCand, Cand, RegExcess, TRI,
                                               DAG->MF))
3973 | return TryCand.Reason != NoCand; |
3974 | |
3975 | // Avoid increasing the max critical pressure in the scheduled region. |
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
3980 | return TryCand.Reason != NoCand; |
3981 | |
3982 | // We only compare a subset of features when comparing nodes between |
3983 | // Top and Bottom boundary. Some properties are simply incomparable, in many |
3984 | // other instances we should only override the other boundary if something |
3985 | // is a clear good pick on one boundary. Skip heuristics that are more |
3986 | // "tie-breaking" in nature. |
3987 | bool SameBoundary = Zone != nullptr; |
3988 | if (SameBoundary) { |
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
3994 | return TryCand.Reason != NoCand; |
3995 | |
3996 | // Prioritize instructions that read unbuffered resources by stall cycles. |
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3999 | return TryCand.Reason != NoCand; |
4000 | } |
4001 | |
4002 | // Keep clustered nodes together to encourage downstream peephole |
4003 | // optimizations which may reduce resource requirements. |
4004 | // |
4005 | // This is a best effort to set things up for a post-RA pass. Optimizations |
4006 | // like generating loads of multiple registers should ideally be done within |
4007 | // the scheduler pass by combining the loads during DAG postprocessing. |
4008 | const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; |
4009 | const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; |
  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
                 Cluster))
4013 | return TryCand.Reason != NoCand; |
4014 | |
4015 | if (SameBoundary) { |
4016 | // Weak edges are for clustering and other constraints. |
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
4020 | return TryCand.Reason != NoCand; |
4021 | } |
4022 | |
4023 | // Avoid increasing the max pressure of the entire region. |
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
4028 | return TryCand.Reason != NoCand; |
4029 | |
4030 | if (SameBoundary) { |
4031 | // Avoid critical resource consumption and balance the schedule. |
4032 | TryCand.initResourceDelta(DAG, SchedModel); |
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources,
                   TryCand, Cand, ResourceDemand))
4039 | return TryCand.Reason != NoCand; |
4040 | |
4041 | // Avoid serializing long latency dependence chains. |
4042 | // For acyclic path limited loops, latency was already checked above. |
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
4045 | return TryCand.Reason != NoCand; |
4046 | |
4047 | // Fall through to original instruction order. |
4048 | if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) |
4049 | || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { |
4050 | TryCand.Reason = NodeOrder; |
4051 | return true; |
4052 | } |
4053 | } |
4054 | |
4055 | return false; |
4056 | } |
4057 | |
4058 | /// Pick the best candidate from the queue. |
4059 | /// |
4060 | /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during |
4061 | /// DAG building. To adjust for the current scheduling location we need to |
4062 | /// maintain the number of vreg uses remaining to be top-scheduled. |
4063 | void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, |
4064 | const CandPolicy &ZonePolicy, |
4065 | const RegPressureTracker &RPTracker, |
4066 | SchedCandidate &Cand) { |
4067 | // getMaxPressureDelta temporarily modifies the tracker. |
4068 | RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); |
4069 | |
4070 | ReadyQueue &Q = Zone.Available; |
4071 | for (SUnit *SU : Q) { |
4072 | |
4073 | SchedCandidate TryCand(ZonePolicy); |
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
4075 | // Pass SchedBoundary only when comparing nodes from the same boundary. |
4076 | SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; |
    if (tryCandidate(Cand, TryCand, ZoneArg)) {
4078 | // Initialize resource delta if needed in case future heuristics query it. |
4079 | if (TryCand.ResDelta == SchedResourceDelta()) |
4080 | TryCand.initResourceDelta(DAG, SchedModel); |
4081 | Cand.setBest(TryCand); |
4082 | LLVM_DEBUG(traceCandidate(Cand)); |
4083 | } |
4084 | } |
4085 | } |
4086 | |
4087 | /// Pick the best candidate node from either the top or bottom queue. |
4088 | SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { |
4089 | // Schedule as far as possible in the direction of no choice. This is most |
4090 | // efficient, but also provides the best heuristics for CriticalPSets. |
4091 | if (SUnit *SU = Bot.pickOnlyChoice()) { |
4092 | IsTopNode = false; |
    tracePick(Only1, /*IsTopNode=*/false);
4094 | return SU; |
4095 | } |
4096 | if (SUnit *SU = Top.pickOnlyChoice()) { |
4097 | IsTopNode = true; |
    tracePick(Only1, /*IsTopNode=*/true);
4099 | return SU; |
4100 | } |
4101 | // Set the bottom-up policy based on the state of the current bottom zone and |
4102 | // the instructions outside the zone, including the top zone. |
4103 | CandPolicy BotPolicy; |
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
4105 | // Set the top-down policy based on the state of the current top zone and |
4106 | // the instructions outside the zone, including the bottom zone. |
4107 | CandPolicy TopPolicy; |
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
4109 | |
4110 | // See if BotCand is still valid (because we previously scheduled from Top). |
4111 | LLVM_DEBUG(dbgs() << "Picking from Bot:\n" ); |
4112 | if (!BotCand.isValid() || BotCand.SU->isScheduled || |
4113 | BotCand.Policy != BotPolicy) { |
4114 | BotCand.reset(NewPolicy: CandPolicy()); |
4115 | pickNodeFromQueue(Zone&: Bot, ZonePolicy: BotPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: BotCand); |
4116 | assert(BotCand.Reason != NoCand && "failed to find the first candidate" ); |
4117 | } else { |
4118 | LLVM_DEBUG(traceCandidate(BotCand)); |
4119 | #ifndef NDEBUG |
4120 | if (VerifyScheduling) { |
4121 | SchedCandidate TCand; |
4122 | TCand.reset(CandPolicy()); |
4123 | pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); |
4124 | assert(TCand.SU == BotCand.SU && |
4125 | "Last pick result should correspond to re-picking right now" ); |
4126 | } |
4127 | #endif |
4128 | } |
4129 | |
4130 | // Check if the top Q has a better candidate. |
4131 | LLVM_DEBUG(dbgs() << "Picking from Top:\n" ); |
4132 | if (!TopCand.isValid() || TopCand.SU->isScheduled || |
4133 | TopCand.Policy != TopPolicy) { |
4134 | TopCand.reset(NewPolicy: CandPolicy()); |
4135 | pickNodeFromQueue(Zone&: Top, ZonePolicy: TopPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TopCand); |
4136 | assert(TopCand.Reason != NoCand && "failed to find the first candidate" ); |
4137 | } else { |
4138 | LLVM_DEBUG(traceCandidate(TopCand)); |
4139 | #ifndef NDEBUG |
4140 | if (VerifyScheduling) { |
4141 | SchedCandidate TCand; |
4142 | TCand.reset(CandPolicy()); |
4143 | pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); |
4144 | assert(TCand.SU == TopCand.SU && |
4145 | "Last pick result should correspond to re-picking right now" ); |
4146 | } |
4147 | #endif |
4148 | } |
4149 | |
4150 | // Pick best from BotCand and TopCand. |
4151 | assert(BotCand.isValid()); |
4152 | assert(TopCand.isValid()); |
4153 | SchedCandidate Cand = BotCand; |
4154 | TopCand.Reason = NoCand; |
  if (tryCandidate(Cand, TopCand, nullptr)) {
4156 | Cand.setBest(TopCand); |
4157 | LLVM_DEBUG(traceCandidate(Cand)); |
4158 | } |
4159 | |
4160 | IsTopNode = Cand.AtTop; |
4161 | tracePick(Cand); |
4162 | return Cand.SU; |
4163 | } |
4164 | |
4165 | /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. |
4166 | SUnit *GenericScheduler::pickNode(bool &IsTopNode) { |
4167 | if (DAG->top() == DAG->bottom()) { |
4168 | assert(Top.Available.empty() && Top.Pending.empty() && |
4169 | Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage" ); |
4170 | return nullptr; |
4171 | } |
4172 | SUnit *SU; |
4173 | do { |
4174 | if (RegionPolicy.OnlyTopDown) { |
4175 | SU = Top.pickOnlyChoice(); |
4176 | if (!SU) { |
4177 | CandPolicy NoPolicy; |
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        tracePick(TopCand);
4182 | SU = TopCand.SU; |
4183 | } |
4184 | IsTopNode = true; |
4185 | } else if (RegionPolicy.OnlyBottomUp) { |
4186 | SU = Bot.pickOnlyChoice(); |
4187 | if (!SU) { |
4188 | CandPolicy NoPolicy; |
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        tracePick(BotCand);
4193 | SU = BotCand.SU; |
4194 | } |
4195 | IsTopNode = false; |
4196 | } else { |
4197 | SU = pickNodeBidirectional(IsTopNode); |
4198 | } |
4199 | } while (SU->isScheduled); |
4200 | |
4201 | // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise, |
4202 | // if isTopReady(), then SU is in either Top.Available or Top.Pending. |
4203 | // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise, |
4204 | // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending. |
4205 | // |
  // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
  // isBottomReady. That is, it didn't factor into the decision to choose SU
  // because it isTopReady or isBottomReady, respectively. In fact, if the
  // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
  // queues respectively contain the original roots and don't get updated when
  // picking a node. So if SU isTopReady on an OnlyBottomUp pick, then it was
  // because we scheduled everything but the top roots. Conversely, if SU
  // isBottomReady on OnlyTopDown, then it was because we scheduled everything
  // but the bottom roots. If it's in a queue even coincidentally, it should be
  // removed so it does not get re-picked in a subsequent pickNode call.
4216 | if (SU->isTopReady()) |
4217 | Top.removeReady(SU); |
4218 | if (SU->isBottomReady()) |
4219 | Bot.removeReady(SU); |
4220 | |
4221 | LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " |
4222 | << *SU->getInstr()); |
4223 | |
4224 | if (IsTopNode) { |
4225 | if (SU->NodeNum == TopIdx++) |
4226 | ++NumInstrsInSourceOrderPreRA; |
4227 | } else { |
4228 | assert(BotIdx < NumRegionInstrs && "out of bounds" ); |
4229 | if (SU->NodeNum == BotIdx--) |
4230 | ++NumInstrsInSourceOrderPreRA; |
4231 | } |
4232 | |
4233 | NumInstrsScheduledPreRA += 1; |
4234 | |
4235 | return SU; |
4236 | } |
4237 | |
4238 | void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { |
4239 | MachineBasicBlock::iterator InsertPos = SU->getInstr(); |
4240 | if (!isTop) |
4241 | ++InsertPos; |
4242 | SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs; |
4243 | |
4244 | // Find already scheduled copies with a single physreg dependence and move |
4245 | // them just above the scheduled instruction. |
4246 | for (SDep &Dep : Deps) { |
4247 | if (Dep.getKind() != SDep::Data || !Dep.getReg().isPhysical()) |
4248 | continue; |
4249 | SUnit *DepSU = Dep.getSUnit(); |
4250 | if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) |
4251 | continue; |
4252 | MachineInstr *Copy = DepSU->getInstr(); |
4253 | if (!Copy->isCopy() && !Copy->isMoveImmediate()) |
4254 | continue; |
4255 | LLVM_DEBUG(dbgs() << " Rescheduling physreg copy " ; |
4256 | DAG->dumpNode(*Dep.getSUnit())); |
4257 | DAG->moveInstruction(MI: Copy, InsertPos); |
4258 | } |
4259 | } |
4260 | |
4261 | /// Update the scheduler's state after scheduling a node. This is the same node |
4262 | /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to |
/// update its state based on the current cycle before MachineSchedStrategy
4264 | /// does. |
4265 | /// |
4266 | /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling |
4267 | /// them here. See comments in biasPhysReg. |
4268 | void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { |
4269 | if (IsTopNode) { |
    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    TopCluster = DAG->getCluster(SU->ParentClusterIdx);
4272 | LLVM_DEBUG(if (TopCluster) { |
4273 | dbgs() << " Top Cluster: " ; |
4274 | for (auto *N : *TopCluster) |
4275 | dbgs() << N->NodeNum << '\t'; |
4276 | dbgs() << '\n'; |
4277 | }); |
4278 | Top.bumpNode(SU); |
4279 | if (SU->hasPhysRegUses) |
      reschedulePhysReg(SU, /*isTop=*/true);
4281 | } else { |
    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
    BotCluster = DAG->getCluster(SU->ParentClusterIdx);
4284 | LLVM_DEBUG(if (BotCluster) { |
4285 | dbgs() << " Bot Cluster: " ; |
4286 | for (auto *N : *BotCluster) |
4287 | dbgs() << N->NodeNum << '\t'; |
4288 | dbgs() << '\n'; |
4289 | }); |
4290 | Bot.bumpNode(SU); |
4291 | if (SU->hasPhysRegDefs) |
      reschedulePhysReg(SU, /*isTop=*/false);
4293 | } |
4294 | } |
4295 | |
4296 | static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { |
4297 | return createSchedLive(C); |
4298 | } |
4299 | |
4300 | static MachineSchedRegistry |
4301 | GenericSchedRegistry("converge" , "Standard converging scheduler." , |
4302 | createConvergingSched); |
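
// Like every MachineSchedRegistry entry, this strategy can be selected on the
// command line via -misched=<name>, e.g. (assuming a standard llc invocation):
//   llc -enable-misched -misched=converge foo.ll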
4303 | |
4304 | //===----------------------------------------------------------------------===// |
4305 | // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. |
4306 | //===----------------------------------------------------------------------===// |
4307 | |
4308 | void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { |
4309 | DAG = Dag; |
4310 | SchedModel = DAG->getSchedModel(); |
4311 | TRI = DAG->TRI; |
4312 | |
4313 | Rem.init(DAG, SchedModel); |
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);
4316 | |
4317 | // Initialize the HazardRecognizers. If itineraries don't exist, are empty, |
4318 | // or are disabled, then these HazardRecs will be disabled. |
4319 | const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); |
4320 | if (!Top.HazardRec) { |
4321 | Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); |
4322 | } |
4323 | if (!Bot.HazardRec) { |
4324 | Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); |
4325 | } |
4326 | TopCluster = nullptr; |
4327 | BotCluster = nullptr; |
4328 | } |
4329 | |
4330 | void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, |
4331 | MachineBasicBlock::iterator End, |
4332 | unsigned NumRegionInstrs) { |
4333 | const MachineFunction &MF = *Begin->getMF(); |
4334 | |
4335 | // Default to top-down because it was implemented first and existing targets |
4336 | // expect that behavior by default. |
4337 | RegionPolicy.OnlyTopDown = true; |
4338 | RegionPolicy.OnlyBottomUp = false; |
4339 | |
4340 | // Allow the subtarget to override default policy. |
  MF.getSubtarget().overridePostRASchedPolicy(RegionPolicy, NumRegionInstrs);
4342 | |
4343 | // After subtarget overrides, apply command line options. |
4344 | if (PostRADirection == MISched::TopDown) { |
4345 | RegionPolicy.OnlyTopDown = true; |
4346 | RegionPolicy.OnlyBottomUp = false; |
4347 | } else if (PostRADirection == MISched::BottomUp) { |
4348 | RegionPolicy.OnlyTopDown = false; |
4349 | RegionPolicy.OnlyBottomUp = true; |
4350 | } else if (PostRADirection == MISched::Bidirectional) { |
4351 | RegionPolicy.OnlyBottomUp = false; |
4352 | RegionPolicy.OnlyTopDown = false; |
4353 | } |
4354 | |
4355 | BotIdx = NumRegionInstrs - 1; |
4356 | this->NumRegionInstrs = NumRegionInstrs; |
4357 | } |
4358 | |
4359 | void PostGenericScheduler::registerRoots() { |
4360 | Rem.CriticalPath = DAG->ExitSU.getDepth(); |
4361 | |
4362 | // Some roots may not feed into ExitSU. Check all of them in case. |
4363 | for (const SUnit *SU : Bot.Available) { |
4364 | if (SU->getDepth() > Rem.CriticalPath) |
4365 | Rem.CriticalPath = SU->getDepth(); |
4366 | } |
4367 | LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); |
4368 | if (DumpCriticalPathLength) { |
4369 | errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n" ; |
4370 | } |
4371 | } |
4372 | |
4373 | /// Apply a set of heuristics to a new candidate for PostRA scheduling. |
4374 | /// |
4375 | /// \param Cand provides the policy and current best candidate. |
4376 | /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. |
4377 | /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) |
4378 | bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, |
4379 | SchedCandidate &TryCand) { |
4380 | // Initialize the candidate if needed. |
4381 | if (!Cand.isValid()) { |
4382 | TryCand.Reason = FirstValid; |
4383 | return true; |
4384 | } |
4385 | |
4386 | // Prioritize instructions that read unbuffered resources by stall cycles. |
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
4389 | return TryCand.Reason != NoCand; |
4390 | |
4391 | // Keep clustered nodes together. |
4392 | const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; |
4393 | const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; |
  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
                 Cluster))
4397 | return TryCand.Reason != NoCand; |
4398 | // Avoid critical resource consumption and balance the schedule. |
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return TryCand.Reason != NoCand;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
4405 | return TryCand.Reason != NoCand; |
4406 | |
4407 | // We only compare a subset of features when comparing nodes between |
4408 | // Top and Bottom boundary. |
4409 | if (Cand.AtTop == TryCand.AtTop) { |
4410 | // Avoid serializing long latency dependence chains. |
4411 | if (Cand.Policy.ReduceLatency && |
        tryLatency(TryCand, Cand, Cand.AtTop ? Top : Bot))
4413 | return TryCand.Reason != NoCand; |
4414 | } |
4415 | |
4416 | // Fall through to original instruction order. |
4417 | if (TryCand.SU->NodeNum < Cand.SU->NodeNum) { |
4418 | TryCand.Reason = NodeOrder; |
4419 | return true; |
4420 | } |
4421 | |
4422 | return false; |
4423 | } |
4424 | |
4425 | void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, |
4426 | SchedCandidate &Cand) { |
4427 | ReadyQueue &Q = Zone.Available; |
4428 | for (SUnit *SU : Q) { |
4429 | SchedCandidate TryCand(Cand.Policy); |
4430 | TryCand.SU = SU; |
4431 | TryCand.AtTop = Zone.isTop(); |
4432 | TryCand.initResourceDelta(DAG, SchedModel); |
4433 | if (tryCandidate(Cand, TryCand)) { |
4434 | Cand.setBest(TryCand); |
4435 | LLVM_DEBUG(traceCandidate(Cand)); |
4436 | } |
4437 | } |
4438 | } |
4439 | |
4440 | /// Pick the best candidate node from either the top or bottom queue. |
4441 | SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) { |
  // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
4443 | // out common parts. |
4444 | |
4445 | // Schedule as far as possible in the direction of no choice. This is most |
4446 | // efficient, but also provides the best heuristics for CriticalPSets. |
4447 | if (SUnit *SU = Bot.pickOnlyChoice()) { |
4448 | IsTopNode = false; |
    tracePick(Only1, /*IsTopNode=*/false, /*IsPostRA=*/true);
4450 | return SU; |
4451 | } |
4452 | if (SUnit *SU = Top.pickOnlyChoice()) { |
4453 | IsTopNode = true; |
    tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4455 | return SU; |
4456 | } |
4457 | // Set the bottom-up policy based on the state of the current bottom zone and |
4458 | // the instructions outside the zone, including the top zone. |
4459 | CandPolicy BotPolicy; |
  setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
4461 | // Set the top-down policy based on the state of the current top zone and |
4462 | // the instructions outside the zone, including the bottom zone. |
4463 | CandPolicy TopPolicy; |
  setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
4465 | |
4466 | // See if BotCand is still valid (because we previously scheduled from Top). |
4467 | LLVM_DEBUG(dbgs() << "Picking from Bot:\n" ); |
4468 | if (!BotCand.isValid() || BotCand.SU->isScheduled || |
4469 | BotCand.Policy != BotPolicy) { |
4470 | BotCand.reset(NewPolicy: CandPolicy()); |
4471 | pickNodeFromQueue(Zone&: Bot, Cand&: BotCand); |
4472 | assert(BotCand.Reason != NoCand && "failed to find the first candidate" ); |
4473 | } else { |
4474 | LLVM_DEBUG(traceCandidate(BotCand)); |
4475 | #ifndef NDEBUG |
4476 | if (VerifyScheduling) { |
4477 | SchedCandidate TCand; |
4478 | TCand.reset(CandPolicy()); |
      pickNodeFromQueue(Bot, TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
4482 | } |
4483 | #endif |
4484 | } |
4485 | |
4486 | // Check if the top Q has a better candidate. |
4487 | LLVM_DEBUG(dbgs() << "Picking from Top:\n" ); |
4488 | if (!TopCand.isValid() || TopCand.SU->isScheduled || |
4489 | TopCand.Policy != TopPolicy) { |
4490 | TopCand.reset(NewPolicy: CandPolicy()); |
4491 | pickNodeFromQueue(Zone&: Top, Cand&: TopCand); |
4492 | assert(TopCand.Reason != NoCand && "failed to find the first candidate" ); |
4493 | } else { |
4494 | LLVM_DEBUG(traceCandidate(TopCand)); |
4495 | #ifndef NDEBUG |
4496 | if (VerifyScheduling) { |
4497 | SchedCandidate TCand; |
4498 | TCand.reset(CandPolicy()); |
      pickNodeFromQueue(Top, TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
4502 | } |
4503 | #endif |
4504 | } |
4505 | |
4506 | // Pick best from BotCand and TopCand. |
4507 | assert(BotCand.isValid()); |
4508 | assert(TopCand.isValid()); |
4509 | SchedCandidate Cand = BotCand; |
4510 | TopCand.Reason = NoCand; |
  if (tryCandidate(Cand, TopCand)) {
4512 | Cand.setBest(TopCand); |
4513 | LLVM_DEBUG(traceCandidate(Cand)); |
4514 | } |
4515 | |
4516 | IsTopNode = Cand.AtTop; |
4517 | tracePick(Cand, /*IsPostRA=*/true); |
4518 | return Cand.SU; |
4519 | } |
4520 | |
4521 | /// Pick the next node to schedule. |
4522 | SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { |
4523 | if (DAG->top() == DAG->bottom()) { |
4524 | assert(Top.Available.empty() && Top.Pending.empty() && |
4525 | Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage" ); |
4526 | return nullptr; |
4527 | } |
4528 | SUnit *SU; |
4529 | do { |
4530 | if (RegionPolicy.OnlyBottomUp) { |
4531 | SU = Bot.pickOnlyChoice(); |
4532 | if (SU) { |
        tracePick(Only1, /*IsTopNode=*/false, /*IsPostRA=*/true);
4534 | } else { |
4535 | CandPolicy NoPolicy; |
        BotCand.reset(NoPolicy);
        // Set the bottom-up policy based on the state of the current bottom
        // zone and the instructions outside the zone, including the top zone.
        setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
        pickNodeFromQueue(Bot, BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        tracePick(BotCand, /*IsPostRA=*/true);
4543 | SU = BotCand.SU; |
4544 | } |
4545 | IsTopNode = false; |
4546 | } else if (RegionPolicy.OnlyTopDown) { |
4547 | SU = Top.pickOnlyChoice(); |
4548 | if (SU) { |
        tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4550 | } else { |
4551 | CandPolicy NoPolicy; |
        TopCand.reset(NoPolicy);
        // Set the top-down policy based on the state of the current top zone
        // and the instructions outside the zone, including the bottom zone.
        setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
        pickNodeFromQueue(Top, TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        tracePick(TopCand, /*IsPostRA=*/true);
4559 | SU = TopCand.SU; |
4560 | } |
4561 | IsTopNode = true; |
4562 | } else { |
4563 | SU = pickNodeBidirectional(IsTopNode); |
4564 | } |
4565 | } while (SU->isScheduled); |
4566 | |
4567 | if (SU->isTopReady()) |
4568 | Top.removeReady(SU); |
4569 | if (SU->isBottomReady()) |
4570 | Bot.removeReady(SU); |
4571 | |
4572 | LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " |
4573 | << *SU->getInstr()); |
4574 | |
4575 | if (IsTopNode) { |
4576 | if (SU->NodeNum == TopIdx++) |
4577 | ++NumInstrsInSourceOrderPostRA; |
4578 | } else { |
4579 | assert(BotIdx < NumRegionInstrs && "out of bounds" ); |
4580 | if (SU->NodeNum == BotIdx--) |
4581 | ++NumInstrsInSourceOrderPostRA; |
4582 | } |
4583 | |
4584 | NumInstrsScheduledPostRA += 1; |
4585 | |
4586 | return SU; |
4587 | } |
4588 | |
4589 | /// Called after ScheduleDAGMI has scheduled an instruction and updated |
4590 | /// scheduled/remaining flags in the DAG nodes. |
4591 | void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { |
4592 | if (IsTopNode) { |
    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
    TopCluster = DAG->getCluster(SU->ParentClusterIdx);
4595 | Top.bumpNode(SU); |
4596 | } else { |
    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
    BotCluster = DAG->getCluster(SU->ParentClusterIdx);
4599 | Bot.bumpNode(SU); |
4600 | } |
4601 | } |
4602 | |
4603 | //===----------------------------------------------------------------------===// |
4604 | // ILP Scheduler. Currently for experimental analysis of heuristics. |
4605 | //===----------------------------------------------------------------------===// |
4606 | |
4607 | namespace { |
4608 | |
4609 | /// Order nodes by the ILP metric. |
4610 | struct ILPOrder { |
4611 | const SchedDFSResult *DFSResult = nullptr; |
4612 | const BitVector *ScheduledTrees = nullptr; |
4613 | bool MaximizeILP; |
4614 | |
4615 | ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {} |
4616 | |
4617 | /// Apply a less-than relation on node priority. |
4618 | /// |
4619 | /// (Return true if A comes after B in the Q.) |
4620 | bool operator()(const SUnit *A, const SUnit *B) const { |
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
            < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
4639 | } |
4640 | }; |
4641 | |
4642 | /// Schedule based on the ILP metric. |
4643 | class ILPScheduler : public MachineSchedStrategy { |
4644 | ScheduleDAGMILive *DAG = nullptr; |
4645 | ILPOrder Cmp; |
4646 | |
4647 | std::vector<SUnit*> ReadyQ; |
4648 | |
4649 | public: |
4650 | ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {} |
4651 | |
4652 | void initialize(ScheduleDAGMI *dag) override { |
4653 | assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness" ); |
4654 | DAG = static_cast<ScheduleDAGMILive*>(dag); |
4655 | DAG->computeDFSResult(); |
4656 | Cmp.DFSResult = DAG->getDFSResult(); |
4657 | Cmp.ScheduledTrees = &DAG->getScheduledTrees(); |
4658 | ReadyQ.clear(); |
4659 | } |
4660 | |
4661 | void registerRoots() override { |
4662 | // Restore the heap in ReadyQ with the updated DFS results. |
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4664 | } |
4665 | |
4666 | /// Implement MachineSchedStrategy interface. |
4667 | /// ----------------------------------------- |
4668 | |
4669 | /// Callback to select the highest priority node from the ready Q. |
4670 | SUnit *pickNode(bool &IsTopNode) override { |
4671 | if (ReadyQ.empty()) return nullptr; |
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4673 | SUnit *SU = ReadyQ.back(); |
4674 | ReadyQ.pop_back(); |
4675 | IsTopNode = false; |
4676 | LLVM_DEBUG(dbgs() << "Pick node " |
4677 | << "SU(" << SU->NodeNum << ") " |
4678 | << " ILP: " << DAG->getDFSResult()->getILP(SU) |
4679 | << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) |
4680 | << " @" |
4681 | << DAG->getDFSResult()->getSubtreeLevel( |
4682 | DAG->getDFSResult()->getSubtreeID(SU)) |
4683 | << '\n' |
4684 | << "Scheduling " << *SU->getInstr()); |
4685 | return SU; |
4686 | } |
4687 | |
4688 | /// Scheduler callback to notify that a new subtree is scheduled. |
4689 | void scheduleTree(unsigned SubtreeID) override { |
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4691 | } |
4692 | |
4693 | /// Callback after a node is scheduled. Mark a newly scheduled tree, notify |
4694 | /// DFSResults, and resort the priority Q. |
4695 | void schedNode(SUnit *SU, bool IsTopNode) override { |
4696 | assert(!IsTopNode && "SchedDFSResult needs bottom-up" ); |
4697 | } |
4698 | |
4699 | void releaseTopNode(SUnit *) override { /*only called for top roots*/ } |
4700 | |
4701 | void releaseBottomNode(SUnit *SU) override { |
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4704 | } |
4705 | }; |
4706 | |
4707 | } // end anonymous namespace |
4708 | |
4709 | static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { |
  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(
                                      /*MaximizeILP=*/true));
4711 | } |
4712 | static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { |
  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(
                                      /*MaximizeILP=*/false));
4714 | } |
4715 | |
4716 | static MachineSchedRegistry ILPMaxRegistry( |
4717 | "ilpmax" , "Schedule bottom-up for max ILP" , createILPMaxScheduler); |
4718 | static MachineSchedRegistry ILPMinRegistry( |
4719 | "ilpmin" , "Schedule bottom-up for min ILP" , createILPMinScheduler); |
4720 | |
4721 | //===----------------------------------------------------------------------===// |
4722 | // Machine Instruction Shuffler for Correctness Testing |
4723 | //===----------------------------------------------------------------------===// |
4724 | |
4725 | #ifndef NDEBUG |
4726 | namespace { |
4727 | |
4728 | /// Apply a less-than relation on the node order, which corresponds to the |
4729 | /// instruction order prior to scheduling. IsReverse implements greater-than. |
4730 | template<bool IsReverse> |
4731 | struct SUnitOrder { |
4732 | bool operator()(SUnit *A, SUnit *B) const { |
4733 | if (IsReverse) |
4734 | return A->NodeNum > B->NodeNum; |
4735 | else |
4736 | return A->NodeNum < B->NodeNum; |
4737 | } |
4738 | }; |
4739 | |
4740 | /// Reorder instructions as much as possible. |
4741 | class InstructionShuffler : public MachineSchedStrategy { |
4742 | bool IsAlternating; |
4743 | bool IsTopDown; |
4744 | |
4745 | // Using a less-than relation (SUnitOrder<false>) for the TopQ priority |
4746 | // gives nodes with a higher number higher priority causing the latest |
4747 | // instructions to be scheduled first. |
4748 | PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>> |
4749 | TopQ; |
4750 | |
4751 | // When scheduling bottom-up, use greater-than as the queue priority. |
4752 | PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>> |
4753 | BottomQ; |
4754 | |
4755 | public: |
4756 | InstructionShuffler(bool alternate, bool topdown) |
4757 | : IsAlternating(alternate), IsTopDown(topdown) {} |
4758 | |
4759 | void initialize(ScheduleDAGMI*) override { |
4760 | TopQ.clear(); |
4761 | BottomQ.clear(); |
4762 | } |
4763 | |
4764 | /// Implement MachineSchedStrategy interface. |
4765 | /// ----------------------------------------- |
4766 | |
4767 | SUnit *pickNode(bool &IsTopNode) override { |
4768 | SUnit *SU; |
4769 | if (IsTopDown) { |
4770 | do { |
4771 | if (TopQ.empty()) return nullptr; |
4772 | SU = TopQ.top(); |
4773 | TopQ.pop(); |
4774 | } while (SU->isScheduled); |
4775 | IsTopNode = true; |
4776 | } else { |
4777 | do { |
4778 | if (BottomQ.empty()) return nullptr; |
4779 | SU = BottomQ.top(); |
4780 | BottomQ.pop(); |
4781 | } while (SU->isScheduled); |
4782 | IsTopNode = false; |
4783 | } |
4784 | if (IsAlternating) |
4785 | IsTopDown = !IsTopDown; |
4786 | return SU; |
4787 | } |
4788 | |
4789 | void schedNode(SUnit *SU, bool IsTopNode) override {} |
4790 | |
4791 | void releaseTopNode(SUnit *SU) override { |
4792 | TopQ.push(SU); |
4793 | } |
4794 | void releaseBottomNode(SUnit *SU) override { |
4795 | BottomQ.push(SU); |
4796 | } |
4797 | }; |
4798 | |
4799 | } // end anonymous namespace |
4800 | |
4801 | static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { |
4802 | bool Alternate = |
4803 | PreRADirection != MISched::TopDown && PreRADirection != MISched::BottomUp; |
4804 | bool TopDown = PreRADirection != MISched::BottomUp; |
4805 | return new ScheduleDAGMILive( |
4806 | C, std::make_unique<InstructionShuffler>(Alternate, TopDown)); |
4807 | } |
4808 | |
4809 | static MachineSchedRegistry ShufflerRegistry( |
4810 | "shuffle" , "Shuffle machine instructions alternating directions" , |
4811 | createInstructionShuffler); |
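
// In debug builds the shuffler can be used to stress-test dependence
// correctness, e.g. (assuming a standard llc invocation):
//   llc -enable-misched -misched=shuffle foo.ll
// Any schedule it produces must still respect the DAG edges, so a miscompile
// under shuffling points at a missing dependence rather than a bad heuristic.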
4812 | #endif // !NDEBUG |
4813 | |
4814 | //===----------------------------------------------------------------------===// |
4815 | // GraphWriter support for ScheduleDAGMILive. |
4816 | //===----------------------------------------------------------------------===// |
4817 | |
4818 | #ifndef NDEBUG |
4819 | namespace llvm { |
4820 | |
4821 | template<> struct GraphTraits< |
4822 | ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; |
4823 | |
4824 | template<> |
4825 | struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { |
4826 | DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} |
4827 | |
4828 | static std::string getGraphName(const ScheduleDAG *G) { |
4829 | return std::string(G->MF.getName()); |
4830 | } |
4831 | |
4832 | static bool renderGraphFromBottomUp() { |
4833 | return true; |
4834 | } |
4835 | |
4836 | static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { |
4837 | if (ViewMISchedCutoff == 0) |
4838 | return false; |
4839 | return (Node->Preds.size() > ViewMISchedCutoff |
4840 | || Node->Succs.size() > ViewMISchedCutoff); |
4841 | } |
4842 | |
4843 | /// If you want to override the dot attributes printed for a particular |
4844 | /// edge, override this method. |
4845 | static std::string getEdgeAttributes(const SUnit *Node, |
4846 | SUnitIterator EI, |
4847 | const ScheduleDAG *Graph) { |
    if (EI.isArtificialDep())
      return "color=cyan,style=dashed";
    if (EI.isCtrlDep())
      return "color=blue,style=dashed";
    return "";
4853 | } |
4854 | |
4855 | static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { |
4856 | std::string Str; |
4857 | raw_string_ostream SS(Str); |
4858 | const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); |
4859 | const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? |
4860 | static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; |
4861 | SS << "SU:" << SU->NodeNum; |
4862 | if (DFS) |
4863 | SS << " I:" << DFS->getNumInstrs(SU); |
4864 | return Str; |
4865 | } |
4866 | |
4867 | static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { |
4868 | return G->getGraphNodeLabel(SU); |
4869 | } |
4870 | |
4871 | static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { |
4872 | std::string Str("shape=Mrecord" ); |
4873 | const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); |
4874 | const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? |
4875 | static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; |
4876 | if (DFS) { |
4877 | Str += ",style=filled,fillcolor=\"#" ; |
4878 | Str += DOT::getColorString(DFS->getSubtreeID(N)); |
4879 | Str += '"'; |
4880 | } |
4881 | return Str; |
4882 | } |
4883 | }; |
4884 | |
4885 | } // end namespace llvm |
4886 | #endif // NDEBUG |
4887 | |
4888 | /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG |
4889 | /// rendered using 'dot'. |
4890 | void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { |
4891 | #ifndef NDEBUG |
4892 | ViewGraph(this, Name, false, Title); |
4893 | #else |
4894 | errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " |
4895 | << "systems with Graphviz or gv!\n" ; |
4896 | #endif // NDEBUG |
4897 | } |
4898 | |
4899 | /// Out-of-line implementation with no arguments is handy for gdb. |
4900 | void ScheduleDAGMI::viewGraph() { |
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4902 | } |
4903 | |
4904 | /// Sort predicate for the intervals stored in an instance of |
4905 | /// ResourceSegments. Intervals are always disjoint (no intersection |
4906 | /// for any pairs of intervals), therefore we can sort the totality of |
4907 | /// the intervals by looking only at the left boundary. |
4908 | static bool sortIntervals(const ResourceSegments::IntervalTy &A, |
4909 | const ResourceSegments::IntervalTy &B) { |
4910 | return A.first < B.first; |
4911 | } |
4912 | |
4913 | unsigned ResourceSegments::getFirstAvailableAt( |
4914 | unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle, |
4915 | std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)> |
4916 | IntervalBuilder) const { |
4917 | assert(llvm::is_sorted(_Intervals, sortIntervals) && |
4918 | "Cannot execute on an un-sorted set of intervals." ); |
4919 | |
4920 | // Zero resource usage is allowed by TargetSchedule.td but we do not construct |
4921 | // a ResourceSegment interval for that situation. |
4922 | if (AcquireAtCycle == ReleaseAtCycle) |
4923 | return CurrCycle; |
4924 | |
4925 | unsigned RetCycle = CurrCycle; |
4926 | ResourceSegments::IntervalTy NewInterval = |
4927 | IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle); |
4928 | for (auto &Interval : _Intervals) { |
    if (!intersects(NewInterval, Interval))
4930 | continue; |
4931 | |
4932 | // Move the interval right next to the top of the one it |
4933 | // intersects. |
4934 | assert(Interval.second > NewInterval.first && |
4935 | "Invalid intervals configuration." ); |
4936 | RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first; |
4937 | NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle); |
4938 | } |
4939 | return RetCycle; |
4940 | } |
4941 | |
4942 | void ResourceSegments::add(ResourceSegments::IntervalTy A, |
4943 | const unsigned CutOff) { |
4944 | assert(A.first <= A.second && "Cannot add negative resource usage" ); |
4945 | assert(CutOff > 0 && "0-size interval history has no use." ); |
4946 | // Zero resource usage is allowed by TargetSchedule.td, in the case that the |
4947 | // instruction needed the resource to be available but does not use it. |
4948 | // However, ResourceSegment represents an interval that is closed on the left |
4949 | // and open on the right. It is impossible to represent an empty interval when |
4950 | // the left is closed. Do not add it to Intervals. |
4951 | if (A.first == A.second) |
4952 | return; |
4953 | |
4954 | assert(all_of(_Intervals, |
4955 | [&A](const ResourceSegments::IntervalTy &Interval) -> bool { |
4956 | return !intersects(A, Interval); |
4957 | }) && |
4958 | "A resource is being overwritten" ); |
  _Intervals.push_back(A);
4960 | |
4961 | sortAndMerge(); |
4962 | |
4963 | // Do not keep the full history of the intervals, just the |
4964 | // latest #CutOff. |
4965 | while (_Intervals.size() > CutOff) |
4966 | _Intervals.pop_front(); |
4967 | } |
4968 | |
4969 | bool ResourceSegments::intersects(ResourceSegments::IntervalTy A, |
4970 | ResourceSegments::IntervalTy B) { |
4971 | assert(A.first <= A.second && "Invalid interval" ); |
4972 | assert(B.first <= B.second && "Invalid interval" ); |
4973 | |
4974 | // Share one boundary. |
4975 | if ((A.first == B.first) || (A.second == B.second)) |
4976 | return true; |
4977 | |
  // full intersect:  [    ***     )  B
  //                      [***)       A
  if ((A.first > B.first) && (A.second < B.second))
    return true;

  // right intersect: [     ***)       B
  //                      [***      )  A
  if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
    return true;

  // left intersect:      [***      )  B
  //                  [     ***)       A
  if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
    return true;
4992 | |
4993 | return false; |
4994 | } |
4995 | |
4996 | void ResourceSegments::sortAndMerge() { |
4997 | if (_Intervals.size() <= 1) |
4998 | return; |
4999 | |
5000 | // First sort the collection. |
  _Intervals.sort(sortIntervals);
5002 | |
  // We can use std::next because the list has at least two elements.
  auto next = std::next(std::begin(_Intervals));
  auto E = std::end(_Intervals);
  for (; next != E; ++next) {
    if (std::prev(next)->second >= next->first) {
      next->first = std::prev(next)->first;
      _Intervals.erase(std::prev(next));
5010 | continue; |
5011 | } |
5012 | } |
5013 | } |
5014 | |