LoopFuse.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/LoopFuse.cpp]

1	//===- LoopFuse.cpp - Loop Fusion Pass ------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file implements the loop fusion pass.
11	/// The implementation is largely based on the following document:
12	///
13	/// Code Transformations to Augment the Scope of Loop Fusion in a
14	/// Production Compiler
15	/// Christopher Mark Barton
16	/// MSc Thesis
17	/// https://webdocs.cs.ualberta.ca/~amaral/thesis/ChristopherBartonMSc.pdf
18	///
19	/// The general approach taken is to collect sets of control flow equivalent
20	/// loops and test whether they can be fused. The necessary conditions for
21	/// fusion are:
22	/// 1. The loops must be adjacent (there cannot be any statements between
23	/// the two loops).
24	/// 2. The loops must be conforming (they must execute the same number of
25	/// iterations).
26	/// 3. The loops must be control flow equivalent (if one loop executes, the
27	/// other is guaranteed to execute).
28	/// 4. There cannot be any negative distance dependencies between the loops.
29	/// If all of these conditions are satisfied, it is safe to fuse the loops.
30	///
31	/// This implementation creates FusionCandidates that represent the loop and the
32	/// necessary information needed by fusion. It then operates on the fusion
33	/// candidates, first confirming that the candidate is eligible for fusion. The
34	/// candidates are then collected into control flow equivalent sets, sorted in
35	/// dominance order. Each set of control flow equivalent candidates is then
36	/// traversed, attempting to fuse pairs of candidates in the set. If all
37	/// requirements for fusion are met, the two candidates are fused, creating a
38	/// new (fused) candidate which is then added back into the set to consider for
39	/// additional fusion.
40	///
41	/// This implementation currently does not make any modifications to remove
42	/// conditions for fusion. Code transformations to make loops conform to each of
43	/// the conditions for fusion are discussed in more detail in the document
44	/// above. These can be added to the current implementation in the future.
45	//===----------------------------------------------------------------------===//
46
47	#include "llvm/Transforms/Scalar/LoopFuse.h"
48	#include "llvm/ADT/Statistic.h"
49	#include "llvm/Analysis/AssumptionCache.h"
50	#include "llvm/Analysis/DependenceAnalysis.h"
51	#include "llvm/Analysis/DomTreeUpdater.h"
52	#include "llvm/Analysis/LoopInfo.h"
53	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
54	#include "llvm/Analysis/PostDominators.h"
55	#include "llvm/Analysis/ScalarEvolution.h"
56	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
57	#include "llvm/Analysis/TargetTransformInfo.h"
58	#include "llvm/IR/Function.h"
59	#include "llvm/IR/Verifier.h"
60	#include "llvm/Support/CommandLine.h"
61	#include "llvm/Support/Debug.h"
62	#include "llvm/Support/raw_ostream.h"
63	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
64	#include "llvm/Transforms/Utils/CodeMoverUtils.h"
65	#include "llvm/Transforms/Utils/LoopPeel.h"
66	#include "llvm/Transforms/Utils/LoopSimplify.h"
67	#include <list>
68
69	using namespace llvm;
70
71	#define DEBUG_TYPE "loop-fusion"
72
73	STATISTIC(FuseCounter, "Loops fused");
74	STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
75	STATISTIC(InvalidPreheader, "Loop has invalid preheader");
76	STATISTIC(InvalidHeader, "Loop has invalid header");
77	STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks");
78	STATISTIC(InvalidExitBlock, "Loop has invalid exit block");
79	STATISTIC(InvalidLatch, "Loop has invalid latch");
80	STATISTIC(InvalidLoop, "Loop is invalid");
81	STATISTIC(AddressTakenBB, "Basic block has address taken");
82	STATISTIC(MayThrowException, "Loop may throw an exception");
83	STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
84	STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
85	STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
86	STATISTIC(UnknownTripCount, "Loop has unknown trip count");
87	STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
88	STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
89	STATISTIC(
90	NonEmptyPreheader,
91	"Loop has a non-empty preheader with instructions that cannot be moved");
92	STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
93	STATISTIC(NonIdenticalGuards, "Candidates have different guards");
94	STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block with "
95	"instructions that cannot be moved");
96	STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with "
97	"instructions that cannot be moved");
98	STATISTIC(NotRotated, "Candidate is not rotated");
99	STATISTIC(OnlySecondCandidateIsGuarded,
100	"The second candidate is guarded while the first one is not");
101	STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
102	STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");
103	STATISTIC(NumDA, "DA checks passed");
104
/// Values accepted by the -loop-fusion-dependence-analysis option.
enum FusionDependenceAnalysisChoice {
  /// Use the scalar evolution interface.
  FUSION_DEPENDENCE_ANALYSIS_SCEV,
  /// Use the dependence analysis interface.
  FUSION_DEPENDENCE_ANALYSIS_DA,
  /// Use all available analyses.
  FUSION_DEPENDENCE_ANALYSIS_ALL,
};
110
111	static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
112	"loop-fusion-dependence-analysis",
113	cl::desc ("Which dependence analysis should loop fusion use?"),
114	cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev",
115	"Use the scalar evolution interface"),
116	clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da",
117	"Use the dependence analysis interface"),
118	clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all",
119	"Use all available analyses")),
120	cl::Hidden, cl::init(Val: FUSION_DEPENDENCE_ANALYSIS_ALL));
121
122	static cl::opt<unsigned> FusionPeelMaxCount(
123	"loop-fusion-peel-max-count", cl::init(Val: `0`), cl::Hidden,
124	cl::desc ("Max number of iterations to be peeled from a loop, such that "
125	"fusion can take place"));
126
127	#ifndef NDEBUG
128	static cl::opt<bool>
129	VerboseFusionDebugging("loop-fusion-verbose-debug",
130	cl::desc("Enable verbose debugging for Loop Fusion"),
131	cl::Hidden, cl::init(false));
132	#endif
133
134	namespace {
135	/// This class is used to represent a candidate for loop fusion. When it is
136	/// constructed, it checks the conditions for loop fusion to ensure that it
137	/// represents a valid candidate. It caches several parts of a loop that are
138	/// used throughout loop fusion (e.g., loop preheader, loop header, etc) instead
139	/// of continually querying the underlying Loop to retrieve these values. It is
140	/// assumed these will not change throughout loop fusion.
141	///
142	/// The invalidate method should be used to indicate that the FusionCandidate is
143	/// no longer a valid candidate for fusion. Similarly, the isValid() method can
144	/// be used to ensure that the FusionCandidate is still valid for fusion.
145	struct FusionCandidate {
146	/// Cache of parts of the loop used throughout loop fusion. These should not
147	/// need to change throughout the analysis and transformation.
148	/// These parts are cached to avoid repeatedly looking up in the Loop class.
149
150	/// Preheader of the loop this candidate represents
151	BasicBlock *Preheader;
152	/// Header of the loop this candidate represents
153	BasicBlock *Header;
154	/// Blocks in the loop that exit the loop
155	BasicBlock *ExitingBlock;
156	/// The successor block of this loop (where the exiting blocks go to)
157	BasicBlock *ExitBlock;
158	/// Latch of the loop
159	BasicBlock *Latch;
160	/// The loop that this fusion candidate represents
161	Loop *L;
162	/// Vector of instructions in this loop that read from memory
163	SmallVector<Instruction *, `16`> MemReads;
164	/// Vector of instructions in this loop that write to memory
165	SmallVector<Instruction *, `16`> MemWrites;
166	/// Are all of the members of this fusion candidate still valid
167	bool Valid;
168	/// Guard branch of the loop, if it exists
169	CondBrInst *GuardBranch;
170	/// Peeling Paramaters of the Loop.
171	TTI::PeelingPreferences PP;
172	/// Can you Peel this Loop?
173	bool AbleToPeel;
174	/// Has this loop been Peeled
175	bool Peeled;
176
177	DominatorTree &DT;
178	const PostDominatorTree *PDT;
179
180	OptimizationRemarkEmitter &ORE;
181
182	FusionCandidate(Loop L, DominatorTree &DT, const* PostDominatorTree *PDT,
183	OptimizationRemarkEmitter &ORE, TTI::PeelingPreferences PP)
184	: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
185	ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
186	Latch(L->getLoopLatch()), L(L), Valid(true),
187	GuardBranch(L->getLoopGuardBranch()), PP (PP), AbleToPeel(canPeel(L)),
188	Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
189
190	// Walk over all blocks in the loop and check for conditions that may
191	// prevent fusion. For each block, walk over all instructions and collect
192	// the memory reads and writes If any instructions that prevent fusion are
193	// found, invalidate this object and return.
194	for (BasicBlock *BB : L->blocks()) {
195	if (BB->hasAddressTaken()) {
196	invalidate();
197	reportInvalidCandidate(Stat&: AddressTakenBB);
198	return;
199	}
200
201	for (Instruction &I : *BB) {
202	if (I.mayThrow()) {
203	invalidate();
204	reportInvalidCandidate(Stat&: MayThrowException);
205	return;
206	}
207	if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I)) {
208	if (SI->isVolatile()) {
209	invalidate();
210	reportInvalidCandidate(Stat&: ContainsVolatileAccess);
211	return;
212	}
213	}
214	if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I)) {
215	if (LI->isVolatile()) {
216	invalidate();
217	reportInvalidCandidate(Stat&: ContainsVolatileAccess);
218	return;
219	}
220	}
221	if (I.mayWriteToMemory())
222	MemWrites.push_back(Elt: &I);
223	if (I.mayReadFromMemory())
224	MemReads.push_back(Elt: &I);
225	}
226	}
227	}
228
229	/// Check if all members of the class are valid.
230	bool isValid() const {
231	return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&
232	!L->isInvalid() && Valid;
233	}
234
235	/// Verify that all members are in sync with the Loop object.
236	void verify() const {
237	assert(isValid() && "Candidate is not valid!!");
238	assert(!L->isInvalid() && "Loop is invalid!");
239	assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync");
240	assert(Header == L->getHeader() && "Header is out of sync");
241	assert(ExitingBlock == L->getExitingBlock() &&
242	"Exiting Blocks is out of sync");
243	assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync");
244	assert(Latch == L->getLoopLatch() && "Latch is out of sync");
245	}
246
247	/// Get the entry block for this fusion candidate.
248	///
249	/// If this fusion candidate represents a guarded loop, the entry block is the
250	/// loop guard block. If it represents an unguarded loop, the entry block is
251	/// the preheader of the loop.
252	BasicBlock getEntryBlock() const* {
253	if (GuardBranch)
254	return GuardBranch->getParent();
255	return Preheader;
256	}
257
258	/// After Peeling the loop is modified quite a bit, hence all of the Blocks
259	/// need to be updated accordingly.
260	void updateAfterPeeling() {
261	Preheader = L->getLoopPreheader();
262	Header = L->getHeader();
263	ExitingBlock = L->getExitingBlock();
264	ExitBlock = L->getExitBlock();
265	Latch = L->getLoopLatch();
266	verify();
267	}
268
269	/// Given a guarded loop, get the successor of the guard that is not in the
270	/// loop.
271	///
272	/// This method returns the successor of the loop guard that is not located
273	/// within the loop (i.e., the successor of the guard that is not the
274	/// preheader).
275	/// This method is only valid for guarded loops.
276	BasicBlock getNonLoopBlock() const* {
277	assert(GuardBranch && "Only valid on guarded loops.");
278	if (Peeled)
279	return GuardBranch->getSuccessor(i: `1`);
280	return (GuardBranch->getSuccessor(i: `0`) == Preheader)
281	? GuardBranch->getSuccessor(i: `1`)
282	: GuardBranch->getSuccessor(i: `0`);
283	}
284
285	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
286	LLVM_DUMP_METHOD void dump() const {
287	dbgs() << "\tGuardBranch: ";
288	if (GuardBranch)
289	dbgs() << *GuardBranch;
290	else
291	dbgs() << "nullptr";
292	dbgs() << "\n"
293	<< (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n"
294	<< "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
295	<< "\n"
296	<< "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
297	<< "\tExitingBB: "
298	<< (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
299	<< "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
300	<< "\n"
301	<< "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"
302	<< "\tEntryBlock: "
303	<< (getEntryBlock() ? getEntryBlock()->getName() : "nullptr")
304	<< "\n";
305	}
306	#endif
307
308	/// Determine if a fusion candidate (representing a loop) is eligible for
309	/// fusion. Note that this only checks whether a single loop can be fused - it
310	/// does not check whether it is legal* to fuse two loops together.*
311	bool isEligibleForFusion(ScalarEvolution &SE) const {
312	if (!isValid()) {
313	LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");
314	if (!Preheader)
315	++InvalidPreheader;
316	if (!Header)
317	++InvalidHeader;
318	if (!ExitingBlock)
319	++InvalidExitingBlock;
320	if (!ExitBlock)
321	++InvalidExitBlock;
322	if (!Latch)
323	++InvalidLatch;
324	if (L->isInvalid())
325	++InvalidLoop;
326
327	return false;
328	}
329
330	// Require ScalarEvolution to be able to determine a trip count.
331	if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
332	LLVM_DEBUG(dbgs() << "Loop " << L->getName()
333	<< " trip count not computable!\n");
334	return reportInvalidCandidate(Stat&: UnknownTripCount);
335	}
336
337	if (!L->isLoopSimplifyForm()) {
338	LLVM_DEBUG(dbgs() << "Loop " << L->getName()
339	<< " is not in simplified form!\n");
340	return reportInvalidCandidate(Stat&: NotSimplifiedForm);
341	}
342
343	if (!L->isRotatedForm()) {
344	LLVM_DEBUG(dbgs() << "Loop " << L->getName() << " is not rotated!\n");
345	return reportInvalidCandidate(Stat&: NotRotated);
346	}
347
348	return true;
349	}
350
351	private:
352	// This is only used internally for now, to clear the MemWrites and MemReads
353	// list and setting Valid to false. I can't envision other uses of this right
354	// now, since once FusionCandidates are put into the FusionCandidateList they
355	// are immutable. Thus, any time we need to change/update a FusionCandidate,
356	// we must create a new one and insert it into the FusionCandidateList to
357	// ensure the FusionCandidateList remains ordered correctly.
358	void invalidate() {
359	MemWrites.clear();
360	MemReads.clear();
361	Valid = false;
362	}
363
364	bool reportInvalidCandidate(Statistic &Stat) const {
365	using namespace ore;
366	assert(L && Preheader && "Fusion candidate not initialized properly!");
367	#if LLVM_ENABLE_STATS
368	++Stat;
369	ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(),
370	L->getStartLoc(), Preheader)
371	<< "[" << Preheader->getParent()->getName() << "]: "
372	<< "Loop is not a candidate for fusion: " << Stat.getDesc());
373	#endif
374	return false;
375	}
376	};
377	} // namespace
378
379	using LoopVector = SmallVector<Loop *, `4`>;
380
381	// List of adjacent fusion candidates in order. Thus, if FC0 comes before* FC1*
382	// in a FusionCandidateList, then FC0 dominates FC1, FC1 post-dominates FC0,
383	// and they are adjacent.
384	using FusionCandidateList = std::list<FusionCandidate>;
385	using FusionCandidateCollection = SmallVector<FusionCandidateList, `4`>;
386
387	#ifndef NDEBUG
388	static void printLoopVector(const LoopVector &LV) {
389	dbgs() << "****************************\n";
390	for (const Loop *L : LV)
391	printLoop(*L, dbgs());
392	dbgs() << "****************************\n";
393	}
394
395	static raw_ostream &operator<<(raw_ostream &OS, const FusionCandidate &FC) {
396	if (FC.isValid())
397	OS << FC.Preheader->getName();
398	else
399	OS << "<Invalid>";
400
401	return OS;
402	}
403
404	static raw_ostream &operator<<(raw_ostream &OS,
405	const FusionCandidateList &CandList) {
406	for (const FusionCandidate &FC : CandList)
407	OS << FC << `'\n'`;
408
409	return OS;
410	}
411
412	static void
413	printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
414	dbgs() << "Fusion Candidates: \n";
415	for (const auto &CandidateList : FusionCandidates) {
416	dbgs() << "* Fusion Candidate List *\n";
417	dbgs() << CandidateList;
418	dbgs() << "****************************\n";
419	}
420	}
421	#endif // NDEBUG
422
423	namespace {
424
425	/// Collect all loops in function at the same nest level, starting at the
426	/// outermost level.
427	///
428	/// This data structure collects all loops at the same nest level for a
429	/// given function (specified by the LoopInfo object). It starts at the
430	/// outermost level.
431	struct LoopDepthTree {
432	using LoopsOnLevelTy = SmallVector<LoopVector, `4`>;
433	using iterator = LoopsOnLevelTy::iterator;
434	using const_iterator = LoopsOnLevelTy::const_iterator;
435
436	LoopDepthTree(LoopInfo &LI) : Depth(`1`) {
437	if (!LI.empty())
438	LoopsOnLevel.emplace_back(Args: LoopVector (LI.rbegin(), LI.rend()));
439	}
440
441	/// Test whether a given loop has been removed from the function, and thus is
442	/// no longer valid.
443	bool isRemovedLoop(const Loop L) const* { return RemovedLoops.count(Ptr: L); }
444
445	/// Record that a given loop has been removed from the function and is no
446	/// longer valid.
447	void removeLoop(const Loop *L) { RemovedLoops.insert(Ptr: L); }
448
449	/// Descend the tree to the next (inner) nesting level
450	void descend() {
451	LoopsOnLevelTy LoopsOnNextLevel;
452
453	for (const LoopVector &LV : *this)
454	for (Loop *L : LV)
455	if (!isRemovedLoop(L) && L->begin() != L->end())
456	LoopsOnNextLevel.emplace_back(Args: LoopVector (L->begin(), L->end()));
457
458	LoopsOnLevel = LoopsOnNextLevel;
459	RemovedLoops.clear();
460	Depth++;
461	}
462
463	bool empty() const { return size() == `0`; }
464	size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); }
465	unsigned getDepth() const { return Depth; }
466
467	iterator begin() { return LoopsOnLevel.begin(); }
468	iterator end() { return LoopsOnLevel.end(); }
469	const_iterator begin() const { return LoopsOnLevel.begin(); }
470	const_iterator end() const { return LoopsOnLevel.end(); }
471
472	private:
473	/// Set of loops that have been removed from the function and are no longer
474	/// valid.
475	SmallPtrSet<const Loop *, `8`> RemovedLoops;
476
477	/// Depth of the current level, starting at 1 (outermost loops).
478	unsigned Depth;
479
480	/// Vector of loops at the current depth level that have the same parent loop
481	LoopsOnLevelTy LoopsOnLevel;
482	};
483
484	struct LoopFuser {
485	private:
486	// Sets of control flow equivalent fusion candidates for a given nest level.
487	FusionCandidateCollection FusionCandidates;
488
489	LoopDepthTree LDT;
490	DomTreeUpdater DTU;
491
492	LoopInfo &LI;
493	DominatorTree &DT;
494	DependenceInfo &DI;
495	ScalarEvolution &SE;
496	PostDominatorTree &PDT;
497	OptimizationRemarkEmitter &ORE;
498	AssumptionCache &AC;
499	const TargetTransformInfo &TTI;
500
501	public:
502	LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
503	ScalarEvolution &SE, PostDominatorTree &PDT,
504	OptimizationRemarkEmitter &ORE, const DataLayout &DL,
505	AssumptionCache &AC, const TargetTransformInfo &TTI)
506	: LDT (LI), DTU (DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
507	DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}
508
509	/// This is the main entry point for loop fusion. It will traverse the
510	/// specified function and collect candidate loops to fuse, starting at the
511	/// outermost nesting level and working inwards.
512	bool fuseLoops(Function &F) {
513	#ifndef NDEBUG
514	if (VerboseFusionDebugging) {
515	LI.print(dbgs());
516	}
517	#endif
518
519	LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName()
520	<< "\n");
521	bool Changed = false;
522
523	while (!LDT.empty()) {
524	LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth "
525	<< LDT.getDepth() << "\n";);
526
527	for (const LoopVector &LV : LDT) {
528	assert(LV.size() > `0` && "Empty loop set was build!");
529
530	// Skip singleton loop sets as they do not offer fusion opportunities on
531	// this level.
532	if (LV.size() == `1`)
533	continue;
534	#ifndef NDEBUG
535	if (VerboseFusionDebugging) {
536	LLVM_DEBUG({
537	dbgs() << " Visit loop set (#" << LV.size() << "):\n";
538	printLoopVector(LV);
539	});
540	}
541	#endif
542
543	collectFusionCandidates(LV);
544	Changed \|= fuseCandidates();
545	// All loops in the candidate sets have a common parent (or no parent).
546	// Next loop vector will correspond to a different parent. It is safe
547	// to remove all the candidates currently in the set.
548	FusionCandidates.clear();
549	}
550
551	// Finished analyzing candidates at this level. Descend to the next level.
552	LLVM_DEBUG(dbgs() << "Descend one level!\n");
553	LDT.descend();
554	}
555
556	if (Changed)
557	LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump(););
558
559	#ifndef NDEBUG
560	assert(DT.verify());
561	assert(PDT.verify());
562	LI.verify(DT);
563	SE.verify();
564	#endif
565
566	LLVM_DEBUG(dbgs() << "Loop Fusion complete\n");
567	return Changed;
568	}
569
570	private:
571	/// Iterate over all loops in the given loop set and identify the loops that
572	/// are eligible for fusion. Place all eligible fusion candidates into Control
573	/// Flow Equivalent sets, sorted by dominance.
574	void collectFusionCandidates(const LoopVector &LV) {
575	for (Loop *L : LV) {
576	TTI::PeelingPreferences PP =
577	gatherPeelingPreferences(L, SE, TTI, UserAllowPeeling: std::nullopt, UserAllowProfileBasedPeeling: std::nullopt);
578	FusionCandidate CurrCand(L, DT, &PDT, ORE, PP);
579	if (!CurrCand.isEligibleForFusion(SE))
580	continue;
581
582	// Go through each list in FusionCandidates and determine if the first or
583	// last loop in the list is strictly adjacent to L. If it is, append L.
584	// If not, go to the next list.
585	// If no suitable list is found, start another list and add it to
586	// FusionCandidates.
587	bool FoundAdjacent = false;
588	for (auto &CurrCandList : FusionCandidates) {
589	if (isStrictlyAdjacent(FC0: CurrCandList.back(), FC1: CurrCand)) {
590	CurrCandList.push_back(x: CurrCand);
591	FoundAdjacent = true;
592	NumFusionCandidates ++;
593	#ifndef NDEBUG
594	if (VerboseFusionDebugging)
595	LLVM_DEBUG(dbgs() << "Adding " << CurrCand
596	<< " to existing candidate list\n");
597	#endif
598	break;
599	}
600	}
601	if (!FoundAdjacent) {
602	// No list was found. Create a new list and add to FusionCandidates
603	#ifndef NDEBUG
604	if (VerboseFusionDebugging)
605	LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new list\n");
606	#endif
607	FusionCandidateList NewCandList;
608	NewCandList.push_back(x: CurrCand);
609	FusionCandidates.push_back(Elt: NewCandList);
610	}
611	}
612	}
613
614	/// Determine if it is beneficial to fuse two loops.
615	///
616	/// For now, this method simply returns true because we want to fuse as much
617	/// as possible (primarily to test the pass). This method will evolve, over
618	/// time, to add heuristics for profitability of fusion.
619	bool isBeneficialFusion(const FusionCandidate &FC0,
620	const FusionCandidate &FC1) {
621	return true;
622	}
623
624	/// Determine if two fusion candidates have the same trip count (i.e., they
625	/// execute the same number of iterations).
626	///
627	/// This function will return a pair of values. The first is a boolean,
628	/// stating whether or not the two candidates are known at compile time to
629	/// have the same TripCount. The second is the difference in the two
630	/// TripCounts. This information can be used later to determine whether or not
631	/// peeling can be performed on either one of the candidates.
632	std::pair<bool, std::optional<unsigned>>
633	haveIdenticalTripCounts(const FusionCandidate &FC0,
634	const FusionCandidate &FC1) const {
635	const SCEV *TripCount0 = SE.getBackedgeTakenCount(L: FC0.L);
636	if (isa<SCEVCouldNotCompute>(Val: TripCount0)) {
637	UncomputableTripCount ++;
638	LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
639	return {false, std::nullopt};
640	}
641
642	const SCEV *TripCount1 = SE.getBackedgeTakenCount(L: FC1.L);
643	if (isa<SCEVCouldNotCompute>(Val: TripCount1)) {
644	UncomputableTripCount ++;
645	LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
646	return {false, std::nullopt};
647	}
648
649	LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
650	<< *TripCount1 << " are "
651	<< (TripCount0 == TripCount1 ? "identical" : "different")
652	<< "\n");
653
654	if (TripCount0 == TripCount1)
655	return {true, `0`};
656
657	LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "
658	"determining the difference between trip counts\n");
659
660	// Currently only considering loops with a single exit point
661	// and a non-constant trip count.
662	const unsigned TC0 = SE.getSmallConstantTripCount(L: FC0.L);
663	const unsigned TC1 = SE.getSmallConstantTripCount(L: FC1.L);
664
665	// If any of the tripcounts are zero that means that loop(s) do not have
666	// a single exit or a constant tripcount.
667	if (TC0 == `0` \|\| TC1 == `0`) {
668	LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "
669	"have a constant number of iterations. Peeling "
670	"is not benefical\n");
671	return {false, std::nullopt};
672	}
673
674	std::optional<unsigned> Difference;
675	int Diff = TC0 - TC1;
676
677	if (Diff > `0`)
678	Difference = Diff;
679	else {
680	LLVM_DEBUG(
681	dbgs() << "Difference is less than 0. FC1 (second loop) has more "
682	"iterations than the first one. Currently not supported\n");
683	}
684
685	LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference
686	<< "\n");
687
688	return {false, Difference};
689	}
690
691	void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,
692	unsigned PeelCount) {
693	assert(FC0.AbleToPeel && "Should be able to peel loop");
694
695	LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
696	<< " iterations of the first loop. \n");
697
698	ValueToValueMapTy VMap;
699	peelLoop(L: FC0.L, PeelCount, PeelLast: false, LI: &LI, SE: &SE, DT, AC: &AC, PreserveLCSSA: true, VMap);
700	FC0.Peeled = true;
701	LLVM_DEBUG(dbgs() << "Done Peeling\n");
702
703	#ifndef NDEBUG
704	auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
705
706	assert(IdenticalTripCount.first && *IdenticalTripCount.second == `0` &&
707	"Loops should have identical trip counts after peeling");
708	#endif
709
710	FC0.PP.PeelCount += PeelCount;
711
712	// Peeling does not update the PDT
713	PDT.recalculate(Func&: *FC0.Preheader->getParent());
714
715	FC0.updateAfterPeeling();
716
717	// In this case the iterations of the loop are constant, so the first
718	// loop will execute completely (will not jump from one of
719	// the peeled blocks to the second loop). Here we are updating the
720	// branch conditions of each of the peeled blocks, such that it will
721	// branch to its successor which is not the preheader of the second loop
722	// in the case of unguarded loops, or the succesors of the exit block of
723	// the first loop otherwise. Doing this update will ensure that the entry
724	// block of the first loop dominates the entry block of the second loop.
725	BasicBlock *BB =
726	FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
727	if (BB) {
728	SmallVector<DominatorTree::UpdateType, `8`> TreeUpdates;
729	SmallVector<Instruction *, `8`> WorkList;
730	for (BasicBlock *Pred : predecessors(BB)) {
731	if (Pred != FC0.ExitBlock) {
732	WorkList.emplace_back(Args: Pred->getTerminator());
733	TreeUpdates.emplace_back(
734	Args: DominatorTree::UpdateType (DominatorTree::Delete, Pred, BB));
735	}
736	}
737	// Cannot modify the predecessors inside the above loop as it will cause
738	// the iterators to be nullptrs, causing memory errors.
739	for (Instruction *CurrentBranch : WorkList) {
740	BasicBlock *Succ = CurrentBranch->getSuccessor(Idx: `0`);
741	if (Succ == BB)
742	Succ = CurrentBranch->getSuccessor(Idx: `1`);
743	ReplaceInstWithInst(From: CurrentBranch, To: UncondBrInst::Create(IfTrue: Succ));
744	}
745
746	DTU.applyUpdates(Updates: TreeUpdates);
747	DTU.flush();
748	}
749	LLVM_DEBUG(
750	dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount
751	<< " iterations from the first loop.\n"
752	"Both Loops have the same number of iterations now.\n");
753	}
754
755	/// Walk each set of strictly adjacent fusion candidates and attempt to fuse
756	/// them. This does a single linear traversal of all candidates in the list.
757	/// The conditions for legal fusion are checked at this point. If a pair of
758	/// fusion candidates passes all legality checks, they are fused together and
759	/// a new fusion candidate is created and added to the FusionCandidateList.
760	/// The original fusion candidates are then removed, as they are no longer
761	/// valid.
762	bool fuseCandidates() {
763	bool Fused = false;
764	LLVM_DEBUG(printFusionCandidates(FusionCandidates));
765	for (auto &CandidateList : FusionCandidates) {
766	if (CandidateList.size() < `2`)
767	continue;
768
769	LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate List:\n"
770	<< CandidateList << "\n");
771
772	for (auto It = CandidateList.begin(), NextIt = std::next(x: It);
773	NextIt != CandidateList.end(); It = NextIt, NextIt = std::next(x: It)) {
774
775	auto FC0 = *It;
776	auto FC1 = *NextIt;
777
778	assert(!LDT.isRemovedLoop(FC0.L) &&
779	"Should not have removed loops in CandidateList!");
780	assert(!LDT.isRemovedLoop(FC1.L) &&
781	"Should not have removed loops in CandidateList!");
782
783	LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0.dump();
784	dbgs() << " with\n"; FC1.dump(); dbgs() << "\n");
785
786	FC0.verify();
787	FC1.verify();
788
789	// Check if the candidates have identical tripcounts (first value of
790	// pair), and if not check the difference in the tripcounts between
791	// the loops (second value of pair). The difference is not equal to
792	// std::nullopt iff the loops iterate a constant number of times, and
793	// have a single exit.
794	std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
795	haveIdenticalTripCounts(FC0, FC1);
796	bool SameTripCount = IdenticalTripCountRes.first;
797	std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
798
799	// Here we are checking that FC0 (the first loop) can be peeled, and
800	// both loops have different tripcounts.
801	if (FC0.AbleToPeel && !SameTripCount && TCDifference) {
802	if (*TCDifference > FusionPeelMaxCount) {
803	LLVM_DEBUG(dbgs()
804	<< "Difference in loop trip counts: " << *TCDifference
805	<< " is greater than maximum peel count specificed: "
806	<< FusionPeelMaxCount << "\n");
807	} else {
808	// Dependent on peeling being performed on the first loop, and
809	// assuming all other conditions for fusion return true.
810	SameTripCount = true;
811	}
812	}
813
814	if (!SameTripCount) {
815	LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
816	"counts. Not fusing.\n");
817	reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
818	Stat&: NonEqualTripCount);
819	continue;
820	}
821
822	if ((!FC0.GuardBranch && FC1.GuardBranch) \|\|
823	(FC0.GuardBranch && !FC1.GuardBranch)) {
824	LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
825	"another one is not. Not fusing.\n");
826	reportLoopFusion<OptimizationRemarkMissed>(
827	FC0, FC1, Stat&: OnlySecondCandidateIsGuarded);
828	continue;
829	}
830
831	// Ensure that FC0 and FC1 have identical guards.
832	// If one (or both) are not guarded, this check is not necessary.
833	if (FC0.GuardBranch && FC1.GuardBranch &&
834	!haveIdenticalGuards(FC0, FC1) && !TCDifference) {
835	LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
836	"guards. Not Fusing.\n");
837	reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
838	Stat&: NonIdenticalGuards);
839	continue;
840	}
841
842	if (FC0.GuardBranch) {
843	assert(FC1.GuardBranch && "Expecting valid FC1 guard branch");
844
845	if (!isSafeToMoveBefore(BB&: *FC0.ExitBlock,
846	InsertPoint&: *FC1.ExitBlock->getFirstNonPHIOrDbg(), DT,
847	PDT: &PDT, DI: &DI)) {
848	LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
849	"instructions in exit block. Not fusing.\n");
850	reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
851	Stat&: NonEmptyExitBlock);
852	continue;
853	}
854
855	if (!isSafeToMoveBefore(
856	BB&: *FC1.GuardBranch->getParent(),
857	InsertPoint&: *FC0.GuardBranch->getParent()->getTerminator(), DT, PDT: &PDT,
858	DI: &DI)) {
859	LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
860	"instructions in guard block. Not fusing.\n");
861	reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
862	Stat&: NonEmptyGuardBlock);
863	continue;
864	}
865	}
866
867	// Check the dependencies across the loops and do not fuse if it would
868	// violate them.
869	if (!dependencesAllowFusion(FC0, FC1)) {
870	LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
871	reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
872	Stat&: InvalidDependencies);
873	continue;
874	}
875
876	// If the second loop has instructions in the pre-header, attempt to
877	// hoist them up to the first loop's pre-header or sink them into the
878	// body of the second loop.
879	SmallVector<Instruction *, `4`> SafeToHoist;
880	SmallVector<Instruction *, `4`> SafeToSink;
881	// At this point, this is the last remaining legality check.
882	// Which means if we can make this pre-header empty, we can fuse
883	// these loops
884	if (!isEmptyPreheader(FC: FC1)) {
885	LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
886	"preheader.\n");
887
888	// If it is not safe to hoist/sink all instructions in the
889	// pre-header, we cannot fuse these loops.
890	if (!collectMovablePreheaderInsts(FC0, FC1, SafeToHoist,
891	SafeToSink)) {
892	LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
893	"Fusion Candidate Pre-header.\n"
894	<< "Not Fusing.\n");
895	reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
896	Stat&: NonEmptyPreheader);
897	continue;
898	}
899	}
900
901	bool BeneficialToFuse = isBeneficialFusion(FC0, FC1);
902	LLVM_DEBUG(dbgs() << "\tFusion appears to be "
903	<< (BeneficialToFuse ? "" : "un") << "profitable!\n");
904	if (!BeneficialToFuse) {
905	reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
906	Stat&: FusionNotBeneficial);
907	continue;
908	}
909	// All analysis has completed and has determined that fusion is legal
910	// and profitable. At this point, start transforming the code and
911	// perform fusion.
912
913	// Execute the hoist/sink operations on preheader instructions
914	movePreheaderInsts(FC0, FC1, HoistInsts&: SafeToHoist, SinkInsts&: SafeToSink);
915
916	LLVM_DEBUG(dbgs() << "\tFusion is performed: " << FC0 << " and " << FC1
917	<< "\n");
918
919	FusionCandidate FC0Copy = FC0;
920	// Peel the loop after determining that fusion is legal. The Loops
921	// will still be safe to fuse after the peeling is performed.
922	bool Peel = TCDifference && *TCDifference > `0`;
923	if (Peel)
924	peelFusionCandidate(FC0&: FC0Copy, FC1, PeelCount: *TCDifference);
925
926	// Report fusion to the Optimization Remarks.
927	// Note this needs to be done before* performFusion because*
928	// performFusion will change the original loops, making it not
929	// possible to identify them after fusion is complete.
930	reportLoopFusion<OptimizationRemark>(FC0: (Peel ? FC0Copy : FC0), FC1,
931	Stat&: FuseCounter);
932
933	FusionCandidate FusedCand(performFusion(FC0: (Peel ? FC0Copy : FC0), FC1),
934	DT, &PDT, ORE, FC0Copy.PP);
935	FusedCand.verify();
936	assert(FusedCand.isEligibleForFusion(SE) &&
937	"Fused candidate should be eligible for fusion!");
938
939	// Notify the loop-depth-tree that these loops are not valid objects
940	LDT.removeLoop(L: FC1.L);
941
942	// Replace FC0 and FC1 with their fused loop
943	It = CandidateList.erase(position: It);
944	It = CandidateList.erase(position: It);
945	It = CandidateList.insert(position: It, x: FusedCand);
946
947	// Start from FusedCand in the next iteration
948	NextIt = It;
949
950	LLVM_DEBUG(dbgs() << "Candidate List (after fusion): " << CandidateList
951	<< "\n");
952
953	Fused = true;
954	}
955	}
956	return Fused;
957	}
958
959	// Returns true if the instruction \p I can be hoisted to the end of the
960	// preheader of \p FC0. \p SafeToHoist contains the instructions that are
961	// known to be safe to hoist. The instructions encountered that cannot be
962	// hoisted are in \p NotHoisting.
963	// TODO: Move functionality into CodeMoverUtils
964	bool canHoistInst(Instruction &I,
965	const SmallVector<Instruction *, `4`> &SafeToHoist,
966	const SmallVector<Instruction *, `4`> &NotHoisting,
967	const FusionCandidate &FC0) const {
968	const BasicBlock *FC0PreheaderTarget = FC0.Preheader->getSingleSuccessor();
969	assert(FC0PreheaderTarget &&
970	"Expected single successor for loop preheader.");
971
972	for (Use &Op : I.operands()) {
973	if (auto *OpInst = dyn_cast<Instruction>(Val&: Op)) {
974	bool OpHoisted = is_contained(Range: SafeToHoist, Element: OpInst);
975	// Check if we have already decided to hoist this operand. In this
976	// case, it does not dominate FC0 yet, but will after we hoist it.
977	if (!(OpHoisted \|\| DT.dominates(Def: OpInst, BB: FC0PreheaderTarget))) {
978	return false;
979	}
980	}
981	}
982
983	// PHIs in FC1's header only have FC0 blocks as predecessors. PHIs
984	// cannot be hoisted and should be sunk to the exit of the fused loop.
985	if (isa<PHINode>(Val: I))
986	return false;
987
988	// If this isn't a memory inst, hoisting is safe
989	if (!I.mayReadOrWriteMemory())
990	return true;
991
992	LLVM_DEBUG(dbgs() << "Checking if this mem inst can be hoisted.\n");
993	for (Instruction *NotHoistedInst : NotHoisting) {
994	if (auto D = DI.depends(Src: &I, Dst: NotHoistedInst)) {
995	// Dependency is not read-before-write, write-before-read or
996	// write-before-write
997	if (D ->isFlow() \|\| D ->isAnti() \|\| D ->isOutput()) {
998	LLVM_DEBUG(dbgs() << "Inst depends on an instruction in FC1's "
999	"preheader that is not being hoisted.\n");
1000	return false;
1001	}
1002	}
1003	}
1004
1005	for (Instruction *ReadInst : FC0.MemReads) {
1006	if (auto D = DI.depends(Src: ReadInst, Dst: &I)) {
1007	// Dependency is not read-before-write
1008	if (D ->isAnti()) {
1009	LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC0.\n");
1010	return false;
1011	}
1012	}
1013	}
1014
1015	for (Instruction *WriteInst : FC0.MemWrites) {
1016	if (auto D = DI.depends(Src: WriteInst, Dst: &I)) {
1017	// Dependency is not write-before-read or write-before-write
1018	if (D ->isFlow() \|\| D ->isOutput()) {
1019	LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC0.\n");
1020	return false;
1021	}
1022	}
1023	}
1024	return true;
1025	}
1026
1027	// Returns true if the instruction \p I can be sunk to the top of the exit
1028	// block of \p FC1.
1029	// TODO: Move functionality into CodeMoverUtils
1030	bool canSinkInst(Instruction &I, const FusionCandidate &FC1) const {
1031	for (User *U : I.users()) {
1032	if (auto *UI{dyn_cast<Instruction>(Val: U)}) {
1033	// Cannot sink if user in loop
1034	// If FC1 has phi users of this value, we cannot sink it into FC1.
1035	if (FC1.L->contains(Inst: UI)) {
1036	// Cannot hoist or sink this instruction. No hoisting/sinking
1037	// should take place, loops should not fuse
1038	return false;
1039	}
1040	}
1041	}
1042
1043	// If this isn't a memory inst, sinking is safe
1044	if (!I.mayReadOrWriteMemory())
1045	return true;
1046
1047	for (Instruction *ReadInst : FC1.MemReads) {
1048	if (auto D = DI.depends(Src: &I, Dst: ReadInst)) {
1049	// Dependency is not write-before-read
1050	if (D ->isFlow()) {
1051	LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC1.\n");
1052	return false;
1053	}
1054	}
1055	}
1056
1057	for (Instruction *WriteInst : FC1.MemWrites) {
1058	if (auto D = DI.depends(Src: &I, Dst: WriteInst)) {
1059	// Dependency is not write-before-write or read-before-write
1060	if (D ->isOutput() \|\| D ->isAnti()) {
1061	LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC1.\n");
1062	return false;
1063	}
1064	}
1065	}
1066
1067	return true;
1068	}
1069
1070	/// Collect instructions in the \p FC1 Preheader that can be hoisted
1071	/// to the \p FC0 Preheader or sunk into the \p FC1 Body
1072	bool collectMovablePreheaderInsts(
1073	const FusionCandidate &FC0, const FusionCandidate &FC1,
1074	SmallVector<Instruction *, `4`> &SafeToHoist,
1075	SmallVector<Instruction , `4`> &SafeToSink) const* {
1076	BasicBlock *FC1Preheader = FC1.Preheader;
1077	// Save the instructions that are not being hoisted, so we know not to hoist
1078	// mem insts that they dominate.
1079	SmallVector<Instruction *, `4`> NotHoisting;
1080
1081	for (Instruction &I : *FC1Preheader) {
1082	// Can't move a branch
1083	if (&I == FC1Preheader->getTerminator())
1084	continue;
1085	// If the instruction has side-effects, give up.
1086	// TODO: The case of mayReadFromMemory we can handle but requires
1087	// additional work with a dependence analysis so for now we give
1088	// up on memory reads.
1089	if (I.mayThrow() \|\| !I.willReturn()) {
1090	LLVM_DEBUG(dbgs() << "Inst: " << I << " may throw or won't return.\n");
1091	return false;
1092	}
1093
1094	LLVM_DEBUG(dbgs() << "Checking Inst: " << I << "\n");
1095
1096	if (I.isAtomic() \|\| I.isVolatile()) {
1097	LLVM_DEBUG(
1098	dbgs() << "\tInstruction is volatile or atomic. Cannot move it.\n");
1099	return false;
1100	}
1101
1102	if (canHoistInst(I, SafeToHoist, NotHoisting, FC0)) {
1103	SafeToHoist.push_back(Elt: &I);
1104	LLVM_DEBUG(dbgs() << "\tSafe to hoist.\n");
1105	} else {
1106	LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n");
1107	NotHoisting.push_back(Elt: &I);
1108
1109	if (canSinkInst(I, FC1)) {
1110	SafeToSink.push_back(Elt: &I);
1111	LLVM_DEBUG(dbgs() << "\tSafe to sink.\n");
1112	} else {
1113	LLVM_DEBUG(dbgs() << "\tCould not sink.\n");
1114	return false;
1115	}
1116	}
1117	}
1118	LLVM_DEBUG(
1119	dbgs() << "All preheader instructions could be sunk or hoisted!\n");
1120	return true;
1121	}
1122
1123	/// Rewrite all additive recurrences in a SCEV to use a new loop.
1124	class AddRecLoopReplacer : public SCEVRewriteVisitor<AddRecLoopReplacer> {
1125	public:
1126	AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,
1127	bool UseMax = true)
1128	: SCEVRewriteVisitor (SE), Valid(true), UseMax(UseMax), OldL(OldL),
1129	NewL(NewL) {}
1130
1131	const SCEV visitAddRecExpr(const* SCEVAddRecExpr *Expr) {
1132	const Loop *ExprL = Expr->getLoop();
1133	SmallVector<SCEVUse, `2`> Operands;
1134	if (ExprL == &OldL) {
1135	append_range(C&: Operands, R: Expr->operands());
1136	return SE.getAddRecExpr(Operands, L: &NewL, Flags: Expr->getNoWrapFlags());
1137	}
1138
1139	if (OldL.contains(L: ExprL)) {
1140	bool Pos = SE.isKnownPositive(S: Expr->getStepRecurrence(SE));
1141	if (!UseMax \|\| !Pos \|\| !Expr->isAffine()) {
1142	Valid = false;
1143	return Expr;
1144	}
1145	return visit(S: Expr->getStart());
1146	}
1147
1148	for (SCEVUse Op : Expr->operands())
1149	Operands.push_back(Elt: visit(S: Op));
1150	return SE.getAddRecExpr(Operands, L: ExprL, Flags: Expr->getNoWrapFlags());
1151	}
1152
1153	bool wasValidSCEV() const { return Valid; }
1154
1155	private:
1156	bool Valid, UseMax;
1157	const Loop &OldL, &NewL;
1158	};
1159
1160	/// Return false if the access functions of \p I0 and \p I1 could cause
1161	/// a negative dependence.
1162	bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0,
1163	Instruction &I1, bool EqualIsInvalid) {
1164	Value *Ptr0 = getLoadStorePointerOperand(V: &I0);
1165	Value *Ptr1 = getLoadStorePointerOperand(V: &I1);
1166	if (!Ptr0 \|\| !Ptr1)
1167	return false;
1168
1169	const SCEV *SCEVPtr0 = SE.getSCEVAtScope(V: Ptr0, L: &L0);
1170	const SCEV *SCEVPtr1 = SE.getSCEVAtScope(V: Ptr1, L: &L1);
1171	#ifndef NDEBUG
1172	if (VerboseFusionDebugging)
1173	LLVM_DEBUG(dbgs() << " Access function check: " << *SCEVPtr0 << " vs "
1174	<< *SCEVPtr1 << "\n");
1175	#endif
1176	AddRecLoopReplacer Rewriter(SE, L0, L1);
1177	SCEVPtr0 = Rewriter.visit(S: SCEVPtr0);
1178	#ifndef NDEBUG
1179	if (VerboseFusionDebugging)
1180	LLVM_DEBUG(dbgs() << " Access function after rewrite: " << *SCEVPtr0
1181	<< " [Valid: " << Rewriter.wasValidSCEV() << "]\n");
1182	#endif
1183	if (!Rewriter.wasValidSCEV())
1184	return false;
1185
1186	// TODO: isKnownPredicate doesnt work well when one SCEV is loop carried (by
1187	// L0) and the other is not. We could check if it is monotone and test
1188	// the beginning and end value instead.
1189
1190	BasicBlock *L0Header = L0.getHeader();
1191	auto HasNonLinearDominanceRelation = [&](const SCEV *S) {
1192	const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: S);
1193	if (!AddRec)
1194	return false;
1195	return !DT.dominates(A: L0Header, B: AddRec->getLoop()->getHeader()) &&
1196	!DT.dominates(A: AddRec->getLoop()->getHeader(), B: L0Header);
1197	};
1198	if (SCEVExprContains(Root: SCEVPtr1, Pred: HasNonLinearDominanceRelation))
1199	return false;
1200
1201	ICmpInst::Predicate Pred =
1202	EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;
1203	bool IsAlwaysGE = SE.isKnownPredicate(Pred, LHS: SCEVPtr0, RHS: SCEVPtr1);
1204	#ifndef NDEBUG
1205	if (VerboseFusionDebugging)
1206	LLVM_DEBUG(dbgs() << " Relation: " << *SCEVPtr0
1207	<< (IsAlwaysGE ? " >= " : " may < ") << *SCEVPtr1
1208	<< "\n");
1209	#endif
1210	return IsAlwaysGE;
1211	}
1212
1213	/// Return true if the dependences between @p I0 (in @p L0) and @p I1 (in
1214	/// @p L1) allow loop fusion of @p L0 and @p L1. The dependence analyses
1215	/// specified by @p DepChoice are used to determine this.
1216	bool dependencesAllowFusion(const FusionCandidate &FC0,
1217	const FusionCandidate &FC1, Instruction &I0,
1218	Instruction &I1, bool AnyDep,
1219	FusionDependenceAnalysisChoice DepChoice) {
1220	#ifndef NDEBUG
1221	if (VerboseFusionDebugging) {
1222	LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : "
1223	<< DepChoice << "\n");
1224	}
1225	#endif
1226	switch (DepChoice) {
1227	case FUSION_DEPENDENCE_ANALYSIS_SCEV:
1228	return accessDiffIsPositive(L0: FC0.L, L1: FC1.L, I0, I1, EqualIsInvalid: AnyDep);
1229	case FUSION_DEPENDENCE_ANALYSIS_DA: {
1230	auto DepResult = DI.depends(Src: &I0, Dst: &I1);
1231	if (!DepResult)
1232	return true;
1233	#ifndef NDEBUG
1234	if (VerboseFusionDebugging) {
1235	LLVM_DEBUG(dbgs() << "DA res: "; DepResult->dump(dbgs());
1236	dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: "
1237	<< (DepResult->isOrdered() ? "true" : "false")
1238	<< "]\n");
1239	LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels()
1240	<< "\n");
1241	}
1242	#endif
1243	unsigned Levels = DepResult ->getLevels();
1244	unsigned SameSDLevels = DepResult ->getSameSDLevels();
1245	unsigned CurLoopLevel = FC0.L->getLoopDepth();
1246
1247	// Check if DA is missing info regarding the current loop level
1248	if (CurLoopLevel > Levels + SameSDLevels)
1249	return false;
1250
1251	// Iterating over the outer levels.
1252	for (unsigned Level = `1`; Level <= std::min(a: CurLoopLevel - `1`, b: Levels);
1253	++Level) {
1254	unsigned Direction = DepResult ->getDirection(Level, SameSD: false);
1255
1256	// Check if the direction vector does not include equality. If an outer
1257	// loop has a non-equal direction, outer indicies are different and it
1258	// is safe to fuse.
1259	if (!(Direction & Dependence::DVEntry::EQ)) {
1260	LLVM_DEBUG(dbgs() << "Safe to fuse due to non-equal acceses in the "
1261	"outer loops\n");
1262	NumDA ++;
1263	return true;
1264	}
1265	}
1266
1267	assert(CurLoopLevel > Levels && "Fusion candidates are not separated");
1268
1269	if (DepResult ->isScalar(Level: CurLoopLevel, SameSD: true) && !DepResult ->isAnti()) {
1270	LLVM_DEBUG(dbgs() << "Safe to fuse due to a loop-invariant non-anti "
1271	"dependency\n");
1272	NumDA ++;
1273	return true;
1274	}
1275
1276	unsigned CurDir = DepResult ->getDirection(Level: CurLoopLevel, SameSD: true);
1277
1278	// Check if the direction vector does not include greater direction. In
1279	// that case, the dependency is not a backward loop-carried and is legal
1280	// to fuse. For example here we have a forward dependency
1281	// for (int i = 0; i < n; i++)
1282	// A[i] = ...;
1283	// for (int i = 0; i < n; i++)
1284	// ... = A[i-1];
1285	if (!(CurDir & Dependence::DVEntry::GT)) {
1286	LLVM_DEBUG(dbgs() << "Safe to fuse with no backward loop-carried "
1287	"dependency\n");
1288	NumDA ++;
1289	return true;
1290	}
1291
1292	if (DepResult ->getNextPredecessor() \|\| DepResult ->getNextSuccessor())
1293	LLVM_DEBUG(
1294	dbgs() << "TODO: Implement pred/succ dependence handling!\n");
1295
1296	return false;
1297	}
1298
1299	case FUSION_DEPENDENCE_ANALYSIS_ALL:
1300	return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
1301	DepChoice: FUSION_DEPENDENCE_ANALYSIS_SCEV) \|\|
1302	dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
1303	DepChoice: FUSION_DEPENDENCE_ANALYSIS_DA);
1304	}
1305
1306	llvm_unreachable("Unknown fusion dependence analysis choice!");
1307	}
1308
1309	/// Perform a dependence check and return if @p FC0 and @p FC1 can be fused.
1310	bool dependencesAllowFusion(const FusionCandidate &FC0,
1311	const FusionCandidate &FC1) {
1312	LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
1313	<< "\n");
1314	assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
1315	assert(DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()));
1316
1317	for (Instruction *WriteL0 : FC0.MemWrites) {
1318	for (Instruction *WriteL1 : FC1.MemWrites)
1319	if (!dependencesAllowFusion(FC0, FC1, I0&: WriteL0, I1&: WriteL1,
1320	/ AnyDep / false,
1321	DepChoice: FusionDependenceAnalysis)) {
1322	InvalidDependencies ++;
1323	return false;
1324	}
1325	for (Instruction *ReadL1 : FC1.MemReads)
1326	if (!dependencesAllowFusion(FC0, FC1, I0&: WriteL0, I1&: ReadL1,
1327	/ AnyDep / false,
1328	DepChoice: FusionDependenceAnalysis)) {
1329	InvalidDependencies ++;
1330	return false;
1331	}
1332	}
1333
1334	for (Instruction *WriteL1 : FC1.MemWrites) {
1335	for (Instruction *WriteL0 : FC0.MemWrites)
1336	if (!dependencesAllowFusion(FC0, FC1, I0&: WriteL0, I1&: WriteL1,
1337	/ AnyDep / false,
1338	DepChoice: FusionDependenceAnalysis)) {
1339	InvalidDependencies ++;
1340	return false;
1341	}
1342	for (Instruction *ReadL0 : FC0.MemReads)
1343	if (!dependencesAllowFusion(FC0, FC1, I0&: ReadL0, I1&: WriteL1,
1344	/ AnyDep / false,
1345	DepChoice: FusionDependenceAnalysis)) {
1346	InvalidDependencies ++;
1347	return false;
1348	}
1349	}
1350
1351	// Walk through all uses in FC1. For each use, find the reaching def. If the
1352	// def is located in FC0 then it is not safe to fuse.
1353	for (BasicBlock *BB : FC1.L->blocks())
1354	for (Instruction &I : *BB)
1355	for (auto &Op : I.operands())
1356	if (Instruction *Def = dyn_cast<Instruction>(Val&: Op))
1357	if (FC0.L->contains(BB: Def->getParent())) {
1358	InvalidDependencies ++;
1359	return false;
1360	}
1361
1362	return true;
1363	}
1364
1365	/// Determine if two fusion candidates are strictly adjacent in the CFG.
1366	///
1367	/// This method will determine if there are additional basic blocks in the CFG
1368	/// between the exit of \p FC0 and the entry of \p FC1.
1369	/// If the two candidates are guarded loops, then it checks whether the
1370	/// exit block of the \p FC0 is the predecessor of the \p FC1 preheader. This
1371	/// implicitly ensures that the non-loop successor of the \p FC0 guard branch
1372	/// is the entry block of \p FC1. If not, then the loops are not adjacent. If
1373	/// the two candidates are not guarded loops, then it checks whether the exit
1374	/// block of \p FC0 is the preheader of \p FC1.
1375	/// Strictly means there is no predecessor for FC1 unless it is from FC0,
1376	/// i.e., FC0 dominates FC1.
1377	bool isStrictlyAdjacent(const FusionCandidate &FC0,
1378	const FusionCandidate &FC1) const {
1379	// If the successor of the guard branch is FC1, then the loops are adjacent
1380	if (FC0.GuardBranch)
1381	return DT.dominates(A: FC0.getEntryBlock(), B: FC1.getEntryBlock()) &&
1382	FC0.ExitBlock->getSingleSuccessor() == FC1.getEntryBlock();
1383	return FC0.ExitBlock == FC1.getEntryBlock();
1384	}
1385
1386	bool isEmptyPreheader(const FusionCandidate &FC) const {
1387	return FC.Preheader->size() == `1`;
1388	}
1389
1390	/// Hoist \p FC1 Preheader instructions to \p FC0 Preheader
1391	/// and sink others into the body of \p FC1.
1392	void movePreheaderInsts(const FusionCandidate &FC0,
1393	const FusionCandidate &FC1,
1394	SmallVector<Instruction *, `4`> &HoistInsts,
1395	SmallVector<Instruction , `4`> &SinkInsts) const* {
1396	// All preheader instructions except the branch must be hoisted or sunk
1397	assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - `1` &&
1398	"Attempting to sink and hoist preheader instructions, but not all "
1399	"the preheader instructions are accounted for.");
1400
1401	NumHoistedInsts += HoistInsts.size();
1402	NumSunkInsts += SinkInsts.size();
1403
1404	LLVM_DEBUG(if (VerboseFusionDebugging) {
1405	if (!HoistInsts.empty())
1406	dbgs() << "Hoisting: \n";
1407	for (Instruction *I : HoistInsts)
1408	dbgs() << *I << "\n";
1409	if (!SinkInsts.empty())
1410	dbgs() << "Sinking: \n";
1411	for (Instruction *I : SinkInsts)
1412	dbgs() << *I << "\n";
1413	});
1414
1415	for (Instruction *I : HoistInsts) {
1416	assert(I->getParent() == FC1.Preheader);
1417	I->moveBefore(BB&: *FC0.Preheader,
1418	I: FC0.Preheader->getTerminator()->getIterator());
1419	}
1420	// insert instructions in reverse order to maintain dominance relationship
1421	for (Instruction *I : reverse(C&: SinkInsts)) {
1422	assert(I->getParent() == FC1.Preheader);
1423	if (isa<PHINode>(Val: I)) {
1424	// The Phis to be sunk should have only one incoming value, as is
1425	// assured by the condition that the second loop is dominated by the
1426	// first one which is enforced by isStrictlyAdjacent().
1427	// Replace the phi uses with the corresponding incoming value to clean
1428	// up the code.
1429	assert(cast<PHINode>(I)->getNumIncomingValues() == `1` &&
1430	"Expected the sunk PHI node to have 1 incoming value.");
1431	I->replaceAllUsesWith(V: I->getOperand(i: `0`));
1432	I->eraseFromParent();
1433	} else
1434	I->moveBefore(BB&: *FC1.ExitBlock, I: FC1.ExitBlock->getFirstInsertionPt());
1435	}
1436	}
1437
1438	/// Determine if two fusion candidates have identical guards
1439	///
1440	/// This method will determine if two fusion candidates have the same guards.
1441	/// The guards are considered the same if:
1442	/// 1. The instructions to compute the condition used in the compare are
1443	/// identical.
1444	/// 2. The successors of the guard have the same flow into/around the loop.
1445	/// If the compare instructions are identical, then the first successor of the
1446	/// guard must go to the same place (either the preheader of the loop or the
1447	/// NonLoopBlock). In other words, the first successor of both loops must
1448	/// both go into the loop (i.e., the preheader) or go around the loop (i.e.,
1449	/// the NonLoopBlock). The same must be true for the second successor.
1450	bool haveIdenticalGuards(const FusionCandidate &FC0,
1451	const FusionCandidate &FC1) const {
1452	assert(FC0.GuardBranch && FC1.GuardBranch &&
1453	"Expecting FC0 and FC1 to be guarded loops.");
1454
1455	if (auto FC0CmpInst =
1456	dyn_cast<Instruction>(Val: FC0.GuardBranch->getCondition()))
1457	if (auto FC1CmpInst =
1458	dyn_cast<Instruction>(Val: FC1.GuardBranch->getCondition()))
1459	if (!FC0CmpInst->isIdenticalTo(I: FC1CmpInst))
1460	return false;
1461
1462	// The compare instructions are identical.
1463	// Now make sure the successor of the guards have the same flow into/around
1464	// the loop
1465	if (FC0.GuardBranch->getSuccessor(i: `0`) == FC0.Preheader)
1466	return (FC1.GuardBranch->getSuccessor(i: `0`) == FC1.Preheader);
1467	else
1468	return (FC1.GuardBranch->getSuccessor(i: `1`) == FC1.Preheader);
1469	}
1470
1471	/// Modify the latch branch of FC to be unconditional since successors of the
1472	/// branch are the same.
1473	void simplifyLatchBranch(const FusionCandidate &FC) const {
1474	CondBrInst *FCLatchBranch = dyn_cast<CondBrInst>(Val: FC.Latch->getTerminator());
1475	if (FCLatchBranch) {
1476	assert(FCLatchBranch->getSuccessor(`0`) == FCLatchBranch->getSuccessor(`1`) &&
1477	"Expecting the two successors of FCLatchBranch to be the same");
1478	UncondBrInst *NewBranch =
1479	UncondBrInst::Create(IfTrue: FCLatchBranch->getSuccessor(i: `0`));
1480	ReplaceInstWithInst(From: FCLatchBranch, To: NewBranch);
1481	}
1482	}
1483
1484	/// Move instructions from FC0.Latch to FC1.Latch. If FC0.Latch has an unique
1485	/// successor, then merge FC0.Latch with its unique successor.
1486	void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {
1487	moveInstructionsToTheBeginning(FromBB&: FC0.Latch, ToBB&: FC1.Latch, DT, PDT, DI);
1488	if (BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) {
1489	MergeBlockIntoPredecessor(BB: Succ, DTU: &DTU, LI: &LI);
1490	DTU.flush();
1491	}
1492	}
1493
1494	/// Fuse two fusion candidates, creating a new fused loop.
1495	///
1496	/// This method contains the mechanics of fusing two loops, represented by \p
1497	/// FC0 and \p FC1. It is assumed that \p FC0 dominates \p FC1 and \p FC1
1498	/// postdominates \p FC0 (making them control flow equivalent). It also
1499	/// assumes that the other conditions for fusion have been met: adjacent,
1500	/// identical trip counts, and no negative distance dependencies exist that
1501	/// would prevent fusion. Thus, there is no checking for these conditions in
1502	/// this method.
1503	///
1504	/// Fusion is performed by rewiring the CFG to update successor blocks of the
1505	/// components of the loop. Specifically, the following changes are done:
1506	///
1507	/// 1. The preheader of \p FC1 is removed as it is no longer necessary
1508	/// (because it is currently only a single statement block).
1509	/// 2. The latch of \p FC0 is modified to jump to the header of \p FC1.
1510	/// 3. The latch of \p FC1 is modified to jump to the header of \p FC0.
1511	/// 4. All blocks from \p FC1 are removed from FC1 and added to FC0.
1512	///
1513	/// All of these modifications are done with dominator tree updates, thus
1514	/// keeping the dominator (and post dominator) information up-to-date.
1515	///
1516	/// This can be improved in the future by actually merging blocks during
1517	/// fusion. For example, the preheader of \p FC1 can be merged with the
1518	/// preheader of \p FC0. This would allow loops with more than a single
1519	/// statement in the preheader to be fused. Similarly, the latch blocks of the
1520	/// two loops could also be fused into a single block. This will require
1521	/// analysis to prove it is safe to move the contents of the block past
1522	/// existing code, which currently has not been implemented.
1523	Loop performFusion(const* FusionCandidate &FC0, const FusionCandidate &FC1) {
1524	assert(FC0.isValid() && FC1.isValid() &&
1525	"Expecting valid fusion candidates");
1526
1527	LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
1528	dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
1529
1530	// Move instructions from the preheader of FC1 to the end of the preheader
1531	// of FC0.
1532	moveInstructionsToTheEnd(FromBB&: FC1.Preheader, ToBB&: FC0.Preheader, DT, PDT, DI);
1533
1534	// Fusing guarded loops is handled slightly differently than non-guarded
1535	// loops and has been broken out into a separate method instead of trying to
1536	// intersperse the logic within a single method.
1537	if (FC0.GuardBranch)
1538	return fuseGuardedLoops(FC0, FC1);
1539
1540	assert(FC1.Preheader ==
1541	(FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
1542	assert(FC1.Preheader->size() == `1` &&
1543	FC1.Preheader->getSingleSuccessor() == FC1.Header);
1544
1545	// Remember the phi nodes originally in the header of FC0 in order to rewire
1546	// them later. However, this is only necessary if the new loop carried
1547	// values might not dominate the exiting branch. While we do not generally
1548	// test if this is the case but simply insert intermediate phi nodes, we
1549	// need to make sure these intermediate phi nodes have different
1550	// predecessors. To this end, we filter the special case where the exiting
1551	// block is the latch block of the first loop. Nothing needs to be done
1552	// anyway as all loop carried values dominate the latch and thereby also the
1553	// exiting branch.
1554	SmallVector<PHINode *, `8`> OriginalFC0PHIs;
1555	if (FC0.ExitingBlock != FC0.Latch)
1556	for (PHINode &PHI : FC0.Header->phis())
1557	OriginalFC0PHIs.push_back(Elt: &PHI);
1558
1559	// Replace incoming blocks for header PHIs first.
1560	FC1.Preheader->replaceSuccessorsPhiUsesWith(New: FC0.Preheader);
1561	FC0.Latch->replaceSuccessorsPhiUsesWith(New: FC1.Latch);
1562
1563	// Then modify the control flow and update DT and PDT.
1564	SmallVector<DominatorTree::UpdateType, `8`> TreeUpdates;
1565
1566	// The old exiting block of the first loop (FC0) has to jump to the header
1567	// of the second as we need to execute the code in the second header block
1568	// regardless of the trip count. That is, if the trip count is 0, so the
1569	// back edge is never taken, we still have to execute both loop headers,
1570	// especially (but not only!) if the second is a do-while style loop.
1571	// However, doing so might invalidate the phi nodes of the first loop as
1572	// the new values do only need to dominate their latch and not the exiting
1573	// predicate. To remedy this potential problem we always introduce phi
1574	// nodes in the header of the second loop later that select the loop carried
1575	// value, if the second header was reached through an old latch of the
1576	// first, or undef otherwise. This is sound as exiting the first implies the
1577	// second will exit too, __without__ taking the back-edge. [Their
1578	// trip-counts are equal after all.
1579	// KB: Would this sequence be simpler to just make FC0.ExitingBlock go
1580	// to FC1.Header? I think this is basically what the three sequences are
1581	// trying to accomplish; however, doing this directly in the CFG may mean
1582	// the DT/PDT becomes invalid
1583	if (!FC0.Peeled) {
1584	FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(From: FC1.Preheader,
1585	To: FC1.Header);
1586	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1587	DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
1588	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1589	DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1590	} else {
1591	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1592	DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));
1593
1594	// Remove the ExitBlock of the first Loop (also not needed)
1595	FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(From: FC0.ExitBlock,
1596	To: FC1.Header);
1597	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1598	DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
1599	FC0.ExitBlock->getTerminator()->eraseFromParent();
1600	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1601	DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1602	new UnreachableInst (FC0.ExitBlock->getContext(), FC0.ExitBlock);
1603	}
1604
1605	// The pre-header of L1 is not necessary anymore.
1606	assert(pred_empty(FC1.Preheader));
1607	FC1.Preheader->getTerminator()->eraseFromParent();
1608	new UnreachableInst (FC1.Preheader->getContext(), FC1.Preheader);
1609	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1610	DominatorTree::Delete, FC1.Preheader, FC1.Header));
1611
1612	// Moves the phi nodes from the second to the first loops header block.
1613	while (PHINode *PHI = dyn_cast<PHINode>(Val: &FC1.Header->front())) {
1614	if (SE.isSCEVable(Ty: PHI->getType()))
1615	SE.forgetValue(V: PHI);
1616	if (PHI->hasNUsesOrMore(N: `1`))
1617	PHI->moveBefore(InsertPos: FC0.Header->getFirstInsertionPt());
1618	else
1619	PHI->eraseFromParent();
1620	}
1621
1622	// Introduce new phi nodes in the second loop header to ensure
1623	// exiting the first and jumping to the header of the second does not break
1624	// the SSA property of the phis originally in the first loop. See also the
1625	// comment above.
1626	BasicBlock::iterator L1HeaderIP = FC1.Header->begin();
1627	for (PHINode *LCPHI : OriginalFC0PHIs) {
1628	int L1LatchBBIdx = LCPHI->getBasicBlockIndex(BB: FC1.Latch);
1629	assert(L1LatchBBIdx >= `0` &&
1630	"Expected loop carried value to be rewired at this point!");
1631
1632	Value *LCV = LCPHI->getIncomingValue(i: L1LatchBBIdx);
1633
1634	PHINode *L1HeaderPHI =
1635	PHINode::Create(Ty: LCV->getType(), NumReservedValues: `2`, NameStr: LCPHI->getName() + ".afterFC0");
1636	L1HeaderPHI->insertBefore(InsertPos: L1HeaderIP);
1637	L1HeaderPHI->addIncoming(V: LCV, BB: FC0.Latch);
1638	L1HeaderPHI->addIncoming(V: PoisonValue::get(T: LCV->getType()),
1639	BB: FC0.ExitingBlock);
1640
1641	LCPHI->setIncomingValue(i: L1LatchBBIdx, V: L1HeaderPHI);
1642	}
1643
1644	// Replace latch terminator destinations.
1645	FC0.Latch->getTerminator()->replaceUsesOfWith(From: FC0.Header, To: FC1.Header);
1646	FC1.Latch->getTerminator()->replaceUsesOfWith(From: FC1.Header, To: FC0.Header);
1647
1648	// Modify the latch branch of FC0 to be unconditional as both successors of
1649	// the branch are the same.
1650	simplifyLatchBranch(FC: FC0);
1651
1652	// If FC0.Latch and FC0.ExitingBlock are the same then we have already
1653	// performed the updates above.
1654	if (FC0.Latch != FC0.ExitingBlock)
1655	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1656	DominatorTree::Insert, FC0.Latch, FC1.Header));
1657
1658	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (DominatorTree::Delete,
1659	FC0.Latch, FC0.Header));
1660	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (DominatorTree::Insert,
1661	FC1.Latch, FC0.Header));
1662	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (DominatorTree::Delete,
1663	FC1.Latch, FC1.Header));
1664
1665	// Update DT/PDT
1666	DTU.applyUpdates(Updates: TreeUpdates);
1667
1668	LI.removeBlock(BB: FC1.Preheader);
1669	DTU.deleteBB(DelBB: FC1.Preheader);
1670	if (FC0.Peeled) {
1671	LI.removeBlock(BB: FC0.ExitBlock);
1672	DTU.deleteBB(DelBB: FC0.ExitBlock);
1673	}
1674
1675	DTU.flush();
1676
1677	// Is there a way to keep SE up-to-date so we don't need to forget the loops
1678	// and rebuild the information in subsequent passes of fusion?
1679	// Note: Need to forget the loops before merging the loop latches, as
1680	// mergeLatch may remove the only block in FC1.
1681	SE.forgetLoop(L: FC1.L);
1682	SE.forgetLoop(L: FC0.L);
1683
1684	// Move instructions from FC0.Latch to FC1.Latch.
1685	// Note: mergeLatch requires an updated DT.
1686	mergeLatch(FC0, FC1);
1687
1688	// Forget block dispositions as well, so that there are no dangling
1689	// pointers to erased/free'ed blocks. It should be done after mergeLatch()
1690	// since merging the latches may affect the dispositions.
1691	SE.forgetBlockAndLoopDispositions();
1692
1693	// Forget the cached SCEV values including the induction variable that may
1694	// have changed after the fusion.
1695	SE.forgetLoop(L: FC0.L);
1696
1697	// Merge the loops.
1698	SmallVector<BasicBlock *, `8`> Blocks(FC1.L->blocks());
1699	for (BasicBlock *BB : Blocks) {
1700	FC0.L->addBlockEntry(BB);
1701	FC1.L->removeBlockFromLoop(BB);
1702	if (LI.getLoopFor(BB) != FC1.L)
1703	continue;
1704	LI.changeLoopFor(BB, L: FC0.L);
1705	}
1706	while (!FC1.L->isInnermost()) {
1707	const auto &ChildLoopIt = FC1.L->begin();
1708	Loop ChildLoop = ChildLoopIt;
1709	FC1.L->removeChildLoop(I: ChildLoopIt);
1710	FC0.L->addChildLoop(NewChild: ChildLoop);
1711	}
1712
1713	// Delete the now empty loop L1.
1714	LI.erase(L: FC1.L);
1715
1716	#ifndef NDEBUG
1717	assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
1718	assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1719	assert(PDT.verify());
1720	LI.verify(DT);
1721	SE.verify();
1722	#endif
1723
1724	LLVM_DEBUG(dbgs() << "Fusion done:\n");
1725
1726	return FC0.L;
1727	}
1728
1729	/// Report details on loop fusion opportunities.
1730	///
1731	/// This template function can be used to report both successful and missed
1732	/// loop fusion opportunities, based on the RemarkKind. The RemarkKind should
1733	/// be one of:
1734	/// - OptimizationRemarkMissed to report when loop fusion is unsuccessful
1735	/// given two valid fusion candidates.
1736	/// - OptimizationRemark to report successful fusion of two fusion
1737	/// candidates.
1738	/// The remarks will be printed using the form:
1739	/// <path/filename>:<line number>:<column number>: [<function name>]:
1740	/// <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
1741	template <typename RemarkKind>
1742	void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
1743	Statistic &Stat) {
1744	assert(FC0.Preheader && FC1.Preheader &&
1745	"Expecting valid fusion candidates");
1746	using namespace ore;
1747	#if LLVM_ENABLE_STATS
1748	++Stat;
1749	ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(),
1750	FC0.Preheader)
1751	<< "[" << FC0.Preheader->getParent()->getName()
1752	<< "]: " << NV("Cand1", StringRef(FC0.Preheader->getName()))
1753	<< " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))
1754	<< ": " << Stat.getDesc());
1755	#endif
1756	}
1757
1758	/// Fuse two guarded fusion candidates, creating a new fused loop.
1759	///
1760	/// Fusing guarded loops is handled much the same way as fusing non-guarded
1761	/// loops. The rewiring of the CFG is slightly different though, because of
1762	/// the presence of the guards around the loops and the exit blocks after the
1763	/// loop body. As such, the new loop is rewired as follows:
1764	/// 1. Keep the guard branch from FC0 and use the non-loop block target
1765	/// from the FC1 guard branch.
1766	/// 2. Remove the exit block from FC0 (this exit block should be empty
1767	/// right now).
1768	/// 3. Remove the guard branch for FC1
1769	/// 4. Remove the preheader for FC1.
1770	/// The exit block successor for the latch of FC0 is updated to be the header
1771	/// of FC1 and the non-exit block successor of the latch of FC1 is updated to
1772	/// be the header of FC0, thus creating the fused loop.
1773	Loop fuseGuardedLoops(const* FusionCandidate &FC0,
1774	const FusionCandidate &FC1) {
1775	assert(FC0.GuardBranch && FC1.GuardBranch && "Expecting guarded loops");
1776
1777	BasicBlock *FC0GuardBlock = FC0.GuardBranch->getParent();
1778	BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
1779	BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
1780	BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
1781	BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor();
1782
1783	// Move instructions from the exit block of FC0 to the beginning of the exit
1784	// block of FC1, in the case that the FC0 loop has not been peeled. In the
1785	// case that FC0 loop is peeled, then move the instructions of the successor
1786	// of the FC0 Exit block to the beginning of the exit block of FC1.
1787	moveInstructionsToTheBeginning(
1788	FromBB&: (FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock), ToBB&: *FC1.ExitBlock,
1789	DT, PDT, DI);
1790
1791	// Move instructions from the guard block of FC1 to the end of the guard
1792	// block of FC0.
1793	moveInstructionsToTheEnd(FromBB&: FC1GuardBlock, ToBB&: FC0GuardBlock, DT, PDT, DI);
1794
1795	assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");
1796
1797	SmallVector<DominatorTree::UpdateType, `8`> TreeUpdates;
1798
1799	////////////////////////////////////////////////////////////////////////////
1800	// Update the Loop Guard
1801	////////////////////////////////////////////////////////////////////////////
1802	// The guard for FC0 is updated to guard both FC0 and FC1. This is done by
1803	// changing the NonLoopGuardBlock for FC0 to the NonLoopGuardBlock for FC1.
1804	// Thus, one path from the guard goes to the preheader for FC0 (and thus
1805	// executes the new fused loop) and the other path goes to the NonLoopBlock
1806	// for FC1 (where FC1 guard would have gone if FC1 was not executed).
1807	FC1NonLoopBlock->replacePhiUsesWith(Old: FC1GuardBlock, New: FC0GuardBlock);
1808	FC0.GuardBranch->replaceUsesOfWith(From: FC0NonLoopBlock, To: FC1NonLoopBlock);
1809
1810	BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
1811	BBToUpdate->getTerminator()->replaceUsesOfWith(From: FC1GuardBlock, To: FC1.Header);
1812
1813	// The guard of FC1 is not necessary anymore.
1814	FC1.GuardBranch->eraseFromParent();
1815	new UnreachableInst (FC1GuardBlock->getContext(), FC1GuardBlock);
1816
1817	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1818	DominatorTree::Delete, FC1GuardBlock, FC1.Preheader));
1819	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1820	DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock));
1821	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1822	DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock));
1823	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1824	DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
1825
1826	if (FC0.Peeled) {
1827	// Remove the Block after the ExitBlock of FC0
1828	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1829	DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));
1830	FC0ExitBlockSuccessor->getTerminator()->eraseFromParent();
1831	new UnreachableInst (FC0ExitBlockSuccessor->getContext(),
1832	FC0ExitBlockSuccessor);
1833	}
1834
1835	assert(pred_empty(FC1GuardBlock) &&
1836	"Expecting guard block to have no predecessors");
1837	assert(succ_empty(FC1GuardBlock) &&
1838	"Expecting guard block to have no successors");
1839
1840	// Remember the phi nodes originally in the header of FC0 in order to rewire
1841	// them later. However, this is only necessary if the new loop carried
1842	// values might not dominate the exiting branch. While we do not generally
1843	// test if this is the case but simply insert intermediate phi nodes, we
1844	// need to make sure these intermediate phi nodes have different
1845	// predecessors. To this end, we filter the special case where the exiting
1846	// block is the latch block of the first loop. Nothing needs to be done
1847	// anyway as all loop carried values dominate the latch and thereby also the
1848	// exiting branch.
1849	// KB: This is no longer necessary because FC0.ExitingBlock == FC0.Latch
1850	// (because the loops are rotated. Thus, nothing will ever be added to
1851	// OriginalFC0PHIs.
1852	SmallVector<PHINode *, `8`> OriginalFC0PHIs;
1853	if (FC0.ExitingBlock != FC0.Latch)
1854	for (PHINode &PHI : FC0.Header->phis())
1855	OriginalFC0PHIs.push_back(Elt: &PHI);
1856
1857	assert(OriginalFC0PHIs.empty() && "Expecting OriginalFC0PHIs to be empty!");
1858
1859	// Replace incoming blocks for header PHIs first.
1860	FC1.Preheader->replaceSuccessorsPhiUsesWith(New: FC0.Preheader);
1861	FC0.Latch->replaceSuccessorsPhiUsesWith(New: FC1.Latch);
1862
1863	// The old exiting block of the first loop (FC0) has to jump to the header
1864	// of the second as we need to execute the code in the second header block
1865	// regardless of the trip count. That is, if the trip count is 0, so the
1866	// back edge is never taken, we still have to execute both loop headers,
1867	// especially (but not only!) if the second is a do-while style loop.
1868	// However, doing so might invalidate the phi nodes of the first loop as
1869	// the new values do only need to dominate their latch and not the exiting
1870	// predicate. To remedy this potential problem we always introduce phi
1871	// nodes in the header of the second loop later that select the loop carried
1872	// value, if the second header was reached through an old latch of the
1873	// first, or undef otherwise. This is sound as exiting the first implies the
1874	// second will exit too, __without__ taking the back-edge (their
1875	// trip-counts are equal after all).
1876	FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(From: FC0.ExitBlock,
1877	To: FC1.Header);
1878
1879	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1880	DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
1881	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1882	DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1883
1884	// Remove FC0 Exit Block
1885	// The exit block for FC0 is no longer needed since control will flow
1886	// directly to the header of FC1. Since it is an empty block, it can be
1887	// removed at this point.
1888	// TODO: In the future, we can handle non-empty exit blocks my merging any
1889	// instructions from FC0 exit block into FC1 exit block prior to removing
1890	// the block.
1891	assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty");
1892	FC0.ExitBlock->getTerminator()->eraseFromParent();
1893	new UnreachableInst (FC0.ExitBlock->getContext(), FC0.ExitBlock);
1894
1895	// Remove FC1 Preheader
1896	// The pre-header of L1 is not necessary anymore.
1897	assert(pred_empty(FC1.Preheader));
1898	FC1.Preheader->getTerminator()->eraseFromParent();
1899	new UnreachableInst (FC1.Preheader->getContext(), FC1.Preheader);
1900	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1901	DominatorTree::Delete, FC1.Preheader, FC1.Header));
1902
1903	// Moves the phi nodes from the second to the first loops header block.
1904	while (PHINode *PHI = dyn_cast<PHINode>(Val: &FC1.Header->front())) {
1905	if (SE.isSCEVable(Ty: PHI->getType()))
1906	SE.forgetValue(V: PHI);
1907	if (PHI->hasNUsesOrMore(N: `1`))
1908	PHI->moveBefore(InsertPos: FC0.Header->getFirstInsertionPt());
1909	else
1910	PHI->eraseFromParent();
1911	}
1912
1913	// Introduce new phi nodes in the second loop header to ensure
1914	// exiting the first and jumping to the header of the second does not break
1915	// the SSA property of the phis originally in the first loop. See also the
1916	// comment above.
1917	BasicBlock::iterator L1HeaderIP = FC1.Header->begin();
1918	for (PHINode *LCPHI : OriginalFC0PHIs) {
1919	int L1LatchBBIdx = LCPHI->getBasicBlockIndex(BB: FC1.Latch);
1920	assert(L1LatchBBIdx >= `0` &&
1921	"Expected loop carried value to be rewired at this point!");
1922
1923	Value *LCV = LCPHI->getIncomingValue(i: L1LatchBBIdx);
1924
1925	PHINode *L1HeaderPHI =
1926	PHINode::Create(Ty: LCV->getType(), NumReservedValues: `2`, NameStr: LCPHI->getName() + ".afterFC0");
1927	L1HeaderPHI->insertBefore(InsertPos: L1HeaderIP);
1928	L1HeaderPHI->addIncoming(V: LCV, BB: FC0.Latch);
1929	L1HeaderPHI->addIncoming(V: PoisonValue::get(T: LCV->getType()),
1930	BB: FC0.ExitingBlock);
1931
1932	LCPHI->setIncomingValue(i: L1LatchBBIdx, V: L1HeaderPHI);
1933	}
1934
1935	// Update the latches
1936
1937	// Replace latch terminator destinations.
1938	FC0.Latch->getTerminator()->replaceUsesOfWith(From: FC0.Header, To: FC1.Header);
1939	FC1.Latch->getTerminator()->replaceUsesOfWith(From: FC1.Header, To: FC0.Header);
1940
1941	// Modify the latch branch of FC0 to be unconditional as both successors of
1942	// the branch are the same.
1943	simplifyLatchBranch(FC: FC0);
1944
1945	// If FC0.Latch and FC0.ExitingBlock are the same then we have already
1946	// performed the updates above.
1947	if (FC0.Latch != FC0.ExitingBlock)
1948	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (
1949	DominatorTree::Insert, FC0.Latch, FC1.Header));
1950
1951	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (DominatorTree::Delete,
1952	FC0.Latch, FC0.Header));
1953	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (DominatorTree::Insert,
1954	FC1.Latch, FC0.Header));
1955	TreeUpdates.emplace_back(Args: DominatorTree::UpdateType (DominatorTree::Delete,
1956	FC1.Latch, FC1.Header));
1957
1958	// All done
1959	// Apply the updates to the Dominator Tree and cleanup.
1960
1961	assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!");
1962	assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!");
1963
1964	// Update DT/PDT
1965	DTU.applyUpdates(Updates: TreeUpdates);
1966
1967	LI.removeBlock(BB: FC1GuardBlock);
1968	LI.removeBlock(BB: FC1.Preheader);
1969	LI.removeBlock(BB: FC0.ExitBlock);
1970	if (FC0.Peeled) {
1971	LI.removeBlock(BB: FC0ExitBlockSuccessor);
1972	DTU.deleteBB(DelBB: FC0ExitBlockSuccessor);
1973	}
1974	DTU.deleteBB(DelBB: FC1GuardBlock);
1975	DTU.deleteBB(DelBB: FC1.Preheader);
1976	DTU.deleteBB(DelBB: FC0.ExitBlock);
1977	DTU.flush();
1978
1979	// Is there a way to keep SE up-to-date so we don't need to forget the loops
1980	// and rebuild the information in subsequent passes of fusion?
1981	// Note: Need to forget the loops before merging the loop latches, as
1982	// mergeLatch may remove the only block in FC1.
1983	SE.forgetLoop(L: FC1.L);
1984	SE.forgetLoop(L: FC0.L);
1985
1986	// Move instructions from FC0.Latch to FC1.Latch.
1987	// Note: mergeLatch requires an updated DT.
1988	mergeLatch(FC0, FC1);
1989
1990	// Forget block dispositions as well, so that there are no dangling
1991	// pointers to erased/free'ed blocks. It should be done after mergeLatch()
1992	// since merging the latches may affect the dispositions.
1993	SE.forgetBlockAndLoopDispositions();
1994
1995	// Merge the loops.
1996	SmallVector<BasicBlock *, `8`> Blocks(FC1.L->blocks());
1997	for (BasicBlock *BB : Blocks) {
1998	FC0.L->addBlockEntry(BB);
1999	FC1.L->removeBlockFromLoop(BB);
2000	if (LI.getLoopFor(BB) != FC1.L)
2001	continue;
2002	LI.changeLoopFor(BB, L: FC0.L);
2003	}
2004	while (!FC1.L->isInnermost()) {
2005	const auto &ChildLoopIt = FC1.L->begin();
2006	Loop ChildLoop = ChildLoopIt;
2007	FC1.L->removeChildLoop(I: ChildLoopIt);
2008	FC0.L->addChildLoop(NewChild: ChildLoop);
2009	}
2010
2011	// Delete the now empty loop L1.
2012	LI.erase(L: FC1.L);
2013
2014	#ifndef NDEBUG
2015	assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
2016	assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2017	assert(PDT.verify());
2018	LI.verify(DT);
2019	SE.verify();
2020	#endif
2021
2022	LLVM_DEBUG(dbgs() << "Fusion done:\n");
2023
2024	return FC0.L;
2025	}
2026	};
2027	} // namespace
2028
2029	PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
2030	auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
2031	auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
2032	auto &DI = AM.getResult<DependenceAnalysis>(IR&: F);
2033	auto &SE = AM.getResult<ScalarEvolutionAnalysis>(IR&: F);
2034	auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F);
2035	auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
2036	auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
2037	const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
2038	const DataLayout &DL = F.getDataLayout();
2039
2040	// Ensure loops are in simplifed form which is a pre-requisite for loop fusion
2041	// pass. Added only for new PM since the legacy PM has already added
2042	// LoopSimplify pass as a dependency.
2043	bool Changed = false;
2044	for (auto &L : LI) {
2045	Changed \|=
2046	simplifyLoop(L, DT: &DT, LI: &LI, SE: &SE, AC: &AC, MSSAU: nullptr, PreserveLCSSA: false / PreserveLCSSA /);
2047	}
2048	if (Changed)
2049	PDT.recalculate(Func&: F);
2050
2051	LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
2052	Changed \|= LF.fuseLoops(F);
2053	if (!Changed)
2054	return PreservedAnalyses::all();
2055
2056	PreservedAnalyses PA;
2057	PA.preserve<DominatorTreeAnalysis>();
2058	PA.preserve<PostDominatorTreeAnalysis>();
2059	PA.preserve<ScalarEvolutionAnalysis>();
2060	PA.preserve<LoopAnalysis>();
2061	return PA;
2062	}
2063

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/LoopFuse.cpp