LoopPeel.cpp source code [llvm_projects/llvm/lib/Transforms/Utils/LoopPeel.cpp]

1	//===- LoopPeel.cpp -------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Loop Peeling Utilities.
10	//===----------------------------------------------------------------------===//
11
12	#include "llvm/Transforms/Utils/LoopPeel.h"
13	#include "llvm/ADT/DenseMap.h"
14	#include "llvm/ADT/SmallVector.h"
15	#include "llvm/ADT/Statistic.h"
16	#include "llvm/Analysis/Loads.h"
17	#include "llvm/Analysis/LoopInfo.h"
18	#include "llvm/Analysis/LoopIterator.h"
19	#include "llvm/Analysis/ScalarEvolution.h"
20	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
21	#include "llvm/Analysis/TargetTransformInfo.h"
22	#include "llvm/IR/BasicBlock.h"
23	#include "llvm/IR/Dominators.h"
24	#include "llvm/IR/Function.h"
25	#include "llvm/IR/InstrTypes.h"
26	#include "llvm/IR/Instruction.h"
27	#include "llvm/IR/Instructions.h"
28	#include "llvm/IR/LLVMContext.h"
29	#include "llvm/IR/MDBuilder.h"
30	#include "llvm/IR/PatternMatch.h"
31	#include "llvm/IR/ProfDataUtils.h"
32	#include "llvm/Support/Casting.h"
33	#include "llvm/Support/CommandLine.h"
34	#include "llvm/Support/Debug.h"
35	#include "llvm/Support/raw_ostream.h"
36	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
37	#include "llvm/Transforms/Utils/Cloning.h"
38	#include "llvm/Transforms/Utils/LoopSimplify.h"
39	#include "llvm/Transforms/Utils/LoopUtils.h"
40	#include "llvm/Transforms/Utils/ValueMapper.h"
41	#include <algorithm>
42	#include <cassert>
43	#include <cstdint>
44	#include <optional>
45
46	using namespace llvm;
47	using namespace llvm::PatternMatch;
48
49	#define DEBUG_TYPE "loop-peel"
50
51	STATISTIC(NumPeeled, "Number of loops peeled");
52
53	static cl::opt<unsigned> UnrollPeelCount(
54	"unroll-peel-count", cl::Hidden,
55	cl::desc ("Set the unroll peeling count, for testing purposes"));
56
57	static cl::opt<bool>
58	UnrollAllowPeeling("unroll-allow-peeling", cl::init(Val: true), cl::Hidden,
59	cl::desc ("Allows loops to be peeled when the dynamic "
60	"trip count is known to be low."));
61
62	static cl::opt<bool>
63	UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
64	cl::init(Val: false), cl::Hidden,
65	cl::desc ("Allows loop nests to be peeled."));
66
67	static cl::opt<unsigned> UnrollPeelMaxCount(
68	"unroll-peel-max-count", cl::init(Val: `7`), cl::Hidden,
69	cl::desc ("Max average trip count which will cause loop peeling."));
70
71	static cl::opt<unsigned> UnrollForcePeelCount(
72	"unroll-force-peel-count", cl::init(Val: `0`), cl::Hidden,
73	cl::desc ("Force a peel count regardless of profiling information."));
74
75	static cl::opt<bool> DisableAdvancedPeeling(
76	"disable-advanced-peeling", cl::init(Val: false), cl::Hidden,
77	cl::desc (
78	"Disable advance peeling. Issues for convergent targets (D134803)."));
79
80	static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
81
82	// Check whether we are capable of peeling this loop.
83	bool llvm::canPeel(const Loop *L) {
84	// Make sure the loop is in simplified form
85	if (!L->isLoopSimplifyForm())
86	return false;
87	if (!DisableAdvancedPeeling)
88	return true;
89
90	SmallVector<BasicBlock *, `4`> Exits;
91	L->getUniqueNonLatchExitBlocks(ExitBlocks&: Exits);
92	// The latch must either be the only exiting block or all non-latch exit
93	// blocks have either a deopt or unreachable terminator or compose a chain of
94	// blocks where the last one is either deopt or unreachable terminated. Both
95	// deopt and unreachable terminators are a strong indication they are not
96	// taken. Note that this is a profitability check, not a legality check. Also
97	// note that LoopPeeling currently can only update the branch weights of latch
98	// blocks and branch weights to blocks with deopt or unreachable do not need
99	// updating.
100	return llvm::all_of(Range&: Exits, P: IsBlockFollowedByDeoptOrUnreachable);
101	}
102
103	namespace {
104
105	// As a loop is peeled, it may be the case that Phi nodes become
106	// loop-invariant (ie, known because there is only one choice).
107	// For example, consider the following function:
108	// void g(int);
109	// void binary() {
110	// int x = 0;
111	// int y = 0;
112	// int a = 0;
113	// for(int i = 0; i <100000; ++i) {
114	// g(x);
115	// x = y;
116	// g(a);
117	// y = a + 1;
118	// a = 5;
119	// }
120	// }
121	// Peeling 3 iterations is beneficial because the values for x, y and a
122	// become known. The IR for this loop looks something like the following:
123	//
124	// %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
125	// %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
126	// %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
127	// %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
128	// ...
129	// tail call void @_Z1gi(i32 signext %x)
130	// tail call void @_Z1gi(i32 signext %a)
131	// %add = add nuw nsw i32 %a, 1
132	// %inc = add nuw nsw i32 %i, 1
133	// %exitcond = icmp eq i32 %inc, 100000
134	// br i1 %exitcond, label %for.cond.cleanup, label %for.body
135	//
136	// The arguments for the calls to g will become known after 3 iterations
137	// of the loop, because the phi nodes values become known after 3 iterations
138	// of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
139	// The first iteration has g(0), g(0); the second has g(0), g(5); the
140	// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
141	// Now consider the phi nodes:
142	// %a is a phi with constants so it is determined after iteration 1.
143	// %y is a phi based on a constant and %a so it is determined on
144	// the iteration after %a is determined, so iteration 2.
145	// %x is a phi based on a constant and %y so it is determined on
146	// the iteration after %y, so iteration 3.
147	// %i is based on itself (and is an induction variable) so it is
148	// never determined.
149	// This means that peeling off 3 iterations will result in being able to
150	// remove the phi nodes for %a, %y, and %x. The arguments for the
151	// corresponding calls to g are determined and the code for computing
152	// x, y, and a can be removed.
153	//
154	// The PhiAnalyzer class calculates how many times a loop should be
155	// peeled based on the above analysis of the phi nodes in the loop while
156	// respecting the maximum specified.
157	class PhiAnalyzer {
158	public:
159	PhiAnalyzer(const Loop &L, unsigned MaxIterations);
160
161	// Calculate the sufficient minimum number of iterations of the loop to peel
162	// such that phi instructions become determined (subject to allowable limits)
163	std::optional<unsigned> calculateIterationsToPeel();
164
165	protected:
166	using PeelCounter = std::optional<unsigned>;
167	const PeelCounter Unknown = std::nullopt;
168
169	// Add 1 respecting Unknown and return Unknown if result over MaxIterations
170	PeelCounter addOne(PeelCounter PC) const {
171	if (PC == Unknown)
172	return Unknown;
173	return (PC + `1` <= MaxIterations) ? PeelCounter {PC + `1`} : Unknown;
174	}
175
176	// Calculate the number of iterations after which the given value
177	// becomes an invariant.
178	PeelCounter calculate(const Value &);
179
180	const Loop &L;
181	const unsigned MaxIterations;
182
183	// Map of Values to number of iterations to invariance
184	SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance;
185	};
186
187	PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
188	: L(L), MaxIterations(MaxIterations) {
189	assert(canPeel(&L) && "loop is not suitable for peeling");
190	assert(MaxIterations > `0` && "no peeling is allowed?");
191	}
192
193	// This function calculates the number of iterations after which the value
194	// becomes an invariant. The pre-calculated values are memorized in a map.
195	// N.B. This number will be Unknown or <= MaxIterations.
196	// The function is calculated according to the following definition:
197	// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
198	// F(%x) = G(%y) + 1 (N.B. [MaxIterations \| Unknown] + 1 => Unknown)
199	// G(%y) = 0 if %y is a loop invariant
200	// G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
201	// G(%y) = TODO: if %y is an expression based on phis and loop invariants
202	// The example looks like:
203	// %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
204	// %y = phi(0, 5)
205	// %a = %y + 1
206	// G(%y) = Unknown otherwise (including phi not in header block)
207	PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
208	// If we already know the answer, take it from the map.
209	auto I = IterationsToInvariance.find(Val: &V);
210	if (I != IterationsToInvariance.end())
211	return I ->second;
212
213	// Place Unknown to map to avoid infinite recursion. Such
214	// cycles can never stop on an invariant.
215	IterationsToInvariance [&V] = Unknown;
216
217	if (L.isLoopInvariant(V: &V))
218	// Loop invariant so known at start.
219	return (IterationsToInvariance [&V] = `0`);
220	if (const PHINode *Phi = dyn_cast<PHINode>(Val: &V)) {
221	if (Phi->getParent() != L.getHeader()) {
222	// Phi is not in header block so Unknown.
223	assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
224	return Unknown;
225	}
226	// We need to analyze the input from the back edge and add 1.
227	Value *Input = Phi->getIncomingValueForBlock(BB: L.getLoopLatch());
228	PeelCounter Iterations = calculate(V: *Input);
229	assert(IterationsToInvariance[Input] == Iterations &&
230	"unexpected value saved");
231	return (IterationsToInvariance [Phi] = addOne(PC: Iterations));
232	}
233	if (const Instruction *I = dyn_cast<Instruction>(Val: &V)) {
234	if (isa<CmpInst>(Val: I) \|\| I->isBinaryOp()) {
235	// Binary instructions get the max of the operands.
236	PeelCounter LHS = calculate(V: *I->getOperand(i: `0`));
237	if (LHS == Unknown)
238	return Unknown;
239	PeelCounter RHS = calculate(V: *I->getOperand(i: `1`));
240	if (RHS == Unknown)
241	return Unknown;
242	return (IterationsToInvariance [I] = {std::max(a: LHS, b: RHS)});
243	}
244	if (I->isCast())
245	// Cast instructions get the value of the operand.
246	return (IterationsToInvariance [I] = calculate(V: *I->getOperand(i: `0`)));
247	}
248	// TODO: handle more expressions
249
250	// Everything else is Unknown.
251	assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
252	return Unknown;
253	}
254
255	std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
256	unsigned Iterations = `0`;
257	for (auto &PHI : L.getHeader()->phis()) {
258	PeelCounter ToInvariance = calculate(V: PHI);
259	if (ToInvariance != Unknown) {
260	assert(*ToInvariance <= MaxIterations && "bad result in phi analysis");
261	Iterations = std::max(a: Iterations, b: *ToInvariance);
262	if (Iterations == MaxIterations)
263	break;
264	}
265	}
266	assert((Iterations <= MaxIterations) && "bad result in phi analysis");
267	return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
268	}
269
270	} // unnamed namespace
271
272	// Try to find any invariant memory reads that will become dereferenceable in
273	// the remainder loop after peeling. The load must also be used (transitively)
274	// by an exit condition. Returns the number of iterations to peel off (at the
275	// moment either 0 or 1).
276	static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
277	DominatorTree &DT,
278	AssumptionCache *AC) {
279	// Skip loops with a single exiting block, because there should be no benefit
280	// for the heuristic below.
281	if (L.getExitingBlock())
282	return `0`;
283
284	// All non-latch exit blocks must have an UnreachableInst terminator.
285	// Otherwise the heuristic below may not be profitable.
286	SmallVector<BasicBlock *, `4`> Exits;
287	L.getUniqueNonLatchExitBlocks(ExitBlocks&: Exits);
288	if (any_of(Range&: Exits, P: [](const BasicBlock *BB) {
289	return !isa<UnreachableInst>(Val: BB->getTerminator());
290	}))
291	return `0`;
292
293	// Now look for invariant loads that dominate the latch and are not known to
294	// be dereferenceable. If there are such loads and no writes, they will become
295	// dereferenceable in the loop if the first iteration is peeled off. Also
296	// collect the set of instructions controlled by such loads. Only peel if an
297	// exit condition uses (transitively) such a load.
298	BasicBlock *Header = L.getHeader();
299	BasicBlock *Latch = L.getLoopLatch();
300	SmallPtrSet<Value *, `8`> LoadUsers;
301	const DataLayout &DL = L.getHeader()->getDataLayout();
302	for (BasicBlock *BB : L.blocks()) {
303	for (Instruction &I : *BB) {
304	if (I.mayWriteToMemory())
305	return `0`;
306
307	auto Iter = LoadUsers.find(Ptr: &I);
308	if (Iter != LoadUsers.end()) {
309	for (Value *U : I.users())
310	LoadUsers.insert(Ptr: U);
311	}
312	// Do not look for reads in the header; they can already be hoisted
313	// without peeling.
314	if (BB == Header)
315	continue;
316	if (auto *LI = dyn_cast<LoadInst>(Val: &I)) {
317	Value *Ptr = LI->getPointerOperand();
318	if (DT.dominates(A: BB, B: Latch) && L.isLoopInvariant(V: Ptr) &&
319	!isDereferenceablePointer(V: Ptr, Ty: LI->getType(), DL, CtxI: LI, AC, DT: &DT))
320	for (Value *U : I.users())
321	LoadUsers.insert(Ptr: U);
322	}
323	}
324	}
325	SmallVector<BasicBlock *> ExitingBlocks;
326	L.getExitingBlocks(ExitingBlocks);
327	if (any_of(Range&: ExitingBlocks, P: [&LoadUsers](BasicBlock *Exiting) {
328	return LoadUsers.contains(Ptr: Exiting->getTerminator());
329	}))
330	return `1`;
331	return `0`;
332	}
333
334	// Return the number of iterations to peel off that make conditions in the
335	// body true/false. For example, if we peel 2 iterations off the loop below,
336	// the condition i < 2 can be evaluated at compile time.
337	// for (i = 0; i < n; i++)
338	// if (i < 2)
339	// ..
340	// else
341	// ..
342	// }
343	static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
344	ScalarEvolution &SE) {
345	assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
346	unsigned DesiredPeelCount = `0`;
347
348	// Do not peel the entire loop.
349	const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(L: &L);
350	if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Val: BE))
351	MaxPeelCount =
352	std::min(a: (unsigned)SC->getAPInt().getLimitedValue() - `1`, b: MaxPeelCount);
353
354	// Increase PeelCount while (IterVal Pred BoundSCEV) condition is satisfied;
355	// return true if inversed condition become known before reaching the
356	// MaxPeelCount limit.
357	auto PeelWhilePredicateIsKnown =
358	[&](unsigned &PeelCount, const SCEV &IterVal, const* SCEV *BoundSCEV,
359	const SCEV *Step, ICmpInst::Predicate Pred) {
360	while (PeelCount < MaxPeelCount &&
361	SE.isKnownPredicate(Pred, LHS: IterVal, RHS: BoundSCEV)) {
362	IterVal = SE.getAddExpr(LHS: IterVal, RHS: Step);
363	++PeelCount;
364	}
365	return SE.isKnownPredicate(Pred: ICmpInst::getInversePredicate(pred: Pred), LHS: IterVal,
366	RHS: BoundSCEV);
367	};
368
369	const unsigned MaxDepth = `4`;
370	std::function<void(Value , unsigned*)> ComputePeelCount =
371	[&](Value Condition, unsigned* Depth) -> void {
372	if (!Condition->getType()->isIntegerTy() \|\| Depth >= MaxDepth)
373	return;
374
375	Value LeftVal, RightVal;
376	if (match(V: Condition, P: m_And(L: m_Value(V&: LeftVal), R: m_Value(V&: RightVal))) \|\|
377	match(V: Condition, P: m_Or(L: m_Value(V&: LeftVal), R: m_Value(V&: RightVal)))) {
378	ComputePeelCount (LeftVal, Depth + `1`);
379	ComputePeelCount (RightVal, Depth + `1`);
380	return;
381	}
382
383	CmpInst::Predicate Pred;
384	if (!match(V: Condition, P: m_ICmp(Pred, L: m_Value(V&: LeftVal), R: m_Value(V&: RightVal))))
385	return;
386
387	const SCEV *LeftSCEV = SE.getSCEV(V: LeftVal);
388	const SCEV *RightSCEV = SE.getSCEV(V: RightVal);
389
390	// Do not consider predicates that are known to be true or false
391	// independently of the loop iteration.
392	if (SE.evaluatePredicate(Pred, LHS: LeftSCEV, RHS: RightSCEV))
393	return;
394
395	// Check if we have a condition with one AddRec and one non AddRec
396	// expression. Normalize LeftSCEV to be the AddRec.
397	if (!isa<SCEVAddRecExpr>(Val: LeftSCEV)) {
398	if (isa<SCEVAddRecExpr>(Val: RightSCEV)) {
399	std::swap(a&: LeftSCEV, b&: RightSCEV);
400	Pred = ICmpInst::getSwappedPredicate(pred: Pred);
401	} else
402	return;
403	}
404
405	const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(Val: LeftSCEV);
406
407	// Avoid huge SCEV computations in the loop below, make sure we only
408	// consider AddRecs of the loop we are trying to peel.
409	if (!LeftAR->isAffine() \|\| LeftAR->getLoop() != &L)
410	return;
411	if (!(ICmpInst::isEquality(P: Pred) && LeftAR->hasNoSelfWrap()) &&
412	!SE.getMonotonicPredicateType(LHS: LeftAR, Pred))
413	return;
414
415	// Check if extending the current DesiredPeelCount lets us evaluate Pred
416	// or !Pred in the loop body statically.
417	unsigned NewPeelCount = DesiredPeelCount;
418
419	const SCEV *IterVal = LeftAR->evaluateAtIteration(
420	It: SE.getConstant(Ty: LeftSCEV->getType(), V: NewPeelCount), SE);
421
422	// If the original condition is not known, get the negated predicate
423	// (which holds on the else branch) and check if it is known. This allows
424	// us to peel of iterations that make the original condition false.
425	if (!SE.isKnownPredicate(Pred, LHS: IterVal, RHS: RightSCEV))
426	Pred = ICmpInst::getInversePredicate(pred: Pred);
427
428	const SCEV *Step = LeftAR->getStepRecurrence(SE);
429	if (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, RightSCEV, Step,
430	Pred))
431	return;
432
433	// However, for equality comparisons, that isn't always sufficient to
434	// eliminate the comparsion in loop body, we may need to peel one more
435	// iteration. See if that makes !Pred become unknown again.
436	const SCEV *NextIterVal = SE.getAddExpr(LHS: IterVal, RHS: Step);
437	if (ICmpInst::isEquality(P: Pred) &&
438	!SE.isKnownPredicate(Pred: ICmpInst::getInversePredicate(pred: Pred), LHS: NextIterVal,
439	RHS: RightSCEV) &&
440	!SE.isKnownPredicate(Pred, LHS: IterVal, RHS: RightSCEV) &&
441	SE.isKnownPredicate(Pred, LHS: NextIterVal, RHS: RightSCEV)) {
442	if (NewPeelCount >= MaxPeelCount)
443	return; // Need to peel one more iteration, but can't. Give up.
444	++NewPeelCount; // Great!
445	}
446
447	DesiredPeelCount = std::max(a: DesiredPeelCount, b: NewPeelCount);
448	};
449
450	auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
451	if (!MinMax->getType()->isIntegerTy())
452	return;
453	Value LHS = MinMax->getLHS(), RHS = MinMax->getRHS();
454	const SCEV BoundSCEV, IterSCEV;
455	if (L.isLoopInvariant(V: LHS)) {
456	BoundSCEV = SE.getSCEV(V: LHS);
457	IterSCEV = SE.getSCEV(V: RHS);
458	} else if (L.isLoopInvariant(V: RHS)) {
459	BoundSCEV = SE.getSCEV(V: RHS);
460	IterSCEV = SE.getSCEV(V: LHS);
461	} else
462	return;
463	const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Val: IterSCEV);
464	// For simplicity, we support only affine recurrences.
465	if (!AddRec \|\| !AddRec->isAffine() \|\| AddRec->getLoop() != &L)
466	return;
467	const SCEV *Step = AddRec->getStepRecurrence(SE);
468	bool IsSigned = MinMax->isSigned();
469	// To minimize number of peeled iterations, we use strict relational
470	// predicates here.
471	ICmpInst::Predicate Pred;
472	if (SE.isKnownPositive(S: Step))
473	Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
474	else if (SE.isKnownNegative(S: Step))
475	Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
476	else
477	return;
478	// Check that AddRec is not wrapping.
479	if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap()))
480	return;
481	unsigned NewPeelCount = DesiredPeelCount;
482	const SCEV *IterVal = AddRec->evaluateAtIteration(
483	It: SE.getConstant(Ty: AddRec->getType(), V: NewPeelCount), SE);
484	if (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, BoundSCEV, Step,
485	Pred))
486	return;
487	DesiredPeelCount = NewPeelCount;
488	};
489
490	for (BasicBlock *BB : L.blocks()) {
491	for (Instruction &I : *BB) {
492	if (SelectInst *SI = dyn_cast<SelectInst>(Val: &I))
493	ComputePeelCount (SI->getCondition(), `0`);
494	if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Val: &I))
495	ComputePeelCountMinMax (MinMax);
496	}
497
498	auto *BI = dyn_cast<BranchInst>(Val: BB->getTerminator());
499	if (!BI \|\| BI->isUnconditional())
500	continue;
501
502	// Ignore loop exit condition.
503	if (L.getLoopLatch() == BB)
504	continue;
505
506	ComputePeelCount (BI->getCondition(), `0`);
507	}
508
509	return DesiredPeelCount;
510	}
511
512	/// This "heuristic" exactly matches implicit behavior which used to exist
513	/// inside getLoopEstimatedTripCount. It was added here to keep an
514	/// improvement inside that API from causing peeling to become more aggressive.
515	/// This should probably be removed.
516	static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
517	BasicBlock *Latch = L->getLoopLatch();
518	if (!Latch)
519	return true;
520
521	BranchInst *LatchBR = dyn_cast<BranchInst>(Val: Latch->getTerminator());
522	if (!LatchBR \|\| LatchBR->getNumSuccessors() != `2` \|\| !L->isLoopExiting(BB: Latch))
523	return true;
524
525	assert((LatchBR->getSuccessor(`0`) == L->getHeader() \|\|
526	LatchBR->getSuccessor(`1`) == L->getHeader()) &&
527	"At least one edge out of the latch must go to the header");
528
529	SmallVector<BasicBlock *, `4`> ExitBlocks;
530	L->getUniqueNonLatchExitBlocks(ExitBlocks);
531	return any_of(Range&: ExitBlocks, P: [](const BasicBlock *EB) {
532	return !EB->getTerminatingDeoptimizeCall();
533	});
534	}
535
536
537	// Return the number of iterations we want to peel off.
538	void llvm::computePeelCount(Loop L, unsigned* LoopSize,
539	TargetTransformInfo::PeelingPreferences &PP,
540	unsigned TripCount, DominatorTree &DT,
541	ScalarEvolution &SE, AssumptionCache *AC,
542	unsigned Threshold) {
543	assert(LoopSize > `0` && "Zero loop size is not allowed!");
544	// Save the PP.PeelCount value set by the target in
545	// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
546	unsigned TargetPeelCount = PP.PeelCount;
547	PP.PeelCount = `0`;
548	if (!canPeel(L))
549	return;
550
551	// Only try to peel innermost loops by default.
552	// The constraint can be relaxed by the target in TTI.getPeelingPreferences
553	// or by the flag -unroll-allow-loop-nests-peeling.
554	if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
555	return;
556
557	// If the user provided a peel count, use that.
558	bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > `0`;
559	if (UserPeelCount) {
560	LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
561	<< " iterations.\n");
562	PP.PeelCount = UnrollForcePeelCount;
563	PP.PeelProfiledIterations = true;
564	return;
565	}
566
567	// Skip peeling if it's disabled.
568	if (!PP.AllowPeeling)
569	return;
570
571	// Check that we can peel at least one iteration.
572	if (`2` * LoopSize > Threshold)
573	return;
574
575	unsigned AlreadyPeeled = `0`;
576	if (auto Peeled = getOptionalIntLoopAttribute(TheLoop: L, Name: PeeledCountMetaData))
577	AlreadyPeeled = *Peeled;
578	// Stop if we already peeled off the maximum number of iterations.
579	if (AlreadyPeeled >= UnrollPeelMaxCount)
580	return;
581
582	// Pay respect to limitations implied by loop size and the max peel count.
583	unsigned MaxPeelCount = UnrollPeelMaxCount;
584	MaxPeelCount = std::min(a: MaxPeelCount, b: Threshold / LoopSize - `1`);
585
586	// Start the max computation with the PP.PeelCount value set by the target
587	// in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
588	unsigned DesiredPeelCount = TargetPeelCount;
589
590	// Here we try to get rid of Phis which become invariants after 1, 2, ..., N
591	// iterations of the loop. For this we compute the number for iterations after
592	// which every Phi is guaranteed to become an invariant, and try to peel the
593	// maximum number of iterations among these values, thus turning all those
594	// Phis into invariants.
595	if (MaxPeelCount > DesiredPeelCount) {
596	// Check how many iterations are useful for resolving Phis
597	auto NumPeels = PhiAnalyzer (*L, MaxPeelCount).calculateIterationsToPeel();
598	if (NumPeels)
599	DesiredPeelCount = std::max(a: DesiredPeelCount, b: *NumPeels);
600	}
601
602	DesiredPeelCount = std::max(a: DesiredPeelCount,
603	b: countToEliminateCompares(L&: *L, MaxPeelCount, SE));
604
605	if (DesiredPeelCount == `0`)
606	DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(L&: *L, DT, AC);
607
608	if (DesiredPeelCount > `0`) {
609	DesiredPeelCount = std::min(a: DesiredPeelCount, b: MaxPeelCount);
610	// Consider max peel count limitation.
611	assert(DesiredPeelCount > `0` && "Wrong loop size estimation?");
612	if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
613	LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
614	<< " iteration(s) to turn"
615	<< " some Phis into invariants.\n");
616	PP.PeelCount = DesiredPeelCount;
617	PP.PeelProfiledIterations = false;
618	return;
619	}
620	}
621
622	// Bail if we know the statically calculated trip count.
623	// In this case we rather prefer partial unrolling.
624	if (TripCount)
625	return;
626
627	// Do not apply profile base peeling if it is disabled.
628	if (!PP.PeelProfiledIterations)
629	return;
630	// If we don't know the trip count, but have reason to believe the average
631	// trip count is low, peeling should be beneficial, since we will usually
632	// hit the peeled section.
633	// We only do this in the presence of profile information, since otherwise
634	// our estimates of the trip count are not reliable enough.
635	if (L->getHeader()->getParent()->hasProfileData()) {
636	if (violatesLegacyMultiExitLoopCheck(L))
637	return;
638	std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
639	if (!EstimatedTripCount)
640	return;
641
642	LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
643	<< *EstimatedTripCount << "\n");
644
645	if (*EstimatedTripCount) {
646	if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
647	unsigned PeelCount = *EstimatedTripCount;
648	LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
649	PP.PeelCount = PeelCount;
650	return;
651	}
652	LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
653	LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
654	LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
655	LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
656	LLVM_DEBUG(dbgs() << "Max peel count by cost: "
657	<< (Threshold / LoopSize - `1`) << "\n");
658	}
659	}
660	}
661
662	struct WeightInfo {
663	// Weights for current iteration.
664	SmallVector<uint32_t> Weights;
665	// Weights to subtract after each iteration.
666	const SmallVector<uint32_t> SubWeights;
667	};
668
669	/// Update the branch weights of an exiting block of a peeled-off loop
670	/// iteration.
671	/// Let F is a weight of the edge to continue (fallthrough) into the loop.
672	/// Let E is a weight of the edge to an exit.
673	/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
674	/// go to exit.
675	/// Then, Estimated ExitCount = F / E.
676	/// For I-th (counting from 0) peeled off iteration we set the weights for
677	/// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
678	/// The probability to go to exit 1/(EC-I) increases. At the same time
679	/// the estimated exit count in the remainder loop reduces by I.
680	/// To avoid dealing with division rounding we can just multiple both part
681	/// of weights to E and use weight as (F - I E, E).*
682	static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
683	setBranchWeights(I&: Term, Weights: Info.Weights, /IsExpected=/*false);
684	for (auto [Idx, SubWeight] : enumerate(First: Info.SubWeights))
685	if (SubWeight != `0`)
686	// Don't set the probability of taking the edge from latch to loop header
687	// to less than 1:1 ratio (meaning Weight should not be lower than
688	// SubWeight), as this could significantly reduce the loop's hotness,
689	// which would be incorrect in the case of underestimating the trip count.
690	Info.Weights [Idx] =
691	Info.Weights [Idx] > SubWeight
692	? std::max(a: Info.Weights [Idx] - SubWeight, b: SubWeight)
693	: SubWeight;
694	}
695
696	/// Initialize the weights for all exiting blocks.
697	static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
698	Loop *L) {
699	SmallVector<BasicBlock *> ExitingBlocks;
700	L->getExitingBlocks(ExitingBlocks);
701	for (BasicBlock *ExitingBlock : ExitingBlocks) {
702	Instruction *Term = ExitingBlock->getTerminator();
703	SmallVector<uint32_t> Weights;
704	if (!extractBranchWeights(I: *Term, Weights))
705	continue;
706
707	// See the comment on updateBranchWeights() for an explanation of what we
708	// do here.
709	uint32_t FallThroughWeights = `0`;
710	uint32_t ExitWeights = `0`;
711	for (auto [Succ, Weight] : zip(t: successors(I: Term), u&: Weights)) {
712	if (L->contains(BB: Succ))
713	FallThroughWeights += Weight;
714	else
715	ExitWeights += Weight;
716	}
717
718	// Don't try to update weights for degenerate case.
719	if (FallThroughWeights == `0`)
720	continue;
721
722	SmallVector<uint32_t> SubWeights;
723	for (auto [Succ, Weight] : zip(t: successors(I: Term), u&: Weights)) {
724	if (!L->contains(BB: Succ)) {
725	// Exit weights stay the same.
726	SubWeights.push_back(Elt: `0`);
727	continue;
728	}
729
730	// Subtract exit weights on each iteration, distributed across all
731	// fallthrough edges.
732	double W = (double)Weight / (double)FallThroughWeights;
733	SubWeights.push_back(Elt: (uint32_t)(ExitWeights * W));
734	}
735
736	WeightInfos.insert(KV: {Term, {.Weights: std::move(Weights), .SubWeights: std::move(SubWeights)}});
737	}
738	}
739
740	/// Clones the body of the loop L, putting it between \p InsertTop and \p
741	/// InsertBot.
742	/// \param IterNumber The serial number of the iteration currently being
743	/// peeled off.
744	/// \param ExitEdges The exit edges of the original loop.
745	/// \param[out] NewBlocks A list of the blocks in the newly created clone
746	/// \param[out] VMap The value map between the loop and the new clone.
747	/// \param LoopBlocks A helper for DFS-traversal of the loop.
748	/// \param LVMap A value-map that maps instructions from the original loop to
749	/// instructions in the last peeled-off iteration.
750	static void cloneLoopBlocks(
751	Loop L, unsigned* IterNumber, BasicBlock InsertTop, BasicBlock InsertBot,
752	SmallVectorImpl<std::pair<BasicBlock , BasicBlock >> &ExitEdges,
753	SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
754	ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
755	LoopInfo LI, ArrayRef<MDNode > LoopLocalNoAliasDeclScopes,
756	ScalarEvolution &SE) {
757	BasicBlock *Header = L->getHeader();
758	BasicBlock *Latch = L->getLoopLatch();
759	BasicBlock *PreHeader = L->getLoopPreheader();
760
761	Function *F = Header->getParent();
762	LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
763	LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
764	Loop *ParentLoop = L->getParentLoop();
765
766	// For each block in the original loop, create a new copy,
767	// and update the value map with the newly created values.
768	for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
769	BasicBlock NewBB = CloneBasicBlock(BB: BB, VMap, NameSuffix: ".peel", F);
770	NewBlocks.push_back(Elt: NewBB);
771
772	// If an original block is an immediate child of the loop L, its copy
773	// is a child of a ParentLoop after peeling. If a block is a child of
774	// a nested loop, it is handled in the cloneLoop() call below.
775	if (ParentLoop && LI->getLoopFor(BB: *BB) == L)
776	ParentLoop->addBasicBlockToLoop(NewBB, LI&: *LI);
777
778	VMap [*BB] = NewBB;
779
780	// If dominator tree is available, insert nodes to represent cloned blocks.
781	if (DT) {
782	if (Header == *BB)
783	DT->addNewBlock(BB: NewBB, DomBB: InsertTop);
784	else {
785	DomTreeNode IDom = DT->getNode(BB: BB)->getIDom();
786	// VMap must contain entry for IDom, as the iteration order is RPO.
787	DT->addNewBlock(BB: NewBB, DomBB: cast<BasicBlock>(Val&: VMap [IDom->getBlock()]));
788	}
789	}
790	}
791
792	{
793	// Identify what other metadata depends on the cloned version. After
794	// cloning, replace the metadata with the corrected version for both
795	// memory instructions and noalias intrinsics.
796	std::string Ext = (Twine ("Peel") + Twine (IterNumber)).str();
797	cloneAndAdaptNoAliasScopes(NoAliasDeclScopes: LoopLocalNoAliasDeclScopes, NewBlocks,
798	Context&: Header->getContext(), Ext);
799	}
800
801	// Recursively create the new Loop objects for nested loops, if any,
802	// to preserve LoopInfo.
803	for (Loop ChildLoop : L) {
804	cloneLoop(L: ChildLoop, PL: ParentLoop, VM&: VMap, LI, LPM: nullptr);
805	}
806
807	// Hook-up the control flow for the newly inserted blocks.
808	// The new header is hooked up directly to the "top", which is either
809	// the original loop preheader (for the first iteration) or the previous
810	// iteration's exiting block (for every other iteration)
811	InsertTop->getTerminator()->setSuccessor(Idx: `0`, BB: cast<BasicBlock>(Val&: VMap [Header]));
812
813	// Similarly, for the latch:
814	// The original exiting edge is still hooked up to the loop exit.
815	// The backedge now goes to the "bottom", which is either the loop's real
816	// header (for the last peeled iteration) or the copied header of the next
817	// iteration (for every other iteration)
818	BasicBlock *NewLatch = cast<BasicBlock>(Val&: VMap [Latch]);
819	auto *LatchTerm = cast<Instruction>(Val: NewLatch->getTerminator());
820	for (unsigned idx = `0`, e = LatchTerm->getNumSuccessors(); idx < e; ++idx)
821	if (LatchTerm->getSuccessor(Idx: idx) == Header) {
822	LatchTerm->setSuccessor(Idx: idx, BB: InsertBot);
823	break;
824	}
825	if (DT)
826	DT->changeImmediateDominator(BB: InsertBot, NewBB: NewLatch);
827
828	// The new copy of the loop body starts with a bunch of PHI nodes
829	// that pick an incoming value from either the preheader, or the previous
830	// loop iteration. Since this copy is no longer part of the loop, we
831	// resolve this statically:
832	// For the first iteration, we use the value from the preheader directly.
833	// For any other iteration, we replace the phi with the value generated by
834	// the immediately preceding clone of the loop body (which represents
835	// the previous iteration).
836	for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(Val: I); ++I) {
837	PHINode NewPHI = cast<PHINode>(Val&: VMap [&I]);
838	if (IterNumber == `0`) {
839	VMap [&*I] = NewPHI->getIncomingValueForBlock(BB: PreHeader);
840	} else {
841	Value *LatchVal = NewPHI->getIncomingValueForBlock(BB: Latch);
842	Instruction *LatchInst = dyn_cast<Instruction>(Val: LatchVal);
843	if (LatchInst && L->contains(Inst: LatchInst))
844	VMap [&*I] = LVMap [LatchInst];
845	else
846	VMap [&*I] = LatchVal;
847	}
848	NewPHI->eraseFromParent();
849	}
850
851	// Fix up the outgoing values - we need to add a value for the iteration
852	// we've just created. Note that this must happen after* the incoming*
853	// values are adjusted, since the value going out of the latch may also be
854	// a value coming into the header.
855	for (auto Edge : ExitEdges)
856	for (PHINode &PHI : Edge.second->phis()) {
857	Value *LatchVal = PHI.getIncomingValueForBlock(BB: Edge.first);
858	Instruction *LatchInst = dyn_cast<Instruction>(Val: LatchVal);
859	if (LatchInst && L->contains(Inst: LatchInst))
860	LatchVal = VMap [LatchVal];
861	PHI.addIncoming(V: LatchVal, BB: cast<BasicBlock>(Val&: VMap [Edge.first]));
862	SE.forgetValue(V: &PHI);
863	}
864
865	// LastValueMap is updated with the values for the current loop
866	// which are used the next time this function is called.
867	for (auto KV : VMap)
868	LVMap [KV.first] = KV.second;
869	}
870
871	TargetTransformInfo::PeelingPreferences
872	llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
873	const TargetTransformInfo &TTI,
874	std::optional<bool> UserAllowPeeling,
875	std::optional<bool> UserAllowProfileBasedPeeling,
876	bool UnrollingSpecficValues) {
877	TargetTransformInfo::PeelingPreferences PP;
878
879	// Set the default values.
880	PP.PeelCount = `0`;
881	PP.AllowPeeling = true;
882	PP.AllowLoopNestsPeeling = false;
883	PP.PeelProfiledIterations = true;
884
885	// Get the target specifc values.
886	TTI.getPeelingPreferences(L, SE, PP);
887
888	// User specified values using cl::opt.
889	if (UnrollingSpecficValues) {
890	if (UnrollPeelCount.getNumOccurrences() > `0`)
891	PP.PeelCount = UnrollPeelCount;
892	if (UnrollAllowPeeling.getNumOccurrences() > `0`)
893	PP.AllowPeeling = UnrollAllowPeeling;
894	if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > `0`)
895	PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
896	}
897
898	// User specifed values provided by argument.
899	if (UserAllowPeeling)
900	PP.AllowPeeling = *UserAllowPeeling;
901	if (UserAllowProfileBasedPeeling)
902	PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
903
904	return PP;
905	}
906
907	/// Peel off the first \p PeelCount iterations of loop \p L.
908	///
909	/// Note that this does not peel them off as a single straight-line block.
910	/// Rather, each iteration is peeled off separately, and needs to check the
911	/// exit condition.
912	/// For loops that dynamically execute \p PeelCount iterations or less
913	/// this provides a benefit, since the peeled off iterations, which account
914	/// for the bulk of dynamic execution, can be further simplified by scalar
915	/// optimizations.
916	bool llvm::peelLoop(Loop L, unsigned* PeelCount, LoopInfo *LI,
917	ScalarEvolution SE, DominatorTree &DT, AssumptionCache AC,
918	bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
919	assert(PeelCount > `0` && "Attempt to peel out zero iterations?");
920	assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
921
922	LoopBlocksDFS LoopBlocks(L);
923	LoopBlocks.perform(LI);
924
925	BasicBlock *Header = L->getHeader();
926	BasicBlock *PreHeader = L->getLoopPreheader();
927	BasicBlock *Latch = L->getLoopLatch();
928	SmallVector<std::pair<BasicBlock , BasicBlock >, `4`> ExitEdges;
929	L->getExitEdges(ExitEdges);
930
931	// Remember dominators of blocks we might reach through exits to change them
932	// later. Immediate dominator of such block might change, because we add more
933	// routes which can lead to the exit: we can reach it from the peeled
934	// iterations too.
935	DenseMap<BasicBlock , BasicBlock > NonLoopBlocksIDom;
936	for (auto *BB : L->blocks()) {
937	auto *BBDomNode = DT.getNode(BB);
938	SmallVector<BasicBlock *, `16`> ChildrenToUpdate;
939	for (auto *ChildDomNode : BBDomNode->children()) {
940	auto *ChildBB = ChildDomNode->getBlock();
941	if (!L->contains(BB: ChildBB))
942	ChildrenToUpdate.push_back(Elt: ChildBB);
943	}
944	// The new idom of the block will be the nearest common dominator
945	// of all copies of the previous idom. This is equivalent to the
946	// nearest common dominator of the previous idom and the first latch,
947	// which dominates all copies of the previous idom.
948	BasicBlock *NewIDom = DT.findNearestCommonDominator(A: BB, B: Latch);
949	for (auto *ChildBB : ChildrenToUpdate)
950	NonLoopBlocksIDom [ChildBB] = NewIDom;
951	}
952
953	Function *F = Header->getParent();
954
955	// Set up all the necessary basic blocks. It is convenient to split the
956	// preheader into 3 parts - two blocks to anchor the peeled copy of the loop
957	// body, and a new preheader for the "real" loop.
958
959	// Peeling the first iteration transforms.
960	//
961	// PreHeader:
962	// ...
963	// Header:
964	// LoopBody
965	// If (cond) goto Header
966	// Exit:
967	//
968	// into
969	//
970	// InsertTop:
971	// LoopBody
972	// If (!cond) goto Exit
973	// InsertBot:
974	// NewPreHeader:
975	// ...
976	// Header:
977	// LoopBody
978	// If (cond) goto Header
979	// Exit:
980	//
981	// Each following iteration will split the current bottom anchor in two,
982	// and put the new copy of the loop body between these two blocks. That is,
983	// after peeling another iteration from the example above, we'll split
984	// InsertBot, and get:
985	//
986	// InsertTop:
987	// LoopBody
988	// If (!cond) goto Exit
989	// InsertBot:
990	// LoopBody
991	// If (!cond) goto Exit
992	// InsertBot.next:
993	// NewPreHeader:
994	// ...
995	// Header:
996	// LoopBody
997	// If (cond) goto Header
998	// Exit:
999
1000	BasicBlock *InsertTop = SplitEdge(From: PreHeader, To: Header, DT: &DT, LI);
1001	BasicBlock *InsertBot =
1002	SplitBlock(Old: InsertTop, SplitPt: InsertTop->getTerminator(), DT: &DT, LI);
1003	BasicBlock *NewPreHeader =
1004	SplitBlock(Old: InsertBot, SplitPt: InsertBot->getTerminator(), DT: &DT, LI);
1005
1006	InsertTop->setName(Header->getName() + ".peel.begin");
1007	InsertBot->setName(Header->getName() + ".peel.next");
1008	NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
1009
1010	Instruction *LatchTerm =
1011	cast<Instruction>(Val: cast<BasicBlock>(Val: Latch)->getTerminator());
1012
1013	// If we have branch weight information, we'll want to update it for the
1014	// newly created branches.
1015	DenseMap<Instruction *, WeightInfo> Weights;
1016	initBranchWeights(WeightInfos&: Weights, L);
1017
1018	// Identify what noalias metadata is inside the loop: if it is inside the
1019	// loop, the associated metadata must be cloned for each iteration.
1020	SmallVector<MDNode *, `6`> LoopLocalNoAliasDeclScopes;
1021	identifyNoAliasScopesToClone(BBs: L->getBlocks(), NoAliasDeclScopes&: LoopLocalNoAliasDeclScopes);
1022
1023	// For each peeled-off iteration, make a copy of the loop.
1024	for (unsigned Iter = `0`; Iter < PeelCount; ++Iter) {
1025	SmallVector<BasicBlock *, `8`> NewBlocks;
1026	ValueToValueMapTy VMap;
1027
1028	cloneLoopBlocks(L, IterNumber: Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
1029	LoopBlocks, VMap, LVMap, DT: &DT, LI,
1030	LoopLocalNoAliasDeclScopes, SE&: *SE);
1031
1032	// Remap to use values from the current iteration instead of the
1033	// previous one.
1034	remapInstructionsInBlocks(Blocks: NewBlocks, VMap);
1035
1036	// Update IDoms of the blocks reachable through exits.
1037	if (Iter == `0`)
1038	for (auto BBIDom : NonLoopBlocksIDom)
1039	DT.changeImmediateDominator(BB: BBIDom.first,
1040	NewBB: cast<BasicBlock>(Val&: LVMap [BBIDom.second]));
1041	#ifdef EXPENSIVE_CHECKS
1042	assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1043	#endif
1044
1045	for (auto &[Term, Info] : Weights) {
1046	auto *TermCopy = cast<Instruction>(Val&: VMap [Term]);
1047	updateBranchWeights(Term: TermCopy, Info);
1048	}
1049
1050	// Remove Loop metadata from the latch branch instruction
1051	// because it is not the Loop's latch branch anymore.
1052	auto *LatchTermCopy = cast<Instruction>(Val&: VMap [LatchTerm]);
1053	LatchTermCopy->setMetadata(KindID: LLVMContext::MD_loop, Node: nullptr);
1054
1055	InsertTop = InsertBot;
1056	InsertBot = SplitBlock(Old: InsertBot, SplitPt: InsertBot->getTerminator(), DT: &DT, LI);
1057	InsertBot->setName(Header->getName() + ".peel.next");
1058
1059	F->splice(ToIt: InsertTop->getIterator(), FromF: F, FromBeginIt: NewBlocks [`0`]->getIterator(),
1060	FromEndIt: F->end());
1061	}
1062
1063	// Now adjust the phi nodes in the loop header to get their initial values
1064	// from the last peeled-off iteration instead of the preheader.
1065	for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(Val: I); ++I) {
1066	PHINode *PHI = cast<PHINode>(Val&: I);
1067	Value *NewVal = PHI->getIncomingValueForBlock(BB: Latch);
1068	Instruction *LatchInst = dyn_cast<Instruction>(Val: NewVal);
1069	if (LatchInst && L->contains(Inst: LatchInst))
1070	NewVal = LVMap [LatchInst];
1071
1072	PHI->setIncomingValueForBlock(BB: NewPreHeader, V: NewVal);
1073	}
1074
1075	for (const auto &[Term, Info] : Weights) {
1076	setBranchWeights(I&: Term, Weights: Info.Weights, /IsExpected=/*false);
1077	}
1078
1079	// Update Metadata for count of peeled off iterations.
1080	unsigned AlreadyPeeled = `0`;
1081	if (auto Peeled = getOptionalIntLoopAttribute(TheLoop: L, Name: PeeledCountMetaData))
1082	AlreadyPeeled = *Peeled;
1083	addStringMetadataToLoop(TheLoop: L, MDString: PeeledCountMetaData, V: AlreadyPeeled + PeelCount);
1084
1085	if (Loop *ParentLoop = L->getParentLoop())
1086	L = ParentLoop;
1087
1088	// We modified the loop, update SE.
1089	SE->forgetTopmostLoop(L);
1090	SE->forgetBlockAndLoopDispositions();
1091
1092	#ifdef EXPENSIVE_CHECKS
1093	// Finally DomtTree must be correct.
1094	assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1095	#endif
1096
1097	// FIXME: Incrementally update loop-simplify
1098	simplifyLoop(L, DT: &DT, LI, SE, AC, MSSAU: nullptr, PreserveLCSSA);
1099
1100	NumPeeled ++;
1101
1102	return true;
1103	}
1104

Browse the source code of llvm_projects/llvm/lib/Transforms/Utils/LoopPeel.cpp