1 | //===- LoopPeel.cpp -------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Loop Peeling Utilities. |
10 | //===----------------------------------------------------------------------===// |
11 | |
12 | #include "llvm/Transforms/Utils/LoopPeel.h" |
13 | #include "llvm/ADT/DenseMap.h" |
14 | #include "llvm/ADT/SmallVector.h" |
15 | #include "llvm/ADT/Statistic.h" |
16 | #include "llvm/Analysis/Loads.h" |
17 | #include "llvm/Analysis/LoopInfo.h" |
18 | #include "llvm/Analysis/LoopIterator.h" |
19 | #include "llvm/Analysis/ScalarEvolution.h" |
20 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
21 | #include "llvm/Analysis/ScalarEvolutionPatternMatch.h" |
22 | #include "llvm/Analysis/TargetTransformInfo.h" |
23 | #include "llvm/IR/BasicBlock.h" |
24 | #include "llvm/IR/Dominators.h" |
25 | #include "llvm/IR/Function.h" |
26 | #include "llvm/IR/InstrTypes.h" |
27 | #include "llvm/IR/Instruction.h" |
28 | #include "llvm/IR/Instructions.h" |
29 | #include "llvm/IR/LLVMContext.h" |
30 | #include "llvm/IR/MDBuilder.h" |
31 | #include "llvm/IR/PatternMatch.h" |
32 | #include "llvm/IR/ProfDataUtils.h" |
33 | #include "llvm/Support/Casting.h" |
34 | #include "llvm/Support/CommandLine.h" |
35 | #include "llvm/Support/Debug.h" |
36 | #include "llvm/Support/raw_ostream.h" |
37 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
38 | #include "llvm/Transforms/Utils/Cloning.h" |
39 | #include "llvm/Transforms/Utils/LoopSimplify.h" |
40 | #include "llvm/Transforms/Utils/LoopUtils.h" |
41 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
42 | #include "llvm/Transforms/Utils/ValueMapper.h" |
43 | #include <algorithm> |
44 | #include <cassert> |
45 | #include <cstdint> |
46 | #include <optional> |
47 | |
48 | using namespace llvm; |
49 | using namespace llvm::PatternMatch; |
50 | using namespace llvm::SCEVPatternMatch; |
51 | |
52 | #define DEBUG_TYPE "loop-peel" |
53 | |
54 | STATISTIC(NumPeeled, "Number of loops peeled" ); |
55 | STATISTIC(NumPeeledEnd, "Number of loops peeled from end" ); |
56 | |
57 | static cl::opt<unsigned> UnrollPeelCount( |
58 | "unroll-peel-count" , cl::Hidden, |
59 | cl::desc("Set the unroll peeling count, for testing purposes" )); |
60 | |
61 | static cl::opt<bool> |
62 | UnrollAllowPeeling("unroll-allow-peeling" , cl::init(Val: true), cl::Hidden, |
63 | cl::desc("Allows loops to be peeled when the dynamic " |
64 | "trip count is known to be low." )); |
65 | |
66 | static cl::opt<bool> |
67 | UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling" , |
68 | cl::init(Val: false), cl::Hidden, |
69 | cl::desc("Allows loop nests to be peeled." )); |
70 | |
71 | static cl::opt<unsigned> UnrollPeelMaxCount( |
72 | "unroll-peel-max-count" , cl::init(Val: 7), cl::Hidden, |
73 | cl::desc("Max average trip count which will cause loop peeling." )); |
74 | |
75 | static cl::opt<unsigned> UnrollForcePeelCount( |
76 | "unroll-force-peel-count" , cl::init(Val: 0), cl::Hidden, |
77 | cl::desc("Force a peel count regardless of profiling information." )); |
78 | |
79 | static cl::opt<bool> DisableAdvancedPeeling( |
80 | "disable-advanced-peeling" , cl::init(Val: false), cl::Hidden, |
81 | cl::desc( |
82 | "Disable advance peeling. Issues for convergent targets (D134803)." )); |
83 | |
84 | static const char *PeeledCountMetaData = "llvm.loop.peeled.count" ; |
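
// For illustration only (the pass name and pipeline are assumptions, but the
// flag spellings are the ones defined above), peeling can be exercised from
// opt with something like:
//   opt -passes=loop-unroll -unroll-force-peel-count=2 -S input.ll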
85 | |
86 | // Check whether we are capable of peeling this loop. |
87 | bool llvm::canPeel(const Loop *L) { |
88 | // Make sure the loop is in simplified form |
89 | if (!L->isLoopSimplifyForm()) |
90 | return false; |
91 | if (!DisableAdvancedPeeling) |
92 | return true; |
93 | |
94 | SmallVector<BasicBlock *, 4> Exits; |
95 | L->getUniqueNonLatchExitBlocks(ExitBlocks&: Exits); |
96 | // The latch must either be the only exiting block or all non-latch exit |
97 | // blocks have either a deopt or unreachable terminator or compose a chain of |
98 | // blocks where the last one is either deopt or unreachable terminated. Both |
99 | // deopt and unreachable terminators are a strong indication they are not |
100 | // taken. Note that this is a profitability check, not a legality check. Also |
101 | // note that LoopPeeling currently can only update the branch weights of latch |
102 | // blocks and branch weights to blocks with deopt or unreachable do not need |
103 | // updating. |
104 | return llvm::all_of(Range&: Exits, P: IsBlockFollowedByDeoptOrUnreachable); |
105 | } |
106 | |
107 | namespace { |
108 | |
109 | // As a loop is peeled, it may be the case that Phi nodes become |
110 | // loop-invariant (ie, known because there is only one choice). |
111 | // For example, consider the following function: |
112 | // void g(int); |
113 | // void binary() { |
114 | // int x = 0; |
115 | // int y = 0; |
116 | // int a = 0; |
117 | // for(int i = 0; i <100000; ++i) { |
118 | // g(x); |
119 | // x = y; |
120 | // g(a); |
121 | // y = a + 1; |
122 | // a = 5; |
123 | // } |
124 | // } |
125 | // Peeling 3 iterations is beneficial because the values for x, y and a |
126 | // become known. The IR for this loop looks something like the following: |
127 | // |
128 | // %i = phi i32 [ 0, %entry ], [ %inc, %if.end ] |
129 | // %a = phi i32 [ 0, %entry ], [ 5, %if.end ] |
130 | // %y = phi i32 [ 0, %entry ], [ %add, %if.end ] |
131 | // %x = phi i32 [ 0, %entry ], [ %y, %if.end ] |
132 | // ... |
133 | // tail call void @_Z1gi(i32 signext %x) |
134 | // tail call void @_Z1gi(i32 signext %a) |
135 | // %add = add nuw nsw i32 %a, 1 |
136 | // %inc = add nuw nsw i32 %i, 1 |
137 | // %exitcond = icmp eq i32 %inc, 100000 |
138 | // br i1 %exitcond, label %for.cond.cleanup, label %for.body |
139 | // |
140 | // The arguments for the calls to g will become known after 3 iterations |
141 | // of the loop, because the phi nodes values become known after 3 iterations |
142 | // of the loop (ie, they are known on the 4th iteration, so peel 3 iterations). |
143 | // The first iteration has g(0), g(0); the second has g(0), g(5); the |
144 | // third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5). |
145 | // Now consider the phi nodes: |
146 | // %a is a phi with constants so it is determined after iteration 1. |
147 | // %y is a phi based on a constant and %a so it is determined on |
148 | // the iteration after %a is determined, so iteration 2. |
149 | // %x is a phi based on a constant and %y so it is determined on |
150 | // the iteration after %y, so iteration 3. |
151 | // %i is based on itself (and is an induction variable) so it is |
152 | // never determined. |
153 | // This means that peeling off 3 iterations will result in being able to |
154 | // remove the phi nodes for %a, %y, and %x. The arguments for the |
155 | // corresponding calls to g are determined and the code for computing |
156 | // x, y, and a can be removed. |
157 | // |
158 | // The PhiAnalyzer class calculates how many times a loop should be |
159 | // peeled based on the above analysis of the phi nodes in the loop while |
160 | // respecting the maximum specified. |
161 | class PhiAnalyzer { |
162 | public: |
163 | PhiAnalyzer(const Loop &L, unsigned MaxIterations); |
164 | |
165 | // Calculate the sufficient minimum number of iterations of the loop to peel |
166 | // such that phi instructions become determined (subject to allowable limits) |
167 | std::optional<unsigned> calculateIterationsToPeel(); |
168 | |
169 | protected: |
170 | using PeelCounter = std::optional<unsigned>; |
171 | const PeelCounter Unknown = std::nullopt; |
172 | |
173 | // Add 1 respecting Unknown and return Unknown if result over MaxIterations |
174 | PeelCounter addOne(PeelCounter PC) const { |
175 | if (PC == Unknown) |
176 | return Unknown; |
177 | return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown; |
178 | } |
179 | |
180 | // Calculate the number of iterations after which the given value |
181 | // becomes an invariant. |
182 | PeelCounter calculate(const Value &); |
183 | |
184 | const Loop &L; |
185 | const unsigned MaxIterations; |
186 | |
187 | // Map of Values to number of iterations to invariance |
188 | SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance; |
189 | }; |
190 | |
191 | PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations) |
192 | : L(L), MaxIterations(MaxIterations) { |
193 | assert(canPeel(&L) && "loop is not suitable for peeling" ); |
194 | assert(MaxIterations > 0 && "no peeling is allowed?" ); |
195 | } |
196 | |
197 | // This function calculates the number of iterations after which the value |
198 | // becomes an invariant. The pre-calculated values are memoized in a map. |
199 | // N.B. This number will be Unknown or <= MaxIterations. |
200 | // The function is calculated according to the following definition: |
201 | // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge]. |
202 | // F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown) |
203 | // G(%y) = 0 if %y is a loop invariant |
204 | // G(%y) = G(%BackEdgeValue) if %y is a phi in the header block |
205 | // G(%y) = TODO: if %y is an expression based on phis and loop invariants |
206 | // The example looks like: |
207 | // %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration. |
208 | // %y = phi(0, 5) |
209 | // %a = %y + 1 |
210 | // G(%y) = Unknown otherwise (including phi not in header block) |
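// For illustration, on the binary() example near the top of this file the
// recursion in calculate() below yields (per back-edge input of each phi):
//   calculate(%a) = calculate(5)    + 1 = 0 + 1 = 1  (5 is loop-invariant)
//   calculate(%y) = calculate(%add) + 1 = 1 + 1 = 2  (%add = add %a, 1)
//   calculate(%x) = calculate(%y)   + 1 = 2 + 1 = 3
//   calculate(%i) = Unknown  (it depends on itself through %inc)
// so calculateIterationsToPeel() returns 3 for that loop.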
211 | PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { |
212 | // If we already know the answer, take it from the map. |
213 | // Otherwise, place Unknown to map to avoid infinite recursion. Such |
214 | // cycles can never stop on an invariant. |
215 | auto [I, Inserted] = IterationsToInvariance.try_emplace(Key: &V, Args: Unknown); |
216 | if (!Inserted) |
217 | return I->second; |
218 | |
219 | if (L.isLoopInvariant(V: &V)) |
220 | // Loop invariant so known at start. |
221 | return (IterationsToInvariance[&V] = 0); |
222 | if (const PHINode *Phi = dyn_cast<PHINode>(Val: &V)) { |
223 | if (Phi->getParent() != L.getHeader()) { |
224 | // Phi is not in header block so Unknown. |
225 | assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved" ); |
226 | return Unknown; |
227 | } |
228 | // We need to analyze the input from the back edge and add 1. |
229 | Value *Input = Phi->getIncomingValueForBlock(BB: L.getLoopLatch()); |
230 | PeelCounter Iterations = calculate(V: *Input); |
231 | assert(IterationsToInvariance[Input] == Iterations && |
232 | "unexpected value saved" ); |
233 | return (IterationsToInvariance[Phi] = addOne(PC: Iterations)); |
234 | } |
235 | if (const Instruction *I = dyn_cast<Instruction>(Val: &V)) { |
236 | if (isa<CmpInst>(Val: I) || I->isBinaryOp()) { |
237 | // Binary instructions get the max of the operands. |
238 | PeelCounter LHS = calculate(V: *I->getOperand(i: 0)); |
239 | if (LHS == Unknown) |
240 | return Unknown; |
241 | PeelCounter RHS = calculate(V: *I->getOperand(i: 1)); |
242 | if (RHS == Unknown) |
243 | return Unknown; |
244 | return (IterationsToInvariance[I] = {std::max(a: *LHS, b: *RHS)}); |
245 | } |
246 | if (I->isCast()) |
247 | // Cast instructions get the value of the operand. |
248 | return (IterationsToInvariance[I] = calculate(V: *I->getOperand(i: 0))); |
249 | } |
250 | // TODO: handle more expressions |
251 | |
252 | // Everything else is Unknown. |
253 | assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved" ); |
254 | return Unknown; |
255 | } |
256 | |
257 | std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() { |
258 | unsigned Iterations = 0; |
259 | for (auto &PHI : L.getHeader()->phis()) { |
260 | PeelCounter ToInvariance = calculate(V: PHI); |
261 | if (ToInvariance != Unknown) { |
262 | assert(*ToInvariance <= MaxIterations && "bad result in phi analysis" ); |
263 | Iterations = std::max(a: Iterations, b: *ToInvariance); |
264 | if (Iterations == MaxIterations) |
265 | break; |
266 | } |
267 | } |
268 | assert((Iterations <= MaxIterations) && "bad result in phi analysis" ); |
269 | return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt; |
270 | } |
271 | |
272 | } // unnamed namespace |
273 | |
274 | // Try to find any invariant memory reads that will become dereferenceable in |
275 | // the remainder loop after peeling. The load must also be used (transitively) |
276 | // by an exit condition. Returns the number of iterations to peel off (at the |
277 | // moment either 0 or 1). |
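// For illustration, a hypothetical loop of the following shape benefits: %p is
// loop-invariant but not known to be dereferenceable on entry, and the only
// non-latch exit ends in unreachable.
//
//   header:
//     ...
//     br i1 %in.bounds, label %latch, label %trap ; %trap ends in unreachable
//   latch:
//     %v = load i32, ptr %p
//     %done = icmp eq i32 %v, 0
//     br i1 %done, label %exit, label %header
//
// After peeling the first iteration, the load has already executed on every
// path into the remaining loop, so it is dereferenceable there and later
// passes may hoist or speculate it.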
278 | static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, |
279 | DominatorTree &DT, |
280 | AssumptionCache *AC) { |
281 | // Skip loops with a single exiting block, because there should be no benefit |
282 | // for the heuristic below. |
283 | if (L.getExitingBlock()) |
284 | return 0; |
285 | |
286 | // All non-latch exit blocks must have an UnreachableInst terminator. |
287 | // Otherwise the heuristic below may not be profitable. |
288 | SmallVector<BasicBlock *, 4> Exits; |
289 | L.getUniqueNonLatchExitBlocks(ExitBlocks&: Exits); |
290 | if (any_of(Range&: Exits, P: [](const BasicBlock *BB) { |
291 | return !isa<UnreachableInst>(Val: BB->getTerminator()); |
292 | })) |
293 | return 0; |
294 | |
295 | // Now look for invariant loads that dominate the latch and are not known to |
296 | // be dereferenceable. If there are such loads and no writes, they will become |
297 | // dereferenceable in the loop if the first iteration is peeled off. Also |
298 | // collect the set of instructions controlled by such loads. Only peel if an |
299 | // exit condition uses (transitively) such a load. |
300 | BasicBlock *Header = L.getHeader(); |
301 | BasicBlock *Latch = L.getLoopLatch(); |
302 | SmallPtrSet<Value *, 8> LoadUsers; |
303 | const DataLayout &DL = L.getHeader()->getDataLayout(); |
304 | for (BasicBlock *BB : L.blocks()) { |
305 | for (Instruction &I : *BB) { |
306 | if (I.mayWriteToMemory()) |
307 | return 0; |
308 | |
309 | if (LoadUsers.contains(Ptr: &I)) |
310 | LoadUsers.insert_range(R: I.users()); |
311 | // Do not look for reads in the header; they can already be hoisted |
312 | // without peeling. |
313 | if (BB == Header) |
314 | continue; |
315 | if (auto *LI = dyn_cast<LoadInst>(Val: &I)) { |
316 | Value *Ptr = LI->getPointerOperand(); |
317 | if (DT.dominates(A: BB, B: Latch) && L.isLoopInvariant(V: Ptr) && |
318 | !isDereferenceablePointer(V: Ptr, Ty: LI->getType(), DL, CtxI: LI, AC, DT: &DT)) |
319 | LoadUsers.insert_range(R: I.users()); |
320 | } |
321 | } |
322 | } |
323 | SmallVector<BasicBlock *> ExitingBlocks; |
324 | L.getExitingBlocks(ExitingBlocks); |
325 | if (any_of(Range&: ExitingBlocks, P: [&LoadUsers](BasicBlock *Exiting) { |
326 | return LoadUsers.contains(Ptr: Exiting->getTerminator()); |
327 | })) |
328 | return 1; |
329 | return 0; |
330 | } |
331 | |
332 | bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { |
333 | const SCEV *BTC = SE.getBackedgeTakenCount(L: &L); |
334 | if (isa<SCEVCouldNotCompute>(Val: BTC)) |
335 | return false; |
336 | |
337 | // Check if the exit condition of the loop can be adjusted by the peeling |
338 | // codegen. For now, the loop must |
339 | // * exit via the latch, and |
340 | // * have an exit condition that is a NE/EQ compare of an induction with |
341 | // step of 1 and is only used by the exiting branch. |
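// For example, a latch of the following (hypothetical) shape is accepted:
//   %iv.next = add nuw nsw i64 %iv, 1
//   %ec = icmp eq i64 %iv.next, %n ; %n loop-invariant, %ec has a single use
//   br i1 %ec, label %exit, label %header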
342 | BasicBlock *Latch = L.getLoopLatch(); |
343 | Value *Inc; |
344 | Value *Bound; |
345 | CmpPredicate Pred; |
346 | BasicBlock *Succ1; |
347 | BasicBlock *Succ2; |
348 | return Latch && Latch == L.getExitingBlock() && |
349 | match(V: Latch->getTerminator(), |
350 | P: m_Br(C: m_OneUse(SubPattern: m_ICmp(Pred, L: m_Value(V&: Inc), R: m_Value(V&: Bound))), |
351 | T: m_BasicBlock(V&: Succ1), F: m_BasicBlock(V&: Succ2))) && |
352 | ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) || |
353 | (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) && |
354 | Bound->getType()->isIntegerTy() && |
355 | SE.isLoopInvariant(S: SE.getSCEV(V: Bound), L: &L) && |
356 | match(S: SE.getSCEV(V: Inc), |
357 | P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_scev_One(), L: m_SpecificLoop(L: &L))); |
358 | } |
359 | |
360 | /// Returns true if the last iteration can be peeled off, the inverse of the |
361 | /// condition (Pred LeftAR, RightSCEV) is known at the last iteration, and the |
362 | /// condition itself is known at the second-to-last iteration. |
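/// For example (illustrative only), in
///   for (unsigned i = 0; i < n; i++)
///     if (i != n - 1)
///       ...
/// the compare holds on every iteration except the last one, so peeling off
/// the last iteration leaves a loop in which the branch can be folded away.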
363 | static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, |
364 | const SCEVAddRecExpr *LeftAR, |
365 | const SCEV *RightSCEV, ScalarEvolution &SE, |
366 | const TargetTransformInfo &TTI) { |
367 | if (!canPeelLastIteration(L, SE)) |
368 | return false; |
369 | |
370 | const SCEV *BTC = SE.getBackedgeTakenCount(L: &L); |
371 | SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel" ); |
372 | if (!SE.isKnownNonZero(S: BTC) && |
373 | Expander.isHighCostExpansion(Exprs: BTC, L: &L, Budget: SCEVCheapExpansionBudget, TTI: &TTI, |
374 | At: L.getLoopPredecessor()->getTerminator())) |
375 | return false; |
376 | |
377 | auto Guards = ScalarEvolution::LoopGuards::collect(L: &L, SE); |
378 | BTC = SE.applyLoopGuards(Expr: BTC, Guards); |
379 | RightSCEV = SE.applyLoopGuards(Expr: RightSCEV, Guards); |
380 | const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(It: BTC, SE); |
381 | const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration( |
382 | It: SE.getMinusSCEV(LHS: BTC, RHS: SE.getOne(Ty: BTC->getType())), SE); |
383 | |
384 | return SE.isKnownPredicate(Pred: ICmpInst::getInversePredicate(pred: Pred), LHS: ValAtLastIter, |
385 | RHS: RightSCEV) && |
386 | SE.isKnownPredicate(Pred, LHS: ValAtSecondToLastIter, RHS: RightSCEV); |
387 | } |
388 | |
389 | // Return the number of iterations to peel off from the beginning and end of the |
390 | // loop, respectively, such that conditions in the body become known to be true |
391 | // or false. For example, if we peel 2 iterations off the loop below, the |
392 | // condition i < 2 can be evaluated at compile time. |
393 | // |
394 | // for (i = 0; i < n; i++) { |
395 | // if (i < 2) |
396 | // .. |
397 | // else |
398 | // .. |
399 | // } |
400 | static std::pair<unsigned, unsigned> |
401 | countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE, |
402 | const TargetTransformInfo &TTI) { |
403 | assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form" ); |
404 | unsigned DesiredPeelCount = 0; |
405 | unsigned DesiredPeelCountLast = 0; |
406 | |
407 | // Do not peel the entire loop. |
408 | const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(L: &L); |
409 | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Val: BE)) |
410 | MaxPeelCount = |
411 | std::min(a: (unsigned)SC->getAPInt().getLimitedValue() - 1, b: MaxPeelCount); |
412 | |
413 | // Increase PeelCount while the (IterVal Pred BoundSCEV) condition is satisfied; |
414 | // return true if the inverted condition becomes known before reaching the |
415 | // MaxPeelCount limit. |
416 | auto PeelWhilePredicateIsKnown = |
417 | [&](unsigned &PeelCount, const SCEV *&IterVal, const SCEV *BoundSCEV, |
418 | const SCEV *Step, ICmpInst::Predicate Pred) { |
419 | while (PeelCount < MaxPeelCount && |
420 | SE.isKnownPredicate(Pred, LHS: IterVal, RHS: BoundSCEV)) { |
421 | IterVal = SE.getAddExpr(LHS: IterVal, RHS: Step); |
422 | ++PeelCount; |
423 | } |
424 | return SE.isKnownPredicate(Pred: ICmpInst::getInversePredicate(pred: Pred), LHS: IterVal, |
425 | RHS: BoundSCEV); |
426 | }; |
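// As an illustrative run of the lambda above: with IterVal starting at 0 (an
// {0,+,1} addrec evaluated at iteration 0), BoundSCEV = 2, Step = 1 and
// Pred = ult, PeelCount advances 0 -> 1 -> 2 while 0 ult 2 and 1 ult 2 hold;
// at IterVal = 2 the inverse predicate uge is known, so it returns true with
// PeelCount = 2.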
427 | |
428 | const unsigned MaxDepth = 4; |
429 | std::function<void(Value *, unsigned)> ComputePeelCount = |
430 | [&](Value *Condition, unsigned Depth) -> void { |
431 | if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth) |
432 | return; |
433 | |
434 | Value *LeftVal, *RightVal; |
435 | if (match(V: Condition, P: m_And(L: m_Value(V&: LeftVal), R: m_Value(V&: RightVal))) || |
436 | match(V: Condition, P: m_Or(L: m_Value(V&: LeftVal), R: m_Value(V&: RightVal)))) { |
437 | ComputePeelCount(LeftVal, Depth + 1); |
438 | ComputePeelCount(RightVal, Depth + 1); |
439 | return; |
440 | } |
441 | |
442 | CmpPredicate Pred; |
443 | if (!match(V: Condition, P: m_ICmp(Pred, L: m_Value(V&: LeftVal), R: m_Value(V&: RightVal)))) |
444 | return; |
445 | |
446 | const SCEV *LeftSCEV = SE.getSCEV(V: LeftVal); |
447 | const SCEV *RightSCEV = SE.getSCEV(V: RightVal); |
448 | |
449 | // Do not consider predicates that are known to be true or false |
450 | // independently of the loop iteration. |
451 | if (SE.evaluatePredicate(Pred, LHS: LeftSCEV, RHS: RightSCEV)) |
452 | return; |
453 | |
454 | // Check if we have a condition with one AddRec and one non AddRec |
455 | // expression. Normalize LeftSCEV to be the AddRec. |
456 | if (!isa<SCEVAddRecExpr>(Val: LeftSCEV)) { |
457 | if (isa<SCEVAddRecExpr>(Val: RightSCEV)) { |
458 | std::swap(a&: LeftSCEV, b&: RightSCEV); |
459 | Pred = ICmpInst::getSwappedPredicate(pred: Pred); |
460 | } else |
461 | return; |
462 | } |
463 | |
464 | const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(Val: LeftSCEV); |
465 | |
466 | // Avoid huge SCEV computations in the loop below, make sure we only |
467 | // consider AddRecs of the loop we are trying to peel. |
468 | if (!LeftAR->isAffine() || LeftAR->getLoop() != &L) |
469 | return; |
470 | if (!(ICmpInst::isEquality(P: Pred) && LeftAR->hasNoSelfWrap()) && |
471 | !SE.getMonotonicPredicateType(LHS: LeftAR, Pred)) |
472 | return; |
473 | |
474 | // Check if extending the current DesiredPeelCount lets us evaluate Pred |
475 | // or !Pred in the loop body statically. |
476 | unsigned NewPeelCount = DesiredPeelCount; |
477 | |
478 | const SCEV *IterVal = LeftAR->evaluateAtIteration( |
479 | It: SE.getConstant(Ty: LeftSCEV->getType(), V: NewPeelCount), SE); |
480 | |
481 | // If the original condition is not known, get the negated predicate |
482 | // (which holds on the else branch) and check if it is known. This allows |
483 | // us to peel off iterations that make the original condition false. |
484 | if (!SE.isKnownPredicate(Pred, LHS: IterVal, RHS: RightSCEV)) |
485 | Pred = ICmpInst::getInversePredicate(pred: Pred); |
486 | |
487 | const SCEV *Step = LeftAR->getStepRecurrence(SE); |
488 | if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step, |
489 | Pred)) { |
490 | if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI)) |
491 | DesiredPeelCountLast = 1; |
492 | return; |
493 | } |
494 | |
495 | // However, for equality comparisons, that isn't always sufficient to |
496 | // eliminate the comparison in the loop body; we may need to peel one more |
497 | // iteration. See if that makes !Pred become unknown again. |
498 | const SCEV *NextIterVal = SE.getAddExpr(LHS: IterVal, RHS: Step); |
499 | if (ICmpInst::isEquality(P: Pred) && |
500 | !SE.isKnownPredicate(Pred: ICmpInst::getInversePredicate(pred: Pred), LHS: NextIterVal, |
501 | RHS: RightSCEV) && |
502 | !SE.isKnownPredicate(Pred, LHS: IterVal, RHS: RightSCEV) && |
503 | SE.isKnownPredicate(Pred, LHS: NextIterVal, RHS: RightSCEV)) { |
504 | if (NewPeelCount >= MaxPeelCount) |
505 | return; // Need to peel one more iteration, but can't. Give up. |
506 | ++NewPeelCount; // Great! |
507 | } |
508 | |
509 | DesiredPeelCount = std::max(a: DesiredPeelCount, b: NewPeelCount); |
510 | DesiredPeelCountLast = std::max(a: DesiredPeelCountLast, b: NewPeelCount); |
511 | }; |
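// As an illustration of the equality handling above (hypothetical loop with
// induction i = 0, 1, 2, ... and a compare i == 2): the predicate is not known
// at i == 0, so it is inverted to "!=", which is then known for i == 0 and
// i == 1 (peel count 2). Because 2 != 2 is not known while 3 != 2 is, one
// extra iteration is peeled, and peeling 3 iterations leaves a loop in which
// the compare is invariantly false.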
512 | |
513 | auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) { |
514 | if (!MinMax->getType()->isIntegerTy()) |
515 | return; |
516 | Value *LHS = MinMax->getLHS(), *RHS = MinMax->getRHS(); |
517 | const SCEV *BoundSCEV, *IterSCEV; |
518 | if (L.isLoopInvariant(V: LHS)) { |
519 | BoundSCEV = SE.getSCEV(V: LHS); |
520 | IterSCEV = SE.getSCEV(V: RHS); |
521 | } else if (L.isLoopInvariant(V: RHS)) { |
522 | BoundSCEV = SE.getSCEV(V: RHS); |
523 | IterSCEV = SE.getSCEV(V: LHS); |
524 | } else |
525 | return; |
526 | const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Val: IterSCEV); |
527 | // For simplicity, we support only affine recurrences. |
528 | if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L) |
529 | return; |
530 | const SCEV *Step = AddRec->getStepRecurrence(SE); |
531 | bool IsSigned = MinMax->isSigned(); |
532 | // To minimize number of peeled iterations, we use strict relational |
533 | // predicates here. |
534 | ICmpInst::Predicate Pred; |
535 | if (SE.isKnownPositive(S: Step)) |
536 | Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; |
537 | else if (SE.isKnownNegative(S: Step)) |
538 | Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; |
539 | else |
540 | return; |
541 | // Check that AddRec is not wrapping. |
542 | if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap())) |
543 | return; |
544 | unsigned NewPeelCount = DesiredPeelCount; |
545 | const SCEV *IterVal = AddRec->evaluateAtIteration( |
546 | It: SE.getConstant(Ty: AddRec->getType(), V: NewPeelCount), SE); |
547 | if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step, |
548 | Pred)) { |
549 | if (shouldPeelLastIteration(L, Pred, LeftAR: AddRec, RightSCEV: BoundSCEV, SE, TTI)) |
550 | DesiredPeelCountLast = 1; |
551 | return; |
552 | } |
553 | DesiredPeelCount = NewPeelCount; |
554 | }; |
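// For illustration (hypothetical, assuming the recurrence has the required
// no-wrap flag): for umin(%i, 3) with %i = {0,+,1}, the step is positive so
// Pred is ult; 0, 1 and 2 are all ult 3, so three iterations are peeled,
// after which %i uge 3 is known and umin(%i, 3) folds to 3 in the remaining
// loop.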
555 | |
556 | for (BasicBlock *BB : L.blocks()) { |
557 | for (Instruction &I : *BB) { |
558 | if (SelectInst *SI = dyn_cast<SelectInst>(Val: &I)) |
559 | ComputePeelCount(SI->getCondition(), 0); |
560 | if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Val: &I)) |
561 | ComputePeelCountMinMax(MinMax); |
562 | } |
563 | |
564 | auto *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
565 | if (!BI || BI->isUnconditional()) |
566 | continue; |
567 | |
568 | // Ignore loop exit condition. |
569 | if (L.getLoopLatch() == BB) |
570 | continue; |
571 | |
572 | ComputePeelCount(BI->getCondition(), 0); |
573 | } |
574 | |
575 | return {DesiredPeelCount, DesiredPeelCountLast}; |
576 | } |
577 | |
578 | /// This "heuristic" exactly matches implicit behavior which used to exist |
579 | /// inside getLoopEstimatedTripCount. It was added here to keep an |
580 | /// improvement inside that API from causing peeling to become more aggressive. |
581 | /// This should probably be removed. |
582 | static bool violatesLegacyMultiExitLoopCheck(Loop *L) { |
583 | BasicBlock *Latch = L->getLoopLatch(); |
584 | if (!Latch) |
585 | return true; |
586 | |
587 | BranchInst *LatchBR = dyn_cast<BranchInst>(Val: Latch->getTerminator()); |
588 | if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(BB: Latch)) |
589 | return true; |
590 | |
591 | assert((LatchBR->getSuccessor(0) == L->getHeader() || |
592 | LatchBR->getSuccessor(1) == L->getHeader()) && |
593 | "At least one edge out of the latch must go to the header" ); |
594 | |
595 | SmallVector<BasicBlock *, 4> ExitBlocks; |
596 | L->getUniqueNonLatchExitBlocks(ExitBlocks); |
597 | return any_of(Range&: ExitBlocks, P: [](const BasicBlock *EB) { |
598 | return !EB->getTerminatingDeoptimizeCall(); |
599 | }); |
600 | } |
601 | |
602 | |
603 | // Compute the number of iterations to peel off and record the result in PP. |
604 | void llvm::computePeelCount(Loop *L, unsigned LoopSize, |
605 | TargetTransformInfo::PeelingPreferences &PP, |
606 | unsigned TripCount, DominatorTree &DT, |
607 | ScalarEvolution &SE, const TargetTransformInfo &TTI, |
608 | AssumptionCache *AC, unsigned Threshold) { |
609 | assert(LoopSize > 0 && "Zero loop size is not allowed!" ); |
610 | // Save the PP.PeelCount value set by the target in |
611 | // TTI.getPeelingPreferences or by the flag -unroll-peel-count. |
612 | unsigned TargetPeelCount = PP.PeelCount; |
613 | PP.PeelCount = 0; |
614 | PP.PeelLast = false; |
615 | if (!canPeel(L)) |
616 | return; |
617 | |
618 | // Only try to peel innermost loops by default. |
619 | // The constraint can be relaxed by the target in TTI.getPeelingPreferences |
620 | // or by the flag -unroll-allow-loop-nests-peeling. |
621 | if (!PP.AllowLoopNestsPeeling && !L->isInnermost()) |
622 | return; |
623 | |
624 | // If the user provided a peel count, use that. |
625 | bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; |
626 | if (UserPeelCount) { |
627 | LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount |
628 | << " iterations.\n" ); |
629 | PP.PeelCount = UnrollForcePeelCount; |
630 | PP.PeelProfiledIterations = true; |
631 | return; |
632 | } |
633 | |
634 | // Skip peeling if it's disabled. |
635 | if (!PP.AllowPeeling) |
636 | return; |
637 | |
638 | // Check that we can peel at least one iteration. |
639 | if (2 * LoopSize > Threshold) |
640 | return; |
641 | |
642 | unsigned AlreadyPeeled = 0; |
643 | if (auto Peeled = getOptionalIntLoopAttribute(TheLoop: L, Name: PeeledCountMetaData)) |
644 | AlreadyPeeled = *Peeled; |
645 | // Stop if we already peeled off the maximum number of iterations. |
646 | if (AlreadyPeeled >= UnrollPeelMaxCount) |
647 | return; |
648 | |
649 | // Pay respect to limitations implied by loop size and the max peel count. |
650 | unsigned MaxPeelCount = UnrollPeelMaxCount; |
651 | MaxPeelCount = std::min(a: MaxPeelCount, b: Threshold / LoopSize - 1); |
652 | |
653 | // Start the max computation with the PP.PeelCount value set by the target |
654 | // in TTI.getPeelingPreferences or by the flag -unroll-peel-count. |
655 | unsigned DesiredPeelCount = TargetPeelCount; |
656 | |
657 | // Here we try to get rid of Phis which become invariants after 1, 2, ..., N |
658 | // iterations of the loop. For this we compute the number of iterations after |
659 | // which every Phi is guaranteed to become an invariant, and try to peel the |
660 | // maximum number of iterations among these values, thus turning all those |
661 | // Phis into invariants. |
662 | if (MaxPeelCount > DesiredPeelCount) { |
663 | // Check how many iterations are useful for resolving Phis |
664 | auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel(); |
665 | if (NumPeels) |
666 | DesiredPeelCount = std::max(a: DesiredPeelCount, b: *NumPeels); |
667 | } |
668 | |
669 | const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] = |
670 | countToEliminateCompares(L&: *L, MaxPeelCount, SE, TTI); |
671 | DesiredPeelCount = std::max(a: DesiredPeelCount, b: CountToEliminateCmps); |
672 | |
673 | if (DesiredPeelCount == 0) |
674 | DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(L&: *L, DT, AC); |
675 | |
676 | if (DesiredPeelCount > 0) { |
677 | DesiredPeelCount = std::min(a: DesiredPeelCount, b: MaxPeelCount); |
678 | // Consider max peel count limitation. |
679 | assert(DesiredPeelCount > 0 && "Wrong loop size estimation?" ); |
680 | if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { |
681 | LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount |
682 | << " iteration(s) to turn" |
683 | << " some Phis into invariants.\n" ); |
684 | PP.PeelCount = DesiredPeelCount; |
685 | PP.PeelProfiledIterations = false; |
686 | PP.PeelLast = false; |
687 | return; |
688 | } |
689 | } |
690 | |
691 | if (CountToEliminateCmpsLast > 0) { |
692 | unsigned DesiredPeelCountLast = |
693 | std::min(a: CountToEliminateCmpsLast, b: MaxPeelCount); |
694 | // Consider max peel count limitation. |
695 | assert(DesiredPeelCountLast > 0 && "Wrong loop size estimation?" ); |
696 | if (DesiredPeelCountLast + AlreadyPeeled <= UnrollPeelMaxCount) { |
697 | LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCountLast |
698 | << " iteration(s) from the end to eliminate" |
699 | << " compares in the last iteration(s).\n"); |
700 | PP.PeelCount = DesiredPeelCountLast; |
701 | PP.PeelProfiledIterations = false; |
702 | PP.PeelLast = true; |
703 | return; |
704 | } |
705 | } |
706 | |
707 | // Bail if we know the statically calculated trip count. |
708 | // In this case we rather prefer partial unrolling. |
709 | if (TripCount) |
710 | return; |
711 | |
712 | // Do not apply profile-based peeling if it is disabled. |
713 | if (!PP.PeelProfiledIterations) |
714 | return; |
715 | // If we don't know the trip count, but have reason to believe the average |
716 | // trip count is low, peeling should be beneficial, since we will usually |
717 | // hit the peeled section. |
718 | // We only do this in the presence of profile information, since otherwise |
719 | // our estimates of the trip count are not reliable enough. |
720 | if (L->getHeader()->getParent()->hasProfileData()) { |
721 | if (violatesLegacyMultiExitLoopCheck(L)) |
722 | return; |
723 | std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L); |
724 | if (!EstimatedTripCount) |
725 | return; |
726 | |
727 | LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " |
728 | << *EstimatedTripCount << "\n" ); |
729 | |
730 | if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) { |
731 | unsigned PeelCount = *EstimatedTripCount; |
732 | LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n" ); |
733 | PP.PeelCount = PeelCount; |
734 | return; |
735 | } |
736 | LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n" ); |
737 | LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n" ); |
738 | LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n" ); |
739 | LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n" ); |
740 | LLVM_DEBUG(dbgs() << "Max peel count by cost: " |
741 | << (Threshold / LoopSize - 1) << "\n" ); |
742 | } |
743 | } |
744 | |
745 | struct WeightInfo { |
746 | // Weights for current iteration. |
747 | SmallVector<uint32_t> Weights; |
748 | // Weights to subtract after each iteration. |
749 | const SmallVector<uint32_t> SubWeights; |
750 | }; |
751 | |
752 | /// Update the branch weights of an exiting block of a peeled-off loop |
753 | /// iteration. |
754 | /// Let F be the weight of the edge that continues (falls through) into the |
755 | /// loop, and let E be the weight of the edge to an exit. |
756 | /// Then F/(F+E) is the probability of staying in the loop, E/(F+E) is the |
757 | /// probability of exiting, |
758 | /// and the estimated exit count is F / E. |
759 | /// For the I-th (counting from 0) peeled-off iteration we set the weights of |
760 | /// the peeled exit to (EC - I, 1). This gives a reasonable distribution: the |
761 | /// probability of exiting, 1/(EC - I), increases, while at the same time |
762 | /// the estimated exit count in the remainder loop is reduced by I. |
763 | /// To avoid dealing with division rounding we can simply multiply both parts |
764 | /// of the weights by E and use the weights (F - I * E, E). |
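/// As an illustrative example, with latch weights F = 100 and E = 1 (an
/// estimated exit count of 100), the latch copy in the first peeled iteration
/// keeps weights (100, 1), the second gets (99, 1), the I-th gets (100 - I, 1),
/// and the latch of the remaining loop is left with whatever remains after the
/// final subtraction.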
765 | static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { |
766 | setBranchWeights(I&: *Term, Weights: Info.Weights, /*IsExpected=*/false); |
767 | for (auto [Idx, SubWeight] : enumerate(First: Info.SubWeights)) |
768 | if (SubWeight != 0) |
769 | // Don't set the probability of taking the edge from latch to loop header |
770 | // to less than 1:1 ratio (meaning Weight should not be lower than |
771 | // SubWeight), as this could significantly reduce the loop's hotness, |
772 | // which would be incorrect in the case of underestimating the trip count. |
773 | Info.Weights[Idx] = |
774 | Info.Weights[Idx] > SubWeight |
775 | ? std::max(a: Info.Weights[Idx] - SubWeight, b: SubWeight) |
776 | : SubWeight; |
777 | } |
778 | |
779 | /// Initialize the weights for all exiting blocks. |
780 | static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos, |
781 | Loop *L) { |
782 | SmallVector<BasicBlock *> ExitingBlocks; |
783 | L->getExitingBlocks(ExitingBlocks); |
784 | for (BasicBlock *ExitingBlock : ExitingBlocks) { |
785 | Instruction *Term = ExitingBlock->getTerminator(); |
786 | SmallVector<uint32_t> Weights; |
787 | if (!extractBranchWeights(I: *Term, Weights)) |
788 | continue; |
789 | |
790 | // See the comment on updateBranchWeights() for an explanation of what we |
791 | // do here. |
792 | uint32_t FallThroughWeights = 0; |
793 | uint32_t ExitWeights = 0; |
794 | for (auto [Succ, Weight] : zip(t: successors(I: Term), u&: Weights)) { |
795 | if (L->contains(BB: Succ)) |
796 | FallThroughWeights += Weight; |
797 | else |
798 | ExitWeights += Weight; |
799 | } |
800 | |
801 | // Don't try to update weights for degenerate case. |
802 | if (FallThroughWeights == 0) |
803 | continue; |
804 | |
805 | SmallVector<uint32_t> SubWeights; |
806 | for (auto [Succ, Weight] : zip(t: successors(I: Term), u&: Weights)) { |
807 | if (!L->contains(BB: Succ)) { |
808 | // Exit weights stay the same. |
809 | SubWeights.push_back(Elt: 0); |
810 | continue; |
811 | } |
812 | |
813 | // Subtract exit weights on each iteration, distributed across all |
814 | // fallthrough edges. |
815 | double W = (double)Weight / (double)FallThroughWeights; |
816 | SubWeights.push_back(Elt: (uint32_t)(ExitWeights * W)); |
817 | } |
818 | |
819 | WeightInfos.insert(KV: {Term, {.Weights: std::move(Weights), .SubWeights: std::move(SubWeights)}}); |
820 | } |
821 | } |
822 | |
823 | /// Clones the body of the loop L, putting it between \p InsertTop and \p |
824 | /// InsertBot. |
825 | /// \param IterNumber The serial number of the iteration currently being |
826 | /// peeled off. |
827 | /// \param PeelLast Peel off the last iterations from \p L. |
828 | /// \param ExitEdges The exit edges of the original loop. |
829 | /// \param[out] NewBlocks A list of the blocks in the newly created clone |
830 | /// \param[out] VMap The value map between the loop and the new clone. |
831 | /// \param LoopBlocks A helper for DFS-traversal of the loop. |
832 | /// \param LVMap A value-map that maps instructions from the original loop to |
833 | /// instructions in the last peeled-off iteration. |
834 | static void cloneLoopBlocks( |
835 | Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, |
836 | BasicBlock *InsertBot, BasicBlock *OrigPreHeader, |
837 | SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges, |
838 | SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, |
839 | ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, |
840 | LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes, |
841 | ScalarEvolution &SE) { |
842 | BasicBlock *Header = L->getHeader(); |
843 | BasicBlock *Latch = L->getLoopLatch(); |
844 | BasicBlock *PreHeader = L->getLoopPreheader(); |
845 | |
846 | Function *F = Header->getParent(); |
847 | LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); |
848 | LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); |
849 | Loop *ParentLoop = L->getParentLoop(); |
850 | |
851 | // For each block in the original loop, create a new copy, |
852 | // and update the value map with the newly created values. |
853 | for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { |
854 | BasicBlock *NewBB = CloneBasicBlock(BB: *BB, VMap, NameSuffix: ".peel" , F); |
855 | NewBlocks.push_back(Elt: NewBB); |
856 | |
857 | // If an original block is an immediate child of the loop L, its copy |
858 | // is a child of a ParentLoop after peeling. If a block is a child of |
859 | // a nested loop, it is handled in the cloneLoop() call below. |
860 | if (ParentLoop && LI->getLoopFor(BB: *BB) == L) |
861 | ParentLoop->addBasicBlockToLoop(NewBB, LI&: *LI); |
862 | |
863 | VMap[*BB] = NewBB; |
864 | |
865 | // If dominator tree is available, insert nodes to represent cloned blocks. |
866 | if (DT) { |
867 | if (Header == *BB) |
868 | DT->addNewBlock(BB: NewBB, DomBB: InsertTop); |
869 | else { |
870 | DomTreeNode *IDom = DT->getNode(BB: *BB)->getIDom(); |
871 | // VMap must contain entry for IDom, as the iteration order is RPO. |
872 | DT->addNewBlock(BB: NewBB, DomBB: cast<BasicBlock>(Val&: VMap[IDom->getBlock()])); |
873 | } |
874 | } |
875 | } |
876 | |
877 | { |
878 | // Identify what other metadata depends on the cloned version. After |
879 | // cloning, replace the metadata with the corrected version for both |
880 | // memory instructions and noalias intrinsics. |
881 | std::string Ext = (Twine("Peel" ) + Twine(IterNumber)).str(); |
882 | cloneAndAdaptNoAliasScopes(NoAliasDeclScopes: LoopLocalNoAliasDeclScopes, NewBlocks, |
883 | Context&: Header->getContext(), Ext); |
884 | } |
885 | |
886 | // Recursively create the new Loop objects for nested loops, if any, |
887 | // to preserve LoopInfo. |
888 | for (Loop *ChildLoop : *L) { |
889 | cloneLoop(L: ChildLoop, PL: ParentLoop, VM&: VMap, LI, LPM: nullptr); |
890 | } |
891 | |
892 | // Hook-up the control flow for the newly inserted blocks. |
893 | // The new header is hooked up directly to the "top", which is either |
894 | // the original loop preheader (for the first iteration) or the previous |
895 | // iteration's exiting block (for every other iteration) |
896 | InsertTop->getTerminator()->setSuccessor(Idx: 0, BB: cast<BasicBlock>(Val&: VMap[Header])); |
897 | |
898 | // Similarly, for the latch: |
899 | // The original exiting edge is still hooked up to the loop exit. |
900 | BasicBlock *NewLatch = cast<BasicBlock>(Val&: VMap[Latch]); |
901 | if (PeelLast) { |
902 | // This is the last iteration and we definitely will go to the exit. Just |
903 | // set both successors to InsertBot and let the branch be simplified later. |
904 | assert(IterNumber == 0 && "Only peeling a single iteration implemented." ); |
905 | auto *LatchTerm = cast<BranchInst>(Val: NewLatch->getTerminator()); |
906 | LatchTerm->setSuccessor(idx: 0, NewSucc: InsertBot); |
907 | LatchTerm->setSuccessor(idx: 1, NewSucc: InsertBot); |
908 | } else { |
909 | auto *LatchTerm = cast<Instruction>(Val: NewLatch->getTerminator()); |
910 | // The backedge now goes to the "bottom", which is either the loop's real |
911 | // header (for the last peeled iteration) or the copied header of the next |
912 | // iteration (for every other iteration) |
913 | for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) { |
914 | if (LatchTerm->getSuccessor(Idx: idx) == Header) { |
915 | LatchTerm->setSuccessor(Idx: idx, BB: InsertBot); |
916 | break; |
917 | } |
918 | } |
919 | } |
920 | if (DT) |
921 | DT->changeImmediateDominator(BB: InsertBot, NewBB: NewLatch); |
922 | |
923 | // The new copy of the loop body starts with a bunch of PHI nodes |
924 | // that pick an incoming value from either the preheader, or the previous |
925 | // loop iteration. Since this copy is no longer part of the loop, we |
926 | // resolve this statically: |
927 | if (PeelLast) { |
928 | // For the last iteration, we introduce new phis for each header phi in |
929 | // InsertTop, using the incoming value from the preheader for the original |
930 | // preheader (when skipping the main loop) and the incoming value from the |
931 | // latch for the latch (when continuing from the main loop). |
932 | IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt()); |
933 | for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(Val: I); ++I) { |
934 | PHINode *NewPHI = cast<PHINode>(Val&: VMap[&*I]); |
935 | PHINode *PN = B.CreatePHI(Ty: NewPHI->getType(), NumReservedValues: 2); |
936 | NewPHI->eraseFromParent(); |
937 | if (OrigPreHeader) |
938 | PN->addIncoming(V: cast<PHINode>(Val: &*I)->getIncomingValueForBlock(BB: PreHeader), |
939 | BB: OrigPreHeader); |
940 | |
941 | PN->addIncoming(V: cast<PHINode>(Val: &*I)->getIncomingValueForBlock(BB: Latch), |
942 | BB: Latch); |
943 | VMap[&*I] = PN; |
944 | } |
945 | } else { |
946 | // For the first iteration, we use the value from the preheader directly. |
947 | // For any other iteration, we replace the phi with the value generated by |
948 | // the immediately preceding clone of the loop body (which represents |
949 | // the previous iteration). |
950 | for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(Val: I); ++I) { |
951 | PHINode *NewPHI = cast<PHINode>(Val&: VMap[&*I]); |
952 | if (IterNumber == 0) { |
953 | VMap[&*I] = NewPHI->getIncomingValueForBlock(BB: PreHeader); |
954 | } else { |
955 | Value *LatchVal = NewPHI->getIncomingValueForBlock(BB: Latch); |
956 | Instruction *LatchInst = dyn_cast<Instruction>(Val: LatchVal); |
957 | if (LatchInst && L->contains(Inst: LatchInst)) |
958 | VMap[&*I] = LVMap[LatchInst]; |
959 | else |
960 | VMap[&*I] = LatchVal; |
961 | } |
962 | NewPHI->eraseFromParent(); |
963 | } |
964 | } |
965 | |
966 | // Fix up the outgoing values - we need to add a value for the iteration |
967 | // we've just created. Note that this must happen *after* the incoming |
968 | // values are adjusted, since the value going out of the latch may also be |
969 | // a value coming into the header. |
970 | for (auto Edge : ExitEdges) |
971 | for (PHINode &PHI : Edge.second->phis()) { |
972 | Value *LatchVal = PHI.getIncomingValueForBlock(BB: Edge.first); |
973 | Instruction *LatchInst = dyn_cast<Instruction>(Val: LatchVal); |
974 | if (LatchInst && L->contains(Inst: LatchInst)) |
975 | LatchVal = VMap[LatchVal]; |
976 | PHI.addIncoming(V: LatchVal, BB: cast<BasicBlock>(Val&: VMap[Edge.first])); |
977 | SE.forgetLcssaPhiWithNewPredecessor(L, V: &PHI); |
978 | } |
979 | |
980 | // LastValueMap is updated with the values for the current loop |
981 | // which are used the next time this function is called. |
982 | for (auto KV : VMap) |
983 | LVMap[KV.first] = KV.second; |
984 | } |
985 | |
986 | TargetTransformInfo::PeelingPreferences |
987 | llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, |
988 | const TargetTransformInfo &TTI, |
989 | std::optional<bool> UserAllowPeeling, |
990 | std::optional<bool> UserAllowProfileBasedPeeling, |
991 | bool UnrollingSpecficValues) { |
992 | TargetTransformInfo::PeelingPreferences PP; |
993 | |
994 | // Set the default values. |
995 | PP.PeelCount = 0; |
996 | PP.AllowPeeling = true; |
997 | PP.AllowLoopNestsPeeling = false; |
998 | PP.PeelLast = false; |
999 | PP.PeelProfiledIterations = true; |
1000 | |
1001 | // Get the target specific values. |
1002 | TTI.getPeelingPreferences(L, SE, PP); |
1003 | |
1004 | // User specified values using cl::opt. |
1005 | if (UnrollingSpecficValues) { |
1006 | if (UnrollPeelCount.getNumOccurrences() > 0) |
1007 | PP.PeelCount = UnrollPeelCount; |
1008 | if (UnrollAllowPeeling.getNumOccurrences() > 0) |
1009 | PP.AllowPeeling = UnrollAllowPeeling; |
1010 | if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) |
1011 | PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; |
1012 | } |
1013 | |
1014 | // User specified values provided by argument. |
1015 | if (UserAllowPeeling) |
1016 | PP.AllowPeeling = *UserAllowPeeling; |
1017 | if (UserAllowProfileBasedPeeling) |
1018 | PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; |
1019 | |
1020 | return PP; |
1021 | } |
1022 | |
1023 | /// Peel off the first \p PeelCount iterations of loop \p L. |
1024 | /// |
1025 | /// Note that this does not peel them off as a single straight-line block. |
1026 | /// Rather, each iteration is peeled off separately, and needs to check the |
1027 | /// exit condition. |
1028 | /// For loops that dynamically execute \p PeelCount iterations or less |
1029 | /// this provides a benefit, since the peeled off iterations, which account |
1030 | /// for the bulk of dynamic execution, can be further simplified by scalar |
1031 | /// optimizations. |
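///
/// A typical use looks roughly like the sketch below (illustrative only; the
/// real call sites live in the loop unroller and perform additional checks;
/// SE is assumed to be a ScalarEvolution& and AC an AssumptionCache* here):
///   TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
///       L, SE, TTI, std::nullopt, std::nullopt,
///       /*UnrollingSpecficValues=*/false);
///   computePeelCount(L, LoopSize, PP, TripCount, DT, SE, TTI, AC, Threshold);
///   if (PP.PeelCount) {
///     ValueToValueMapTy VMap;
///     peelLoop(L, PP.PeelCount, PP.PeelLast, LI, &SE, DT, AC, PreserveLCSSA,
///              VMap);
///   }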
1032 | bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, |
1033 | ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, |
1034 | bool PreserveLCSSA, ValueToValueMapTy &LVMap) { |
1035 | assert(PeelCount > 0 && "Attempt to peel out zero iterations?" ); |
1036 | assert(canPeel(L) && "Attempt to peel a loop which is not peelable?" ); |
1037 | assert((!PeelLast || (canPeelLastIteration(*L, *SE) && PeelCount == 1)) && |
1038 | "when peeling the last iteration, the loop must be supported and can " |
1039 | "only peel a single iteration" ); |
1040 | |
1041 | LoopBlocksDFS LoopBlocks(L); |
1042 | LoopBlocks.perform(LI); |
1043 | |
1044 | BasicBlock *Header = L->getHeader(); |
1045 | BasicBlock *PreHeader = L->getLoopPreheader(); |
1046 | BasicBlock *Latch = L->getLoopLatch(); |
1047 | SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges; |
1048 | L->getExitEdges(ExitEdges); |
1049 | |
1050 | // Remember dominators of blocks we might reach through exits to change them |
1051 | // later. Immediate dominator of such block might change, because we add more |
1052 | // routes which can lead to the exit: we can reach it from the peeled |
1053 | // iterations too. |
1054 | DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom; |
1055 | for (auto *BB : L->blocks()) { |
1056 | auto *BBDomNode = DT.getNode(BB); |
1057 | SmallVector<BasicBlock *, 16> ChildrenToUpdate; |
1058 | for (auto *ChildDomNode : BBDomNode->children()) { |
1059 | auto *ChildBB = ChildDomNode->getBlock(); |
1060 | if (!L->contains(BB: ChildBB)) |
1061 | ChildrenToUpdate.push_back(Elt: ChildBB); |
1062 | } |
1063 | // The new idom of the block will be the nearest common dominator |
1064 | // of all copies of the previous idom. This is equivalent to the |
1065 | // nearest common dominator of the previous idom and the first latch, |
1066 | // which dominates all copies of the previous idom. |
1067 | BasicBlock *NewIDom = DT.findNearestCommonDominator(A: BB, B: Latch); |
1068 | for (auto *ChildBB : ChildrenToUpdate) |
1069 | NonLoopBlocksIDom[ChildBB] = NewIDom; |
1070 | } |
1071 | |
1072 | Function *F = Header->getParent(); |
1073 | |
1074 | // Set up all the necessary basic blocks. |
1075 | BasicBlock *InsertTop; |
1076 | BasicBlock *InsertBot; |
1077 | BasicBlock *NewPreHeader = nullptr; |
1078 | DenseMap<Instruction *, Value *> ExitValues; |
1079 | if (PeelLast) { |
1080 | // It is convenient to split the single exit block from the latch |
1081 | // into 3 parts - two blocks to anchor the peeled copy of the loop body, |
1082 | // and a new final exit block. |
1083 | |
1084 | // Peeling the last iteration transforms. |
1085 | // |
1086 | // PreHeader: |
1087 | // ... |
1088 | // Header: |
1089 | // LoopBody |
1090 | // If (cond) goto Header |
1091 | // Exit: |
1092 | // |
1093 | // into |
1094 | // |
1095 | // Header: |
1096 | // LoopBody |
1097 | // If (cond) goto Header |
1098 | // InsertTop: |
1099 | // LoopBody |
1100 | // If (!cond) goto InsertBot |
1101 | // InsertBot: |
1102 | // Exit: |
1103 | // ... |
1104 | BasicBlock *Exit = L->getExitBlock(); |
1105 | for (PHINode &P : Exit->phis()) |
1106 | ExitValues[&P] = P.getIncomingValueForBlock(BB: Latch); |
1107 | |
1108 | const SCEV *BTC = SE->getBackedgeTakenCount(L); |
1109 | |
1110 | InsertTop = SplitEdge(From: Latch, To: Exit, DT: &DT, LI); |
1111 | InsertBot = SplitBlock(Old: InsertTop, SplitPt: InsertTop->getTerminator(), DT: &DT, LI); |
1112 | |
1113 | InsertTop->setName(Exit->getName() + ".peel.begin" ); |
1114 | InsertBot->setName(Exit->getName() + ".peel.next" ); |
1115 | NewPreHeader = nullptr; |
1116 | |
1117 | // If the original loop may only execute a single iteration we need to |
1118 | // insert a trip count check and skip the original loop with the last |
1119 | // iteration peeled off if necessary. |
1120 | if (!SE->isKnownNonZero(S: BTC)) { |
1121 | NewPreHeader = SplitEdge(From: PreHeader, To: Header, DT: &DT, LI); |
1122 | SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel" ); |
1123 | |
1124 | BranchInst *PreHeaderBR = cast<BranchInst>(Val: PreHeader->getTerminator()); |
1125 | Value *BTCValue = |
1126 | Expander.expandCodeFor(SH: BTC, Ty: BTC->getType(), I: PreHeaderBR); |
1127 | IRBuilder<> B(PreHeaderBR); |
1128 | Value *Cond = |
1129 | B.CreateICmpNE(LHS: BTCValue, RHS: ConstantInt::get(Ty: BTCValue->getType(), V: 0)); |
1130 | B.CreateCondBr(Cond, True: NewPreHeader, False: InsertTop); |
1131 | PreHeaderBR->eraseFromParent(); |
1132 | |
1133 | // PreHeader now dominates InsertTop. |
1134 | DT.changeImmediateDominator(BB: InsertTop, NewBB: PreHeader); |
1135 | } |
1136 | } else { |
1137 | // It is convenient to split the preheader into 3 parts - two blocks to |
1138 | // anchor the peeled copy of the loop body, and a new preheader for the |
1139 | // "real" loop. |
1140 | |
1141 | // Peeling the first iteration transforms. |
1142 | // |
1143 | // PreHeader: |
1144 | // ... |
1145 | // Header: |
1146 | // LoopBody |
1147 | // If (cond) goto Header |
1148 | // Exit: |
1149 | // |
1150 | // into |
1151 | // |
1152 | // InsertTop: |
1153 | // LoopBody |
1154 | // If (!cond) goto Exit |
1155 | // InsertBot: |
1156 | // NewPreHeader: |
1157 | // ... |
1158 | // Header: |
1159 | // LoopBody |
1160 | // If (cond) goto Header |
1161 | // Exit: |
1162 | // |
1163 | // Each following iteration will split the current bottom anchor in two, |
1164 | // and put the new copy of the loop body between these two blocks. That |
1165 | // is, after peeling another iteration from the example above, we'll |
1166 | // split InsertBot, and get: |
1167 | // |
1168 | // InsertTop: |
1169 | // LoopBody |
1170 | // If (!cond) goto Exit |
1171 | // InsertBot: |
1172 | // LoopBody |
1173 | // If (!cond) goto Exit |
1174 | // InsertBot.next: |
1175 | // NewPreHeader: |
1176 | // ... |
1177 | // Header: |
1178 | // LoopBody |
1179 | // If (cond) goto Header |
1180 | // Exit: |
1181 | // |
1182 | InsertTop = SplitEdge(From: PreHeader, To: Header, DT: &DT, LI); |
1183 | InsertBot = SplitBlock(Old: InsertTop, SplitPt: InsertTop->getTerminator(), DT: &DT, LI); |
1184 | NewPreHeader = SplitBlock(Old: InsertBot, SplitPt: InsertBot->getTerminator(), DT: &DT, LI); |
1185 | |
1186 | InsertTop->setName(Header->getName() + ".peel.begin" ); |
1187 | InsertBot->setName(Header->getName() + ".peel.next" ); |
1188 | NewPreHeader->setName(PreHeader->getName() + ".peel.newph" ); |
1189 | } |
1190 | |
1191 | Instruction *LatchTerm = |
1192 | cast<Instruction>(Val: cast<BasicBlock>(Val: Latch)->getTerminator()); |
1193 | |
1194 | // If we have branch weight information, we'll want to update it for the |
1195 | // newly created branches. |
1196 | DenseMap<Instruction *, WeightInfo> Weights; |
1197 | initBranchWeights(WeightInfos&: Weights, L); |
1198 | |
1199 | // Identify what noalias metadata is inside the loop: if it is inside the |
1200 | // loop, the associated metadata must be cloned for each iteration. |
1201 | SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; |
1202 | identifyNoAliasScopesToClone(BBs: L->getBlocks(), NoAliasDeclScopes&: LoopLocalNoAliasDeclScopes); |
1203 | |
1204 | // For each peeled-off iteration, make a copy of the loop. |
1205 | ValueToValueMapTy VMap; |
1206 | for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { |
1207 | SmallVector<BasicBlock *, 8> NewBlocks; |
1208 | |
1209 | cloneLoopBlocks(L, IterNumber: Iter, PeelLast, InsertTop, InsertBot, |
1210 | OrigPreHeader: NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks, |
1211 | LoopBlocks, VMap, LVMap, DT: &DT, LI, |
1212 | LoopLocalNoAliasDeclScopes, SE&: *SE); |
1213 | |
1214 | // Remap to use values from the current iteration instead of the |
1215 | // previous one. |
1216 | remapInstructionsInBlocks(Blocks: NewBlocks, VMap); |
1217 | |
1218 | if (Iter == 0) { |
1219 | if (PeelLast) { |
1220 | // Adjust the exit condition so the loop exits one iteration early: we |
1221 | // simply subtract one from the second operand of the exit compare (e.g. |
1222 | // icmp eq %iv.next, %n becomes icmp eq %iv.next, %n - 1). This relies on |
1223 | // the peel count computation to check that this is actually legal. In |
1224 | // particular, it ensures that the first operand of the compare is an |
1225 | // AddRec with step 1 and we execute more than one iteration. |
1226 | auto *Cmp = |
1227 | cast<ICmpInst>(Val: L->getLoopLatch()->getTerminator()->getOperand(i: 0)); |
1228 | IRBuilder B(Cmp); |
1229 | Cmp->setOperand( |
1230 | i_nocapture: 1, Val_nocapture: B.CreateSub(LHS: Cmp->getOperand(i_nocapture: 1), |
1231 | RHS: ConstantInt::get(Ty: Cmp->getOperand(i_nocapture: 1)->getType(), V: 1))); |
1232 | } else { |
1233 | // Update IDoms of the blocks reachable through exits. |
1234 | for (auto BBIDom : NonLoopBlocksIDom) |
1235 | DT.changeImmediateDominator(BB: BBIDom.first, |
1236 | NewBB: cast<BasicBlock>(Val&: LVMap[BBIDom.second])); |
1237 | } |
1238 | } |
1239 | |
1240 | #ifdef EXPENSIVE_CHECKS |
1241 | assert(DT.verify(DominatorTree::VerificationLevel::Fast)); |
1242 | #endif |
1243 | |
1244 | for (auto &[Term, Info] : Weights) { |
1245 | auto *TermCopy = cast<Instruction>(Val&: VMap[Term]); |
1246 | updateBranchWeights(Term: TermCopy, Info); |
1247 | } |
1248 | |
1249 | // Remove Loop metadata from the latch branch instruction |
1250 | // because it is not the Loop's latch branch anymore. |
1251 | auto *LatchTermCopy = cast<Instruction>(Val&: VMap[LatchTerm]); |
1252 | LatchTermCopy->setMetadata(KindID: LLVMContext::MD_loop, Node: nullptr); |
1253 | |
1254 | InsertTop = InsertBot; |
1255 | InsertBot = SplitBlock(Old: InsertBot, SplitPt: InsertBot->getTerminator(), DT: &DT, LI); |
1256 | InsertBot->setName(Header->getName() + ".peel.next" ); |
1257 | |
1258 | F->splice(ToIt: InsertTop->getIterator(), FromF: F, FromBeginIt: NewBlocks[0]->getIterator(), |
1259 | FromEndIt: F->end()); |
1260 | } |
1261 | |
1262 | if (PeelLast) { |
1263 | // Now adjust users of the original exit values by replacing them with the |
1264 | // exit value from the peeled iteration, then remove the original exit phis. |
1265 | for (const auto &[P, E] : ExitValues) { |
1266 | Instruction *ExitInst = dyn_cast<Instruction>(Val: E); |
1267 | if (ExitInst && L->contains(Inst: ExitInst)) |
1268 | P->replaceAllUsesWith(V: &*VMap[ExitInst]); |
1269 | else |
1270 | P->replaceAllUsesWith(V: E); |
1271 | P->eraseFromParent(); |
1272 | } |
1273 | formLCSSA(L&: *L, DT, LI, SE); |
1274 | } else { |
1275 | // Now adjust the phi nodes in the loop header to get their initial values |
1276 | // from the last peeled-off iteration instead of the preheader. |
1277 | for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(Val: I); ++I) { |
1278 | PHINode *PHI = cast<PHINode>(Val&: I); |
1279 | Value *NewVal = PHI->getIncomingValueForBlock(BB: Latch); |
1280 | Instruction *LatchInst = dyn_cast<Instruction>(Val: NewVal); |
1281 | if (LatchInst && L->contains(Inst: LatchInst)) |
1282 | NewVal = LVMap[LatchInst]; |
1283 | |
1284 | PHI->setIncomingValueForBlock(BB: NewPreHeader, V: NewVal); |
1285 | } |
1286 | } |
1287 | |
1288 | for (const auto &[Term, Info] : Weights) { |
1289 | setBranchWeights(I&: *Term, Weights: Info.Weights, /*IsExpected=*/false); |
1290 | } |
1291 | |
1292 | // Update Metadata for count of peeled off iterations. |
1293 | unsigned AlreadyPeeled = 0; |
1294 | if (auto Peeled = getOptionalIntLoopAttribute(TheLoop: L, Name: PeeledCountMetaData)) |
1295 | AlreadyPeeled = *Peeled; |
1296 | addStringMetadataToLoop(TheLoop: L, MDString: PeeledCountMetaData, V: AlreadyPeeled + PeelCount); |
1297 | |
1298 | if (Loop *ParentLoop = L->getParentLoop()) |
1299 | L = ParentLoop; |
1300 | |
1301 | // We modified the loop, update SE. |
1302 | SE->forgetTopmostLoop(L); |
1303 | SE->forgetBlockAndLoopDispositions(); |
1304 | |
1305 | #ifdef EXPENSIVE_CHECKS |
1306 | // Finally, the DomTree must be correct. |
1307 | assert(DT.verify(DominatorTree::VerificationLevel::Fast)); |
1308 | #endif |
1309 | |
1310 | // FIXME: Incrementally update loop-simplify |
1311 | simplifyLoop(L, DT: &DT, LI, SE, AC, MSSAU: nullptr, PreserveLCSSA); |
1312 | |
1313 | NumPeeled++; |
1314 | NumPeeledEnd += PeelLast; |
1315 | |
1316 | return true; |
1317 | } |
1318 | |