IndVarSimplify.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp]

1	//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This transformation analyzes and transforms the induction variables (and
10	// computations derived from them) into simpler forms suitable for subsequent
11	// analysis and transformation.
12	//
13	// If the trip count of a loop is computable, this pass also makes the following
14	// changes:
15	// 1. The exit condition for the loop is canonicalized to compare the
16	// induction value against the exit value. This turns loops like:
17	// 'for (i = 7; ii < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'*
18	// 2. Any use outside of the loop of an expression derived from the indvar
19	// is changed to compute the derived value outside of the loop, eliminating
20	// the dependence on the exit value of the induction variable. If the only
21	// purpose of the loop is to compute the exit value of some derived
22	// expression, this transformation will make the loop dead.
23	//
24	//===----------------------------------------------------------------------===//
25
26	#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27	#include "llvm/ADT/APFloat.h"
28	#include "llvm/ADT/ArrayRef.h"
29	#include "llvm/ADT/STLExtras.h"
30	#include "llvm/ADT/SmallPtrSet.h"
31	#include "llvm/ADT/SmallSet.h"
32	#include "llvm/ADT/SmallVector.h"
33	#include "llvm/ADT/Statistic.h"
34	#include "llvm/ADT/iterator_range.h"
35	#include "llvm/Analysis/LoopInfo.h"
36	#include "llvm/Analysis/LoopPass.h"
37	#include "llvm/Analysis/MemorySSA.h"
38	#include "llvm/Analysis/MemorySSAUpdater.h"
39	#include "llvm/Analysis/ScalarEvolution.h"
40	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41	#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
42	#include "llvm/Analysis/TargetLibraryInfo.h"
43	#include "llvm/Analysis/TargetTransformInfo.h"
44	#include "llvm/Analysis/ValueTracking.h"
45	#include "llvm/IR/BasicBlock.h"
46	#include "llvm/IR/Constant.h"
47	#include "llvm/IR/ConstantRange.h"
48	#include "llvm/IR/Constants.h"
49	#include "llvm/IR/DataLayout.h"
50	#include "llvm/IR/DerivedTypes.h"
51	#include "llvm/IR/Dominators.h"
52	#include "llvm/IR/Function.h"
53	#include "llvm/IR/IRBuilder.h"
54	#include "llvm/IR/InstrTypes.h"
55	#include "llvm/IR/Instruction.h"
56	#include "llvm/IR/Instructions.h"
57	#include "llvm/IR/Intrinsics.h"
58	#include "llvm/IR/PassManager.h"
59	#include "llvm/IR/PatternMatch.h"
60	#include "llvm/IR/Type.h"
61	#include "llvm/IR/Use.h"
62	#include "llvm/IR/User.h"
63	#include "llvm/IR/Value.h"
64	#include "llvm/IR/ValueHandle.h"
65	#include "llvm/Support/Casting.h"
66	#include "llvm/Support/CommandLine.h"
67	#include "llvm/Support/Debug.h"
68	#include "llvm/Support/MathExtras.h"
69	#include "llvm/Support/raw_ostream.h"
70	#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
71	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
72	#include "llvm/Transforms/Utils/Local.h"
73	#include "llvm/Transforms/Utils/LoopUtils.h"
74	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
75	#include "llvm/Transforms/Utils/SimplifyIndVar.h"
76	#include <cassert>
77	#include <cstdint>
78	#include <utility>
79
80	using namespace llvm;
81	using namespace PatternMatch;
82	using namespace SCEVPatternMatch;
83
84	#define DEBUG_TYPE "indvars"
85
86	STATISTIC(NumWidened , "Number of indvars widened");
87	STATISTIC(NumReplaced , "Number of exit values replaced");
88	STATISTIC(NumLFTR , "Number of loop exit tests replaced");
89	STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
90	STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
91
92	static cl::opt<ReplaceExitVal> ReplaceExitValue(
93	"replexitval", cl::Hidden, cl::init(Val: OnlyCheapRepl),
94	cl::desc ("Choose the strategy to replace exit value in IndVarSimplify"),
95	cl::values(
96	clEnumValN(NeverRepl, "never", "never replace exit value"),
97	clEnumValN(OnlyCheapRepl, "cheap",
98	"only replace exit value when the cost is cheap"),
99	clEnumValN(
100	UnusedIndVarInLoop, "unusedindvarinloop",
101	"only replace exit value when it is an unused "
102	"induction variable in the loop and has cheap replacement cost"),
103	clEnumValN(NoHardUse, "noharduse",
104	"only replace exit values when loop def likely dead"),
105	clEnumValN(AlwaysRepl, "always",
106	"always replace exit value whenever possible")));
107
108	static cl::opt<bool> UsePostIncrementRanges(
109	"indvars-post-increment-ranges", cl::Hidden,
110	cl::desc ("Use post increment control-dependent ranges in IndVarSimplify"),
111	cl::init(Val: true));
112
113	static cl::opt<bool>
114	DisableLFTR("disable-lftr", cl::Hidden, cl::init(Val: false),
115	cl::desc ("Disable Linear Function Test Replace optimization"));
116
117	static cl::opt<bool>
118	LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(Val: true),
119	cl::desc ("Predicate conditions in read only loops"));
120
121	static cl::opt<bool>
122	AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(Val: true),
123	cl::desc ("Allow widening of indvars to eliminate s/zext"));
124
125	namespace {
126
127	class IndVarSimplify {
128	LoopInfo *LI;
129	ScalarEvolution *SE;
130	DominatorTree *DT;
131	const DataLayout &DL;
132	TargetLibraryInfo *TLI;
133	const TargetTransformInfo *TTI;
134	std::unique_ptr<MemorySSAUpdater> MSSAU;
135
136	SmallVector<WeakTrackingVH, `16`> DeadInsts;
137	bool WidenIndVars;
138
139	bool RunUnswitching = false;
140
141	bool handleFloatingPointIV(Loop L, PHINode PH);
142	bool rewriteNonIntegerIVs(Loop *L);
143
144	bool simplifyAndExtend(Loop L, SCEVExpander &Rewriter, LoopInfo LI);
145	/// Try to improve our exit conditions by converting condition from signed
146	/// to unsigned or rotating computation out of the loop.
147	/// (See inline comment about why this is duplicated from simplifyAndExtend)
148	bool canonicalizeExitCondition(Loop *L);
149	/// Try to eliminate loop exits based on analyzeable exit counts
150	bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
151	/// Try to form loop invariant tests for loop exits by changing how many
152	/// iterations of the loop run when that is unobservable.
153	bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
154
155	bool rewriteFirstIterationLoopExitValues(Loop *L);
156
157	bool linearFunctionTestReplace(Loop L, BasicBlock ExitingBB,
158	const SCEV *ExitCount,
159	PHINode *IndVar, SCEVExpander &Rewriter);
160
161	bool sinkUnusedInvariants(Loop *L);
162
163	public:
164	IndVarSimplify(LoopInfo LI, ScalarEvolution SE, DominatorTree *DT,
165	const DataLayout &DL, TargetLibraryInfo *TLI,
166	TargetTransformInfo TTI, MemorySSA MSSA, bool WidenIndVars)
167	: LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
168	WidenIndVars(WidenIndVars) {
169	if (MSSA)
170	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
171	}
172
173	bool run(Loop *L);
174
175	bool runUnswitching() const { return RunUnswitching; }
176	};
177
178	} // end anonymous namespace
179
180	//===----------------------------------------------------------------------===//
181	// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
182	//===----------------------------------------------------------------------===//
183
184	/// Convert APF to an integer, if possible.
185	static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
186	bool isExact = false;
187	// See if we can convert this to an int64_t
188	uint64_t UIntVal;
189	if (APF.convertToInteger(Input: MutableArrayRef(UIntVal), Width: `64`, IsSigned: true,
190	RM: APFloat::rmTowardZero, IsExact: &isExact) != APFloat::opOK \|\|
191	!isExact)
192	return false;
193	IntVal = UIntVal;
194	return true;
195	}
196
197	/// If the loop has floating induction variable then insert corresponding
198	/// integer induction variable if possible.
199	/// For example,
200	/// for(double i = 0; i < 10000; ++i)
201	/// bar(i)
202	/// is converted into
203	/// for(int i = 0; i < 10000; ++i)
204	/// bar((double)i);
205	bool IndVarSimplify::handleFloatingPointIV(Loop L, PHINode PN) {
206	unsigned IncomingEdge = L->contains(BB: PN->getIncomingBlock(i: `0`));
207	unsigned BackEdge = IncomingEdge^`1`;
208
209	// Check incoming value.
210	auto *InitValueVal = dyn_cast<ConstantFP>(Val: PN->getIncomingValue(i: IncomingEdge));
211
212	int64_t InitValue;
213	if (!InitValueVal \|\| !ConvertToSInt(APF: InitValueVal->getValueAPF(), IntVal&: InitValue))
214	return false;
215
216	// Check IV increment. Reject this PN if increment operation is not
217	// an add or increment value can not be represented by an integer.
218	auto *Incr = dyn_cast<BinaryOperator>(Val: PN->getIncomingValue(i: BackEdge));
219	if (Incr == nullptr \|\| Incr->getOpcode() != Instruction::FAdd) return false;
220
221	// If this is not an add of the PHI with a constantfp, or if the constant fp
222	// is not an integer, bail out.
223	ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Val: Incr->getOperand(i_nocapture: `1`));
224	int64_t IncValue;
225	if (IncValueVal == nullptr \|\| Incr->getOperand(i_nocapture: `0`) != PN \|\|
226	!ConvertToSInt(APF: IncValueVal->getValueAPF(), IntVal&: IncValue))
227	return false;
228
229	// Check Incr uses. One user is PN and the other user is an exit condition
230	// used by the conditional terminator.
231	Value::user_iterator IncrUse = Incr->user_begin();
232	Instruction U1 = cast<Instruction>(Val: IncrUse ++);
233	if (IncrUse == Incr->user_end()) return false;
234	Instruction U2 = cast<Instruction>(Val: IncrUse ++);
235	if (IncrUse != Incr->user_end()) return false;
236
237	// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
238	// only used by a branch, we can't transform it.
239	FCmpInst *Compare = dyn_cast<FCmpInst>(Val: U1);
240	if (!Compare)
241	Compare = dyn_cast<FCmpInst>(Val: U2);
242	if (!Compare \|\| !Compare->hasOneUse() \|\|
243	!isa<BranchInst>(Val: Compare->user_back()))
244	return false;
245
246	BranchInst *TheBr = cast<BranchInst>(Val: Compare->user_back());
247
248	// We need to verify that the branch actually controls the iteration count
249	// of the loop. If not, the new IV can overflow and no one will notice.
250	// The branch block must be in the loop and one of the successors must be out
251	// of the loop.
252	assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
253	if (!L->contains(BB: TheBr->getParent()) \|\|
254	(L->contains(BB: TheBr->getSuccessor(i: `0`)) &&
255	L->contains(BB: TheBr->getSuccessor(i: `1`))))
256	return false;
257
258	// If it isn't a comparison with an integer-as-fp (the exit value), we can't
259	// transform it.
260	ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Val: Compare->getOperand(i_nocapture: `1`));
261	int64_t ExitValue;
262	if (ExitValueVal == nullptr \|\|
263	!ConvertToSInt(APF: ExitValueVal->getValueAPF(), IntVal&: ExitValue))
264	return false;
265
266	// Find new predicate for integer comparison.
267	CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
268	switch (Compare->getPredicate()) {
269	default: return false; // Unknown comparison.
270	case CmpInst::FCMP_OEQ:
271	case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
272	case CmpInst::FCMP_ONE:
273	case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
274	case CmpInst::FCMP_OGT:
275	case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
276	case CmpInst::FCMP_OGE:
277	case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
278	case CmpInst::FCMP_OLT:
279	case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
280	case CmpInst::FCMP_OLE:
281	case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
282	}
283
284	// We convert the floating point induction variable to a signed i32 value if
285	// we can. This is only safe if the comparison will not overflow in a way
286	// that won't be trapped by the integer equivalent operations. Check for this
287	// now.
288	// TODO: We could use i64 if it is native and the range requires it.
289
290	// The start/stride/exit values must all fit in signed i32.
291	if (!isInt<`32`>(x: InitValue) \|\| !isInt<`32`>(x: IncValue) \|\| !isInt<`32`>(x: ExitValue))
292	return false;
293
294	// If not actually striding (add x, 0.0), avoid touching the code.
295	if (IncValue == `0`)
296	return false;
297
298	// Positive and negative strides have different safety conditions.
299	if (IncValue > `0`) {
300	// If we have a positive stride, we require the init to be less than the
301	// exit value.
302	if (InitValue >= ExitValue)
303	return false;
304
305	uint32_t Range = uint32_t(ExitValue-InitValue);
306	// Check for infinite loop, either:
307	// while (i <= Exit) or until (i > Exit)
308	if (NewPred == CmpInst::ICMP_SLE \|\| NewPred == CmpInst::ICMP_SGT) {
309	if (++Range == `0`) return false; // Range overflows.
310	}
311
312	unsigned Leftover = Range % uint32_t(IncValue);
313
314	// If this is an equality comparison, we require that the strided value
315	// exactly land on the exit value, otherwise the IV condition will wrap
316	// around and do things the fp IV wouldn't.
317	if ((NewPred == CmpInst::ICMP_EQ \|\| NewPred == CmpInst::ICMP_NE) &&
318	Leftover != `0`)
319	return false;
320
321	// If the stride would wrap around the i32 before exiting, we can't
322	// transform the IV.
323	if (Leftover != `0` && int32_t(ExitValue+IncValue) < ExitValue)
324	return false;
325	} else {
326	// If we have a negative stride, we require the init to be greater than the
327	// exit value.
328	if (InitValue <= ExitValue)
329	return false;
330
331	uint32_t Range = uint32_t(InitValue-ExitValue);
332	// Check for infinite loop, either:
333	// while (i >= Exit) or until (i < Exit)
334	if (NewPred == CmpInst::ICMP_SGE \|\| NewPred == CmpInst::ICMP_SLT) {
335	if (++Range == `0`) return false; // Range overflows.
336	}
337
338	unsigned Leftover = Range % uint32_t(-IncValue);
339
340	// If this is an equality comparison, we require that the strided value
341	// exactly land on the exit value, otherwise the IV condition will wrap
342	// around and do things the fp IV wouldn't.
343	if ((NewPred == CmpInst::ICMP_EQ \|\| NewPred == CmpInst::ICMP_NE) &&
344	Leftover != `0`)
345	return false;
346
347	// If the stride would wrap around the i32 before exiting, we can't
348	// transform the IV.
349	if (Leftover != `0` && int32_t(ExitValue+IncValue) > ExitValue)
350	return false;
351	}
352
353	IntegerType *Int32Ty = Type::getInt32Ty(C&: PN->getContext());
354
355	// Insert new integer induction variable.
356	PHINode *NewPHI =
357	PHINode::Create(Ty: Int32Ty, NumReservedValues: `2`, NameStr: PN->getName() + ".int", InsertBefore: PN->getIterator());
358	NewPHI->addIncoming(V: ConstantInt::getSigned(Ty: Int32Ty, V: InitValue),
359	BB: PN->getIncomingBlock(i: IncomingEdge));
360	NewPHI->setDebugLoc(PN->getDebugLoc());
361
362	Instruction *NewAdd = BinaryOperator::CreateAdd(
363	V1: NewPHI, V2: ConstantInt::getSigned(Ty: Int32Ty, V: IncValue),
364	Name: Incr->getName() + ".int", InsertBefore: Incr->getIterator());
365	NewAdd->setDebugLoc(Incr->getDebugLoc());
366	NewPHI->addIncoming(V: NewAdd, BB: PN->getIncomingBlock(i: BackEdge));
367
368	ICmpInst NewCompare = new* ICmpInst (
369	TheBr->getIterator(), NewPred, NewAdd,
370	ConstantInt::getSigned(Ty: Int32Ty, V: ExitValue), Compare->getName());
371	NewCompare->setDebugLoc(Compare->getDebugLoc());
372
373	// In the following deletions, PN may become dead and may be deleted.
374	// Use a WeakTrackingVH to observe whether this happens.
375	WeakTrackingVH WeakPH = PN;
376
377	// Delete the old floating point exit comparison. The branch starts using the
378	// new comparison.
379	NewCompare->takeName(V: Compare);
380	Compare->replaceAllUsesWith(V: NewCompare);
381	RecursivelyDeleteTriviallyDeadInstructions(V: Compare, TLI, MSSAU: MSSAU.get());
382
383	// Delete the old floating point increment.
384	Incr->replaceAllUsesWith(V: PoisonValue::get(T: Incr->getType()));
385	RecursivelyDeleteTriviallyDeadInstructions(V: Incr, TLI, MSSAU: MSSAU.get());
386
387	// If the FP induction variable still has uses, this is because something else
388	// in the loop uses its value. In order to canonicalize the induction
389	// variable, we chose to eliminate the IV and rewrite it in terms of an
390	// int->fp cast.
391	//
392	// We give preference to sitofp over uitofp because it is faster on most
393	// platforms.
394	if (WeakPH) {
395	Instruction Conv = new* SIToFPInst (NewPHI, PN->getType(), "indvar.conv",
396	PN->getParent()->getFirstInsertionPt());
397	Conv->setDebugLoc(PN->getDebugLoc());
398	PN->replaceAllUsesWith(V: Conv);
399	RecursivelyDeleteTriviallyDeadInstructions(V: PN, TLI, MSSAU: MSSAU.get());
400	}
401	return true;
402	}
403
404	bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
405	// First step. Check to see if there are any floating-point recurrences.
406	// If there are, change them into integer recurrences, permitting analysis by
407	// the SCEV routines.
408	BasicBlock *Header = L->getHeader();
409
410	SmallVector<WeakTrackingVH, `8`> PHIs(llvm::make_pointer_range(Range: Header->phis()));
411
412	bool Changed = false;
413	for (WeakTrackingVH &PHI : PHIs)
414	if (PHINode PN = dyn_cast_or_null<PHINode>(Val: &PHI))
415	Changed \|= handleFloatingPointIV(L, PN);
416
417	// If the loop previously had floating-point IV, ScalarEvolution
418	// may not have been able to compute a trip count. Now that we've done some
419	// re-writing, the trip count may be computable.
420	if (Changed)
421	SE->forgetLoop(L);
422	return Changed;
423	}
424
425	//===---------------------------------------------------------------------===//
426	// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
427	// they will exit at the first iteration.
428	//===---------------------------------------------------------------------===//
429
430	/// Check to see if this loop has loop invariant conditions which lead to loop
431	/// exits. If so, we know that if the exit path is taken, it is at the first
432	/// loop iteration. This lets us predict exit values of PHI nodes that live in
433	/// loop header.
434	bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
435	// Verify the input to the pass is already in LCSSA form.
436	assert(L->isLCSSAForm(*DT));
437
438	SmallVector<BasicBlock *, `8`> ExitBlocks;
439	L->getUniqueExitBlocks(ExitBlocks);
440
441	bool MadeAnyChanges = false;
442	for (auto *ExitBB : ExitBlocks) {
443	// If there are no more PHI nodes in this exit block, then no more
444	// values defined inside the loop are used on this path.
445	for (PHINode &PN : ExitBB->phis()) {
446	for (unsigned IncomingValIdx = `0`, E = PN.getNumIncomingValues();
447	IncomingValIdx != E; ++IncomingValIdx) {
448	auto *IncomingBB = PN.getIncomingBlock(i: IncomingValIdx);
449
450	// Can we prove that the exit must run on the first iteration if it
451	// runs at all? (i.e. early exits are fine for our purposes, but
452	// traces which lead to this exit being taken on the 2nd iteration
453	// aren't.) Note that this is about whether the exit branch is
454	// executed, not about whether it is taken.
455	if (!L->getLoopLatch() \|\|
456	!DT->dominates(A: IncomingBB, B: L->getLoopLatch()))
457	continue;
458
459	// Get condition that leads to the exit path.
460	auto *TermInst = IncomingBB->getTerminator();
461
462	Value Cond = nullptr*;
463	if (auto *BI = dyn_cast<BranchInst>(Val: TermInst)) {
464	// Must be a conditional branch, otherwise the block
465	// should not be in the loop.
466	Cond = BI->getCondition();
467	} else if (auto *SI = dyn_cast<SwitchInst>(Val: TermInst))
468	Cond = SI->getCondition();
469	else
470	continue;
471
472	if (!L->isLoopInvariant(V: Cond))
473	continue;
474
475	auto *ExitVal = dyn_cast<PHINode>(Val: PN.getIncomingValue(i: IncomingValIdx));
476
477	// Only deal with PHIs in the loop header.
478	if (!ExitVal \|\| ExitVal->getParent() != L->getHeader())
479	continue;
480
481	// If ExitVal is a PHI on the loop header, then we know its
482	// value along this exit because the exit can only be taken
483	// on the first iteration.
484	auto *LoopPreheader = L->getLoopPreheader();
485	assert(LoopPreheader && "Invalid loop");
486	int PreheaderIdx = ExitVal->getBasicBlockIndex(BB: LoopPreheader);
487	if (PreheaderIdx != -`1`) {
488	assert(ExitVal->getParent() == L->getHeader() &&
489	"ExitVal must be in loop header");
490	MadeAnyChanges = true;
491	PN.setIncomingValue(i: IncomingValIdx,
492	V: ExitVal->getIncomingValue(i: PreheaderIdx));
493	SE->forgetValue(V: &PN);
494	}
495	}
496	}
497	}
498	return MadeAnyChanges;
499	}
500
501	//===----------------------------------------------------------------------===//
502	// IV Widening - Extend the width of an IV to cover its widest uses.
503	//===----------------------------------------------------------------------===//
504
505	/// Update information about the induction variable that is extended by this
506	/// sign or zero extend operation. This is used to determine the final width of
507	/// the IV before actually widening it.
508	static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
509	ScalarEvolution *SE,
510	const TargetTransformInfo *TTI) {
511	bool IsSigned = Cast->getOpcode() == Instruction::SExt;
512	if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
513	return;
514
515	Type *Ty = Cast->getType();
516	uint64_t Width = SE->getTypeSizeInBits(Ty);
517	if (!Cast->getDataLayout().isLegalInteger(Width))
518	return;
519
520	// Check that `Cast` actually extends the induction variable (we rely on this
521	// later). This takes care of cases where `Cast` is extending a truncation of
522	// the narrow induction variable, and thus can end up being narrower than the
523	// "narrow" induction variable.
524	uint64_t NarrowIVWidth = SE->getTypeSizeInBits(Ty: WI.NarrowIV->getType());
525	if (NarrowIVWidth >= Width)
526	return;
527
528	// Cast is either an sext or zext up to this point.
529	// We should not widen an indvar if arithmetics on the wider indvar are more
530	// expensive than those on the narrower indvar. We check only the cost of ADD
531	// because at least an ADD is required to increment the induction variable. We
532	// could compute more comprehensively the cost of all instructions on the
533	// induction variable when necessary.
534	if (TTI &&
535	TTI->getArithmeticInstrCost(Opcode: Instruction::Add, Ty) >
536	TTI->getArithmeticInstrCost(Opcode: Instruction::Add,
537	Ty: Cast->getOperand(i_nocapture: `0`)->getType())) {
538	return;
539	}
540
541	if (!WI.WidestNativeType \|\|
542	Width > SE->getTypeSizeInBits(Ty: WI.WidestNativeType)) {
543	WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
544	WI.IsSigned = IsSigned;
545	return;
546	}
547
548	// We extend the IV to satisfy the sign of its user(s), or 'signed'
549	// if there are multiple users with both sign- and zero extensions,
550	// in order not to introduce nondeterministic behaviour based on the
551	// unspecified order of a PHI nodes' users-iterator.
552	WI.IsSigned \|= IsSigned;
553	}
554
555	//===----------------------------------------------------------------------===//
556	// Live IV Reduction - Minimize IVs live across the loop.
557	//===----------------------------------------------------------------------===//
558
559	//===----------------------------------------------------------------------===//
560	// Simplification of IV users based on SCEV evaluation.
561	//===----------------------------------------------------------------------===//
562
563	namespace {
564
565	class IndVarSimplifyVisitor : public IVVisitor {
566	ScalarEvolution *SE;
567	const TargetTransformInfo *TTI;
568	PHINode *IVPhi;
569
570	public:
571	WideIVInfo WI;
572
573	IndVarSimplifyVisitor(PHINode IV, ScalarEvolution SCEV,
574	const TargetTransformInfo *TTI,
575	const DominatorTree *DTree)
576	: SE(SCEV), TTI(TTI), IVPhi(IV) {
577	DT = DTree;
578	WI.NarrowIV = IVPhi;
579	}
580
581	// Implement the interface used by simplifyUsersOfIV.
582	void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
583	};
584
585	} // end anonymous namespace
586
587	/// Iteratively perform simplification on a worklist of IV users. Each
588	/// successive simplification may push more users which may themselves be
589	/// candidates for simplification.
590	///
591	/// Sign/Zero extend elimination is interleaved with IV simplification.
592	bool IndVarSimplify::simplifyAndExtend(Loop *L,
593	SCEVExpander &Rewriter,
594	LoopInfo *LI) {
595	SmallVector<WideIVInfo, `8`> WideIVs;
596
597	auto *GuardDecl = Intrinsic::getDeclarationIfExists(
598	M: L->getBlocks()[`0`]->getModule(), id: Intrinsic::experimental_guard);
599	bool HasGuards = GuardDecl && !GuardDecl->use_empty();
600
601	SmallVector<PHINode *, `8`> LoopPhis(
602	llvm::make_pointer_range(Range: L->getHeader()->phis()));
603
604	// Each round of simplification iterates through the SimplifyIVUsers worklist
605	// for all current phis, then determines whether any IVs can be
606	// widened. Widening adds new phis to LoopPhis, inducing another round of
607	// simplification on the wide IVs.
608	bool Changed = false;
609	while (!LoopPhis.empty()) {
610	// Evaluate as many IV expressions as possible before widening any IVs. This
611	// forces SCEV to set no-wrap flags before evaluating sign/zero
612	// extension. The first time SCEV attempts to normalize sign/zero extension,
613	// the result becomes final. So for the most predictable results, we delay
614	// evaluation of sign/zero extend evaluation until needed, and avoid running
615	// other SCEV based analysis prior to simplifyAndExtend.
616	do {
617	PHINode *CurrIV = LoopPhis.pop_back_val();
618
619	// Information about sign/zero extensions of CurrIV.
620	IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
621
622	const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, Dead&: DeadInsts,
623	Rewriter, V: &Visitor);
624
625	Changed \|= C;
626	RunUnswitching \|= U;
627	if (Visitor.WI.WidestNativeType) {
628	WideIVs.push_back(Elt: Visitor.WI);
629	}
630	} while(!LoopPhis.empty());
631
632	// Continue if we disallowed widening.
633	if (!WidenIndVars)
634	continue;
635
636	for (; !WideIVs.empty(); WideIVs.pop_back()) {
637	unsigned ElimExt;
638	unsigned Widened;
639	if (PHINode *WidePhi = createWideIV(WI: WideIVs.back(), LI, SE, Rewriter,
640	DT, DeadInsts, NumElimExt&: ElimExt, NumWidened&: Widened,
641	HasGuards, UsePostIncrementRanges)) {
642	NumElimExt += ElimExt;
643	NumWidened += Widened;
644	Changed = true;
645	LoopPhis.push_back(Elt: WidePhi);
646	}
647	}
648	}
649	return Changed;
650	}
651
652	//===----------------------------------------------------------------------===//
653	// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
654	//===----------------------------------------------------------------------===//
655
656	/// Given an Value which is hoped to be part of an add recurance in the given
657	/// loop, return the associated Phi node if so. Otherwise, return null. Note
658	/// that this is less general than SCEVs AddRec checking.
659	static PHINode getLoopPhiForCounter(Value IncV, Loop *L) {
660	Instruction *IncI = dyn_cast<Instruction>(Val: IncV);
661	if (!IncI)
662	return nullptr;
663
664	switch (IncI->getOpcode()) {
665	case Instruction::Add:
666	case Instruction::Sub:
667	break;
668	case Instruction::GetElementPtr:
669	// An IV counter must preserve its type.
670	if (IncI->getNumOperands() == `2`)
671	break;
672	[[fallthrough]];
673	default:
674	return nullptr;
675	}
676
677	PHINode *Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: `0`));
678	if (Phi && Phi->getParent() == L->getHeader()) {
679	if (L->isLoopInvariant(V: IncI->getOperand(i: `1`)))
680	return Phi;
681	return nullptr;
682	}
683	if (IncI->getOpcode() == Instruction::GetElementPtr)
684	return nullptr;
685
686	// Allow add/sub to be commuted.
687	Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: `1`));
688	if (Phi && Phi->getParent() == L->getHeader()) {
689	if (L->isLoopInvariant(V: IncI->getOperand(i: `0`)))
690	return Phi;
691	}
692	return nullptr;
693	}
694
695	/// Whether the current loop exit test is based on this value. Currently this
696	/// is limited to a direct use in the loop condition.
697	static bool isLoopExitTestBasedOn(Value V, BasicBlock ExitingBB) {
698	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
699	ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
700	// TODO: Allow non-icmp loop test.
701	if (!ICmp)
702	return false;
703
704	// TODO: Allow indirect use.
705	return ICmp->getOperand(i_nocapture: `0`) == V \|\| ICmp->getOperand(i_nocapture: `1`) == V;
706	}
707
708	/// linearFunctionTestReplace policy. Return true unless we can show that the
709	/// current exit test is already sufficiently canonical.
710	static bool needsLFTR(Loop L, BasicBlock ExitingBB) {
711	assert(L->getLoopLatch() && "Must be in simplified form");
712
713	// Avoid converting a constant or loop invariant test back to a runtime
714	// test. This is critical for when SCEV's cached ExitCount is less precise
715	// than the current IR (such as after we've proven a particular exit is
716	// actually dead and thus the BE count never reaches our ExitCount.)
717	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
718	if (L->isLoopInvariant(V: BI->getCondition()))
719	return false;
720
721	// Do LFTR to simplify the exit condition to an ICMP.
722	ICmpInst *Cond = dyn_cast<ICmpInst>(Val: BI->getCondition());
723	if (!Cond)
724	return true;
725
726	// Do LFTR to simplify the exit ICMP to EQ/NE
727	ICmpInst::Predicate Pred = Cond->getPredicate();
728	if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
729	return true;
730
731	// Look for a loop invariant RHS
732	Value *LHS = Cond->getOperand(i_nocapture: `0`);
733	Value *RHS = Cond->getOperand(i_nocapture: `1`);
734	if (!L->isLoopInvariant(V: RHS)) {
735	if (!L->isLoopInvariant(V: LHS))
736	return true;
737	std::swap(a&: LHS, b&: RHS);
738	}
739	// Look for a simple IV counter LHS
740	PHINode *Phi = dyn_cast<PHINode>(Val: LHS);
741	if (!Phi)
742	Phi = getLoopPhiForCounter(IncV: LHS, L);
743
744	if (!Phi)
745	return true;
746
747	// Do LFTR if PHI node is defined in the loop, but is not* a counter.*
748	int Idx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
749	if (Idx < `0`)
750	return true;
751
752	// Do LFTR if the exit condition's IV is not* a simple counter.*
753	Value *IncV = Phi->getIncomingValue(i: Idx);
754	return Phi != getLoopPhiForCounter(IncV, L);
755	}
756
757	/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
758	/// down to checking that all operands are constant and listing instructions
759	/// that may hide undef.
760	static bool hasConcreteDefImpl(Value V, SmallPtrSetImpl<Value> &Visited,
761	unsigned Depth) {
762	if (isa<Constant>(Val: V))
763	return !isa<UndefValue>(Val: V);
764
765	if (Depth >= `6`)
766	return false;
767
768	// Conservatively handle non-constant non-instructions. For example, Arguments
769	// may be undef.
770	Instruction *I = dyn_cast<Instruction>(Val: V);
771	if (!I)
772	return false;
773
774	// Load and return values may be undef.
775	if(I->mayReadFromMemory() \|\| isa<CallInst>(Val: I) \|\| isa<InvokeInst>(Val: I))
776	return false;
777
778	// Optimistically handle other instructions.
779	for (Value *Op : I->operands()) {
780	if (!Visited.insert(Ptr: Op).second)
781	continue;
782	if (!hasConcreteDefImpl(V: Op, Visited, Depth: Depth+`1`))
783	return false;
784	}
785	return true;
786	}
787
788	/// Return true if the given value is concrete. We must prove that undef can
789	/// never reach it.
790	///
791	/// TODO: If we decide that this is a good approach to checking for undef, we
792	/// may factor it into a common location.
793	static bool hasConcreteDef(Value *V) {
794	SmallPtrSet<Value*, `8`> Visited;
795	Visited.insert(Ptr: V);
796	return hasConcreteDefImpl(V, Visited, Depth: `0`);
797	}
798
799	/// Return true if the given phi is a "counter" in L. A counter is an
800	/// add recurance (of integer or pointer type) with an arbitrary start, and a
801	/// step of 1. Note that L must have exactly one latch.
802	static bool isLoopCounter(PHINode* Phi, Loop *L,
803	ScalarEvolution *SE) {
804	assert(Phi->getParent() == L->getHeader());
805	assert(L->getLoopLatch());
806
807	if (!SE->isSCEVable(Ty: Phi->getType()))
808	return false;
809
810	const SCEV *S = SE->getSCEV(V: Phi);
811	if (!match(S, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_scev_One(), L: m_SpecificLoop(L))))
812	return false;
813
814	int LatchIdx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
815	Value *IncV = Phi->getIncomingValue(i: LatchIdx);
816	return (getLoopPhiForCounter(IncV, L) == Phi &&
817	isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncV)));
818	}
819
820	/// Search the loop header for a loop counter (anadd rec w/step of one)
821	/// suitable for use by LFTR. If multiple counters are available, select the
822	/// "best" one based profitable heuristics.
823	///
824	/// BECount may be an i8 pointer type. The pointer difference is already*
825	/// valid count without scaling the address stride, so it remains a pointer
826	/// expression as far as SCEV is concerned.
827	static PHINode FindLoopCounter(Loop L, BasicBlock *ExitingBB,
828	const SCEV *BECount,
829	ScalarEvolution SE, DominatorTree DT) {
830	uint64_t BCWidth = SE->getTypeSizeInBits(Ty: BECount->getType());
831
832	Value *Cond = cast<BranchInst>(Val: ExitingBB->getTerminator())->getCondition();
833
834	// Loop over all of the PHI nodes, looking for a simple counter.
835	PHINode BestPhi = nullptr*;
836	const SCEV BestInit = nullptr*;
837	BasicBlock *LatchBlock = L->getLoopLatch();
838	assert(LatchBlock && "Must be in simplified form");
839	const DataLayout &DL = L->getHeader()->getDataLayout();
840
841	for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(Val: I); ++I) {
842	PHINode *Phi = cast<PHINode>(Val&: I);
843	if (!isLoopCounter(Phi, L, SE))
844	continue;
845
846	const auto *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: Phi));
847
848	// AR may be a pointer type, while BECount is an integer type.
849	// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
850	// AR may not be a narrower type, or we may never exit.
851	uint64_t PhiWidth = SE->getTypeSizeInBits(Ty: AR->getType());
852	if (PhiWidth < BCWidth \|\| !DL.isLegalInteger(Width: PhiWidth))
853	continue;
854
855	// Avoid reusing a potentially undef value to compute other values that may
856	// have originally had a concrete definition.
857	if (!hasConcreteDef(V: Phi)) {
858	// We explicitly allow unknown phis as long as they are already used by
859	// the loop exit test. This is legal since performing LFTR could not
860	// increase the number of undef users.
861	Value *IncPhi = Phi->getIncomingValueForBlock(BB: LatchBlock);
862	if (!isLoopExitTestBasedOn(V: Phi, ExitingBB) &&
863	!isLoopExitTestBasedOn(V: IncPhi, ExitingBB))
864	continue;
865	}
866
867	// Avoid introducing undefined behavior due to poison which didn't exist in
868	// the original program. (Annoyingly, the rules for poison and undef
869	// propagation are distinct, so this does NOT cover the undef case above.)
870	// We have to ensure that we don't introduce UB by introducing a use on an
871	// iteration where said IV produces poison. Our strategy here differs for
872	// pointers and integer IVs. For integers, we strip and reinfer as needed,
873	// see code in linearFunctionTestReplace. For pointers, we restrict
874	// transforms as there is no good way to reinfer inbounds once lost.
875	if (!Phi->getType()->isIntegerTy() &&
876	!mustExecuteUBIfPoisonOnPathTo(Root: Phi, OnPathTo: ExitingBB->getTerminator(), DT))
877	continue;
878
879	const SCEV *Init = AR->getStart();
880
881	if (BestPhi && !isAlmostDeadIV(IV: BestPhi, LatchBlock, Cond)) {
882	// Don't force a live loop counter if another IV can be used.
883	if (isAlmostDeadIV(IV: Phi, LatchBlock, Cond))
884	continue;
885
886	// Prefer to count-from-zero. This is a more "canonical" counter form. It
887	// also prefers integer to pointer IVs.
888	if (BestInit->isZero() != Init->isZero()) {
889	if (BestInit->isZero())
890	continue;
891	}
892	// If two IVs both count from zero or both count from nonzero then the
893	// narrower is likely a dead phi that has been widened. Use the wider phi
894	// to allow the other to be eliminated.
895	else if (PhiWidth <= SE->getTypeSizeInBits(Ty: BestPhi->getType()))
896	continue;
897	}
898	BestPhi = Phi;
899	BestInit = Init;
900	}
901	return BestPhi;
902	}
903
904	/// Insert an IR expression which computes the value held by the IV IndVar
905	/// (which must be an loop counter w/unit stride) after the backedge of loop L
906	/// is taken ExitCount times.
907	static Value genLoopLimit(PHINode IndVar, BasicBlock *ExitingBB,
908	const SCEV ExitCount, bool* UsePostInc, Loop *L,
909	SCEVExpander &Rewriter, ScalarEvolution *SE) {
910	assert(isLoopCounter(IndVar, L, SE));
911	assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
912	const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IndVar));
913	assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
914
915	// For integer IVs, truncate the IV before computing the limit unless we
916	// know apriori that the limit must be a constant when evaluated in the
917	// bitwidth of the IV. We prefer (potentially) keeping a truncate of the
918	// IV in the loop over a (potentially) expensive expansion of the widened
919	// exit count add(zext(add)) expression.
920	if (IndVar->getType()->isIntegerTy() &&
921	SE->getTypeSizeInBits(Ty: AR->getType()) >
922	SE->getTypeSizeInBits(Ty: ExitCount->getType())) {
923	const SCEV *IVInit = AR->getStart();
924	if (!isa<SCEVConstant>(Val: IVInit) \|\| !isa<SCEVConstant>(Val: ExitCount))
925	AR = cast<SCEVAddRecExpr>(Val: SE->getTruncateExpr(Op: AR, Ty: ExitCount->getType()));
926	}
927
928	const SCEVAddRecExpr ARBase = UsePostInc ? AR->getPostIncExpr(SE&: SE) : AR;
929	const SCEV IVLimit = ARBase->evaluateAtIteration(It: ExitCount, SE&: SE);
930	assert(SE->isLoopInvariant(IVLimit, L) &&
931	"Computed iteration count is not loop invariant!");
932	return Rewriter.expandCodeFor(SH: IVLimit, Ty: ARBase->getType(),
933	I: ExitingBB->getTerminator());
934	}
935
936	/// This method rewrites the exit condition of the loop to be a canonical !=
937	/// comparison against the incremented loop induction variable. This pass is
938	/// able to rewrite the exit tests of any loop where the SCEV analysis can
939	/// determine a loop-invariant trip count of the loop, which is actually a much
940	/// broader range than just linear tests.
941	bool IndVarSimplify::
942	linearFunctionTestReplace(Loop L, BasicBlock ExitingBB,
943	const SCEV *ExitCount,
944	PHINode *IndVar, SCEVExpander &Rewriter) {
945	assert(L->getLoopLatch() && "Loop no longer in simplified form?");
946	assert(isLoopCounter(IndVar, L, SE));
947	Instruction * const IncVar =
948	cast<Instruction>(Val: IndVar->getIncomingValueForBlock(BB: L->getLoopLatch()));
949
950	// Initialize CmpIndVar to the preincremented IV.
951	Value *CmpIndVar = IndVar;
952	bool UsePostInc = false;
953
954	// If the exiting block is the same as the backedge block, we prefer to
955	// compare against the post-incremented value, otherwise we must compare
956	// against the preincremented value.
957	if (ExitingBB == L->getLoopLatch()) {
958	// For pointer IVs, we chose to not strip inbounds which requires us not
959	// to add a potentially UB introducing use. We need to either a) show
960	// the loop test we're modifying is already in post-inc form, or b) show
961	// that adding a use must not introduce UB.
962	bool SafeToPostInc =
963	IndVar->getType()->isIntegerTy() \|\|
964	isLoopExitTestBasedOn(V: IncVar, ExitingBB) \|\|
965	mustExecuteUBIfPoisonOnPathTo(Root: IncVar, OnPathTo: ExitingBB->getTerminator(), DT);
966	if (SafeToPostInc) {
967	UsePostInc = true;
968	CmpIndVar = IncVar;
969	}
970	}
971
972	// It may be necessary to drop nowrap flags on the incrementing instruction
973	// if either LFTR moves from a pre-inc check to a post-inc check (in which
974	// case the increment might have previously been poison on the last iteration
975	// only) or if LFTR switches to a different IV that was previously dynamically
976	// dead (and as such may be arbitrarily poison). We remove any nowrap flags
977	// that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
978	// check), because the pre-inc addrec flags may be adopted from the original
979	// instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
980	// TODO: This handling is inaccurate for one case: If we switch to a
981	// dynamically dead IV that wraps on the first loop iteration only, which is
982	// not covered by the post-inc addrec. (If the new IV was not dynamically
983	// dead, it could not be poison on the first iteration in the first place.)
984	if (auto *BO = dyn_cast<BinaryOperator>(Val: IncVar)) {
985	const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncVar));
986	if (BO->hasNoUnsignedWrap())
987	BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
988	if (BO->hasNoSignedWrap())
989	BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
990	}
991
992	Value *ExitCnt = genLoopLimit(
993	IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
994	assert(ExitCnt->getType()->isPointerTy() ==
995	IndVar->getType()->isPointerTy() &&
996	"genLoopLimit missed a cast");
997
998	// Insert a new icmp_ne or icmp_eq instruction before the branch.
999	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1000	ICmpInst::Predicate P;
1001	if (L->contains(BB: BI->getSuccessor(i: `0`)))
1002	P = ICmpInst::ICMP_NE;
1003	else
1004	P = ICmpInst::ICMP_EQ;
1005
1006	IRBuilder<> Builder(BI);
1007
1008	// The new loop exit condition should reuse the debug location of the
1009	// original loop exit condition.
1010	if (auto *Cond = dyn_cast<Instruction>(Val: BI->getCondition()))
1011	Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1012
1013	// For integer IVs, if we evaluated the limit in the narrower bitwidth to
1014	// avoid the expensive expansion of the limit expression in the wider type,
1015	// emit a truncate to narrow the IV to the ExitCount type. This is safe
1016	// since we know (from the exit count bitwidth), that we can't self-wrap in
1017	// the narrower type.
1018	unsigned CmpIndVarSize = SE->getTypeSizeInBits(Ty: CmpIndVar->getType());
1019	unsigned ExitCntSize = SE->getTypeSizeInBits(Ty: ExitCnt->getType());
1020	if (CmpIndVarSize > ExitCntSize) {
1021	assert(!CmpIndVar->getType()->isPointerTy() &&
1022	!ExitCnt->getType()->isPointerTy());
1023
1024	// Before resorting to actually inserting the truncate, use the same
1025	// reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1026	// the other side of the comparison instead. We still evaluate the limit
1027	// in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1028	// a truncate within in.
1029	bool Extended = false;
1030	const SCEV *IV = SE->getSCEV(V: CmpIndVar);
1031	const SCEV *TruncatedIV = SE->getTruncateExpr(Op: IV, Ty: ExitCnt->getType());
1032	const SCEV *ZExtTrunc =
1033	SE->getZeroExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());
1034
1035	if (ZExtTrunc == IV) {
1036	Extended = true;
1037	ExitCnt = Builder.CreateZExt(V: ExitCnt, DestTy: IndVar->getType(),
1038	Name: "wide.trip.count");
1039	} else {
1040	const SCEV *SExtTrunc =
1041	SE->getSignExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());
1042	if (SExtTrunc == IV) {
1043	Extended = true;
1044	ExitCnt = Builder.CreateSExt(V: ExitCnt, DestTy: IndVar->getType(),
1045	Name: "wide.trip.count");
1046	}
1047	}
1048
1049	if (Extended) {
1050	bool Discard;
1051	L->makeLoopInvariant(V: ExitCnt, Changed&: Discard);
1052	} else
1053	CmpIndVar = Builder.CreateTrunc(V: CmpIndVar, DestTy: ExitCnt->getType(),
1054	Name: "lftr.wideiv");
1055	}
1056	LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1057	<< " LHS:" << *CmpIndVar << `'\n'`
1058	<< " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1059	<< "\n"
1060	<< " RHS:\t" << *ExitCnt << "\n"
1061	<< "ExitCount:\t" << *ExitCount << "\n"
1062	<< " was: " << *BI->getCondition() << "\n");
1063
1064	Value *Cond = Builder.CreateICmp(P, LHS: CmpIndVar, RHS: ExitCnt, Name: "exitcond");
1065	Value *OrigCond = BI->getCondition();
1066	// It's tempting to use replaceAllUsesWith here to fully replace the old
1067	// comparison, but that's not immediately safe, since users of the old
1068	// comparison may not be dominated by the new comparison. Instead, just
1069	// update the branch to use the new comparison; in the common case this
1070	// will make old comparison dead.
1071	BI->setCondition(Cond);
1072	DeadInsts.emplace_back(Args&: OrigCond);
1073
1074	++NumLFTR;
1075	return true;
1076	}
1077
1078	//===----------------------------------------------------------------------===//
1079	// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1080	//===----------------------------------------------------------------------===//
1081
1082	/// If there's a single exit block, sink any loop-invariant values that
1083	/// were defined in the preheader but not used inside the loop into the
1084	/// exit block to reduce register pressure in the loop.
1085	bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1086	BasicBlock *ExitBlock = L->getExitBlock();
1087	if (!ExitBlock) return false;
1088
1089	BasicBlock *Preheader = L->getLoopPreheader();
1090	if (!Preheader) return false;
1091
1092	bool MadeAnyChanges = false;
1093	for (Instruction &I : llvm::make_early_inc_range(Range: llvm::reverse(C&: *Preheader))) {
1094
1095	// Skip BB Terminator.
1096	if (Preheader->getTerminator() == &I)
1097	continue;
1098
1099	// New instructions were inserted at the end of the preheader.
1100	if (isa<PHINode>(Val: I))
1101	break;
1102
1103	// Don't move instructions which might have side effects, since the side
1104	// effects need to complete before instructions inside the loop. Also don't
1105	// move instructions which might read memory, since the loop may modify
1106	// memory. Note that it's okay if the instruction might have undefined
1107	// behavior: LoopSimplify guarantees that the preheader dominates the exit
1108	// block.
1109	if (I.mayHaveSideEffects() \|\| I.mayReadFromMemory())
1110	continue;
1111
1112	// Skip debug or pseudo instructions.
1113	if (I.isDebugOrPseudoInst())
1114	continue;
1115
1116	// Skip eh pad instructions.
1117	if (I.isEHPad())
1118	continue;
1119
1120	// Don't sink alloca: we never want to sink static alloca's out of the
1121	// entry block, and correctly sinking dynamic alloca's requires
1122	// checks for stacksave/stackrestore intrinsics.
1123	// FIXME: Refactor this check somehow?
1124	if (isa<AllocaInst>(Val: &I))
1125	continue;
1126
1127	// Determine if there is a use in or before the loop (direct or
1128	// otherwise).
1129	bool UsedInLoop = false;
1130	for (Use &U : I.uses()) {
1131	Instruction *User = cast<Instruction>(Val: U.getUser());
1132	BasicBlock *UseBB = User->getParent();
1133	if (PHINode *P = dyn_cast<PHINode>(Val: User)) {
1134	unsigned i =
1135	PHINode::getIncomingValueNumForOperand(i: U.getOperandNo());
1136	UseBB = P->getIncomingBlock(i);
1137	}
1138	if (UseBB == Preheader \|\| L->contains(BB: UseBB)) {
1139	UsedInLoop = true;
1140	break;
1141	}
1142	}
1143
1144	// If there is, the def must remain in the preheader.
1145	if (UsedInLoop)
1146	continue;
1147
1148	// Otherwise, sink it to the exit block.
1149	I.moveBefore(InsertPos: ExitBlock->getFirstInsertionPt());
1150	SE->forgetValue(V: &I);
1151	MadeAnyChanges = true;
1152	}
1153
1154	return MadeAnyChanges;
1155	}
1156
1157	static void replaceExitCond(BranchInst BI, Value NewCond,
1158	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1159	auto *OldCond = BI->getCondition();
1160	LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1161	<< " with " << *NewCond << "\n");
1162	BI->setCondition(NewCond);
1163	if (OldCond->use_empty())
1164	DeadInsts.emplace_back(Args&: OldCond);
1165	}
1166
1167	static Constant createFoldedExitCond(const* Loop L, BasicBlock ExitingBB,
1168	bool IsTaken) {
1169	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1170	bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1171	auto *OldCond = BI->getCondition();
1172	return ConstantInt::get(Ty: OldCond->getType(),
1173	V: IsTaken ? ExitIfTrue : !ExitIfTrue);
1174	}
1175
1176	static void foldExit(const Loop L, BasicBlock ExitingBB, bool IsTaken,
1177	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1178	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1179	auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1180	replaceExitCond(BI, NewCond, DeadInsts);
1181	}
1182
1183	static void replaceLoopPHINodesWithPreheaderValues(
1184	LoopInfo LI, Loop L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1185	ScalarEvolution &SE) {
1186	assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1187	auto *LoopPreheader = L->getLoopPreheader();
1188	auto *LoopHeader = L->getHeader();
1189	SmallVector<Instruction *> Worklist;
1190	for (auto &PN : LoopHeader->phis()) {
1191	auto *PreheaderIncoming = PN.getIncomingValueForBlock(BB: LoopPreheader);
1192	for (User *U : PN.users())
1193	Worklist.push_back(Elt: cast<Instruction>(Val: U));
1194	SE.forgetValue(V: &PN);
1195	PN.replaceAllUsesWith(V: PreheaderIncoming);
1196	DeadInsts.emplace_back(Args: &PN);
1197	}
1198
1199	// Replacing with the preheader value will often allow IV users to simplify
1200	// (especially if the preheader value is a constant).
1201	SmallPtrSet<Instruction *, `16`> Visited;
1202	while (!Worklist.empty()) {
1203	auto *I = cast<Instruction>(Val: Worklist.pop_back_val());
1204	if (!Visited.insert(Ptr: I).second)
1205	continue;
1206
1207	// Don't simplify instructions outside the loop.
1208	if (!L->contains(Inst: I))
1209	continue;
1210
1211	Value *Res = simplifyInstruction(I, Q: I->getDataLayout());
1212	if (Res && LI->replacementPreservesLCSSAForm(From: I, To: Res)) {
1213	for (User *U : I->users())
1214	Worklist.push_back(Elt: cast<Instruction>(Val: U));
1215	I->replaceAllUsesWith(V: Res);
1216	DeadInsts.emplace_back(Args&: I);
1217	}
1218	}
1219	}
1220
1221	static Value *
1222	createInvariantCond(const Loop L, BasicBlock ExitingBB,
1223	const ScalarEvolution::LoopInvariantPredicate &LIP,
1224	SCEVExpander &Rewriter) {
1225	ICmpInst::Predicate InvariantPred = LIP.Pred;
1226	BasicBlock *Preheader = L->getLoopPreheader();
1227	assert(Preheader && "Preheader doesn't exist");
1228	Rewriter.setInsertPoint(Preheader->getTerminator());
1229	auto *LHSV = Rewriter.expandCodeFor(SH: LIP.LHS);
1230	auto *RHSV = Rewriter.expandCodeFor(SH: LIP.RHS);
1231	bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1232	if (ExitIfTrue)
1233	InvariantPred = ICmpInst::getInversePredicate(pred: InvariantPred);
1234	IRBuilder<> Builder(Preheader->getTerminator());
1235	BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1236	return Builder.CreateICmp(P: InvariantPred, LHS: LHSV, RHS: RHSV,
1237	Name: BI->getCondition()->getName());
1238	}
1239
1240	static std::optional<Value *>
1241	createReplacement(ICmpInst ICmp, const* Loop L, BasicBlock ExitingBB,
1242	const SCEV MaxIter, bool* Inverted, bool SkipLastIter,
1243	ScalarEvolution *SE, SCEVExpander &Rewriter) {
1244	CmpPredicate Pred = ICmp->getCmpPredicate();
1245	Value *LHS = ICmp->getOperand(i_nocapture: `0`);
1246	Value *RHS = ICmp->getOperand(i_nocapture: `1`);
1247
1248	// 'LHS pred RHS' should now mean that we stay in loop.
1249	auto *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1250	if (Inverted)
1251	Pred = ICmpInst::getInverseCmpPredicate(Pred);
1252
1253	const SCEV *LHSS = SE->getSCEVAtScope(V: LHS, L);
1254	const SCEV *RHSS = SE->getSCEVAtScope(V: RHS, L);
1255	// Can we prove it to be trivially true or false?
1256	if (auto EV = SE->evaluatePredicateAt(Pred, LHS: LHSS, RHS: RHSS, CtxI: BI))
1257	return createFoldedExitCond(L, ExitingBB, /IsTaken/ !*EV);
1258
1259	auto *ARTy = LHSS->getType();
1260	auto *MaxIterTy = MaxIter->getType();
1261	// If possible, adjust types.
1262	if (SE->getTypeSizeInBits(Ty: ARTy) > SE->getTypeSizeInBits(Ty: MaxIterTy))
1263	MaxIter = SE->getZeroExtendExpr(Op: MaxIter, Ty: ARTy);
1264	else if (SE->getTypeSizeInBits(Ty: ARTy) < SE->getTypeSizeInBits(Ty: MaxIterTy)) {
1265	const SCEV *MinusOne = SE->getMinusOne(Ty: ARTy);
1266	const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(Op: MinusOne, Ty: MaxIterTy);
1267	if (SE->isKnownPredicateAt(Pred: ICmpInst::ICMP_ULE, LHS: MaxIter, RHS: MaxAllowedIter, CtxI: BI))
1268	MaxIter = SE->getTruncateExpr(Op: MaxIter, Ty: ARTy);
1269	}
1270
1271	if (SkipLastIter) {
1272	// Semantically skip last iter is "subtract 1, do not bother about unsigned
1273	// wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1274	// with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1275	// So we manually construct umin(a - 1, b - 1).
1276	SmallVector<const SCEV *, `4`> Elements;
1277	if (auto *UMin = dyn_cast<SCEVUMinExpr>(Val: MaxIter)) {
1278	for (const SCEV *Op : UMin->operands())
1279	Elements.push_back(Elt: SE->getMinusSCEV(LHS: Op, RHS: SE->getOne(Ty: Op->getType())));
1280	MaxIter = SE->getUMinFromMismatchedTypes(Ops&: Elements);
1281	} else
1282	MaxIter = SE->getMinusSCEV(LHS: MaxIter, RHS: SE->getOne(Ty: MaxIter->getType()));
1283	}
1284
1285	// Check if there is a loop-invariant predicate equivalent to our check.
1286	auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHS: LHSS, RHS: RHSS,
1287	L, CtxI: BI, MaxIter);
1288	if (!LIP)
1289	return std::nullopt;
1290
1291	// Can we prove it to be trivially true?
1292	if (SE->isKnownPredicateAt(Pred: LIP ->Pred, LHS: LIP ->LHS, RHS: LIP ->RHS, CtxI: BI))
1293	return createFoldedExitCond(L, ExitingBB, /IsTaken/ false);
1294	else
1295	return createInvariantCond(L, ExitingBB, LIP: *LIP, Rewriter);
1296	}
1297
1298	static bool optimizeLoopExitWithUnknownExitCount(
1299	const Loop L, BranchInst BI, BasicBlock ExitingBB, const* SCEV *MaxIter,
1300	bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1301	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1302	assert(
1303	(L->contains(BI->getSuccessor(`0`)) != L->contains(BI->getSuccessor(`1`))) &&
1304	"Not a loop exit!");
1305
1306	// For branch that stays in loop by TRUE condition, go through AND. For branch
1307	// that stays in loop by FALSE condition, go through OR. Both gives the
1308	// similar logic: "stay in loop iff all conditions are true(false)".
1309	bool Inverted = L->contains(BB: BI->getSuccessor(i: `1`));
1310	SmallVector<ICmpInst *, `4`> LeafConditions;
1311	SmallVector<Value *, `4`> Worklist;
1312	SmallPtrSet<Value *, `4`> Visited;
1313	Value *OldCond = BI->getCondition();
1314	Visited.insert(Ptr: OldCond);
1315	Worklist.push_back(Elt: OldCond);
1316
1317	auto GoThrough = [&](Value *V) {
1318	Value LHS = nullptr, RHS = nullptr;
1319	if (Inverted) {
1320	if (!match(V, P: m_LogicalOr(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
1321	return false;
1322	} else {
1323	if (!match(V, P: m_LogicalAnd(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
1324	return false;
1325	}
1326	if (Visited.insert(Ptr: LHS).second)
1327	Worklist.push_back(Elt: LHS);
1328	if (Visited.insert(Ptr: RHS).second)
1329	Worklist.push_back(Elt: RHS);
1330	return true;
1331	};
1332
1333	do {
1334	Value *Curr = Worklist.pop_back_val();
1335	// Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1336	// those with one use, to avoid instruction duplication.
1337	if (Curr->hasOneUse())
1338	if (!GoThrough (Curr))
1339	if (auto *ICmp = dyn_cast<ICmpInst>(Val: Curr))
1340	LeafConditions.push_back(Elt: ICmp);
1341	} while (!Worklist.empty());
1342
1343	// If the current basic block has the same exit count as the whole loop, and
1344	// it consists of multiple icmp's, try to collect all icmp's that give exact
1345	// same exit count. For all other icmp's, we could use one less iteration,
1346	// because their value on the last iteration doesn't really matter.
1347	SmallPtrSet<ICmpInst *, `4`> ICmpsFailingOnLastIter;
1348	if (!SkipLastIter && LeafConditions.size() > `1` &&
1349	SE->getExitCount(L, ExitingBlock: ExitingBB,
1350	Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1351	MaxIter)
1352	for (auto *ICmp : LeafConditions) {
1353	auto EL = SE->computeExitLimitFromCond(L, ExitCond: ICmp, ExitIfTrue: Inverted,
1354	/ControlsExit/ ControlsOnlyExit: false);
1355	const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
1356	if (isa<SCEVCouldNotCompute>(Val: ExitMax))
1357	continue;
1358	// They could be of different types (specifically this happens after
1359	// IV widening).
1360	auto *WiderType =
1361	SE->getWiderType(Ty1: ExitMax->getType(), Ty2: MaxIter->getType());
1362	const SCEV *WideExitMax = SE->getNoopOrZeroExtend(V: ExitMax, Ty: WiderType);
1363	const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(V: MaxIter, Ty: WiderType);
1364	if (WideExitMax == WideMaxIter)
1365	ICmpsFailingOnLastIter.insert(Ptr: ICmp);
1366	}
1367
1368	bool Changed = false;
1369	for (auto *OldCond : LeafConditions) {
1370	// Skip last iteration for this icmp under one of two conditions:
1371	// - We do it for all conditions;
1372	// - There is another ICmp that would fail on last iter, so this one doesn't
1373	// really matter.
1374	bool OptimisticSkipLastIter = SkipLastIter;
1375	if (!OptimisticSkipLastIter) {
1376	if (ICmpsFailingOnLastIter.size() > `1`)
1377	OptimisticSkipLastIter = true;
1378	else if (ICmpsFailingOnLastIter.size() == `1`)
1379	OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(Ptr: OldCond);
1380	}
1381	if (auto Replaced =
1382	createReplacement(ICmp: OldCond, L, ExitingBB, MaxIter, Inverted,
1383	SkipLastIter: OptimisticSkipLastIter, SE, Rewriter)) {
1384	Changed = true;
1385	auto NewCond = Replaced;
1386	if (auto *NCI = dyn_cast<Instruction>(Val: NewCond)) {
1387	NCI->setName(OldCond->getName() + ".first_iter");
1388	}
1389	LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1390	<< " with " << *NewCond << "\n");
1391	assert(OldCond->hasOneUse() && "Must be!");
1392	OldCond->replaceAllUsesWith(V: NewCond);
1393	DeadInsts.push_back(Elt: OldCond);
1394	// Make sure we no longer consider this condition as failing on last
1395	// iteration.
1396	ICmpsFailingOnLastIter.erase(Ptr: OldCond);
1397	}
1398	}
1399	return Changed;
1400	}
1401
1402	bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1403	// Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1404	// We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1405	// never reaches the icmp since the zext doesn't fold to an AddRec unless
1406	// it already has flags. The alternative to this would be to extending the
1407	// set of "interesting" IV users to include the icmp, but doing that
1408	// regresses results in practice by querying SCEVs before trip counts which
1409	// rely on them which results in SCEV caching sub-optimal answers. The
1410	// concern about caching sub-optimal results is why we only query SCEVs of
1411	// the loop invariant RHS here.
1412	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1413	L->getExitingBlocks(ExitingBlocks);
1414	bool Changed = false;
1415	for (auto *ExitingBB : ExitingBlocks) {
1416	auto *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1417	if (!BI)
1418	continue;
1419	assert(BI->isConditional() && "exit branch must be conditional");
1420
1421	auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
1422	if (!ICmp \|\| !ICmp->hasOneUse())
1423	continue;
1424
1425	auto *LHS = ICmp->getOperand(i_nocapture: `0`);
1426	auto *RHS = ICmp->getOperand(i_nocapture: `1`);
1427	// For the range reasoning, avoid computing SCEVs in the loop to avoid
1428	// poisoning cache with sub-optimal results. For the must-execute case,
1429	// this is a neccessary precondition for correctness.
1430	if (!L->isLoopInvariant(V: RHS)) {
1431	if (!L->isLoopInvariant(V: LHS))
1432	continue;
1433	// Same logic applies for the inverse case
1434	std::swap(a&: LHS, b&: RHS);
1435	}
1436
1437	// Match (icmp signed-cond zext, RHS)
1438	Value LHSOp = nullptr*;
1439	if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))) \|\| !ICmp->isSigned())
1440	continue;
1441
1442	const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
1443	const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
1444	auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
1445	FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
1446	auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
1447	if (FullCR.contains(CR: RHSCR)) {
1448	// We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1449	// replace the signed condition with the unsigned version.
1450	ICmp->setPredicate(ICmp->getUnsignedPredicate());
1451	Changed = true;
1452	// Note: No SCEV invalidation needed. We've changed the predicate, but
1453	// have not changed exit counts, or the values produced by the compare.
1454	continue;
1455	}
1456	}
1457
1458	// Now that we've canonicalized the condition to match the extend,
1459	// see if we can rotate the extend out of the loop.
1460	for (auto *ExitingBB : ExitingBlocks) {
1461	auto *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1462	if (!BI)
1463	continue;
1464	assert(BI->isConditional() && "exit branch must be conditional");
1465
1466	auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
1467	if (!ICmp \|\| !ICmp->hasOneUse() \|\| !ICmp->isUnsigned())
1468	continue;
1469
1470	bool Swapped = false;
1471	auto *LHS = ICmp->getOperand(i_nocapture: `0`);
1472	auto *RHS = ICmp->getOperand(i_nocapture: `1`);
1473	if (L->isLoopInvariant(V: LHS) == L->isLoopInvariant(V: RHS))
1474	// Nothing to rotate
1475	continue;
1476	if (L->isLoopInvariant(V: LHS)) {
1477	// Same logic applies for the inverse case until we actually pick
1478	// which operand of the compare to update.
1479	Swapped = true;
1480	std::swap(a&: LHS, b&: RHS);
1481	}
1482	assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1483
1484	// Match (icmp unsigned-cond zext, RHS)
1485	// TODO: Extend to handle corresponding sext/signed-cmp case
1486	// TODO: Extend to other invertible functions
1487	Value LHSOp = nullptr*;
1488	if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))))
1489	continue;
1490
1491	// In general, we only rotate if we can do so without increasing the number
1492	// of instructions. The exception is when we have an zext(add-rec). The
1493	// reason for allowing this exception is that we know we need to get rid
1494	// of the zext for SCEV to be able to compute a trip count for said loops;
1495	// we consider the new trip count valuable enough to increase instruction
1496	// count by one.
1497	if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: LHSOp)))
1498	continue;
1499
1500	// Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1501	// replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1502	// when zext is loop varying and RHS is loop invariant. This converts
1503	// loop varying work to loop-invariant work.
1504	auto doRotateTransform = [&]() {
1505	assert(ICmp->isUnsigned() && "must have proven unsigned already");
1506	auto *NewRHS = CastInst::Create(
1507	Instruction::Trunc, S: RHS, Ty: LHSOp->getType(), Name: "",
1508	InsertBefore: L->getLoopPreheader()->getTerminator()->getIterator());
1509	// NewRHS is an operation that has been hoisted out of the loop, and
1510	// therefore should have a dropped location.
1511	NewRHS->setDebugLoc(DebugLoc::getDropped());
1512	ICmp->setOperand(i_nocapture: Swapped ? `1` : `0`, Val_nocapture: LHSOp);
1513	ICmp->setOperand(i_nocapture: Swapped ? `0` : `1`, Val_nocapture: NewRHS);
1514	// Samesign flag cannot be preserved after narrowing the compare.
1515	ICmp->setSameSign(false);
1516	if (LHS->use_empty())
1517	DeadInsts.push_back(Elt: LHS);
1518	};
1519
1520	const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
1521	const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
1522	auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
1523	FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
1524	auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
1525	if (FullCR.contains(CR: RHSCR)) {
1526	doRotateTransform ();
1527	Changed = true;
1528	// Note, we are leaving SCEV in an unfortunately imprecise case here
1529	// as rotation tends to reveal information about trip counts not
1530	// previously visible.
1531	continue;
1532	}
1533	}
1534
1535	return Changed;
1536	}
1537
1538	bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1539	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1540	L->getExitingBlocks(ExitingBlocks);
1541
1542	// Remove all exits which aren't both rewriteable and execute on every
1543	// iteration.
1544	llvm::erase_if(C&: ExitingBlocks, P: [&](BasicBlock *ExitingBB) {
1545	// If our exitting block exits multiple loops, we can only rewrite the
1546	// innermost one. Otherwise, we're changing how many times the innermost
1547	// loop runs before it exits.
1548	if (LI->getLoopFor(BB: ExitingBB) != L)
1549	return true;
1550
1551	// Can't rewrite non-branch yet.
1552	BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1553	if (!BI)
1554	return true;
1555
1556	// Likewise, the loop latch must be dominated by the exiting BB.
1557	if (!DT->dominates(A: ExitingBB, B: L->getLoopLatch()))
1558	return true;
1559
1560	if (auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
1561	// If already constant, nothing to do. However, if this is an
1562	// unconditional exit, we can still replace header phis with their
1563	// preheader value.
1564	if (!L->contains(BB: BI->getSuccessor(i: CI->isNullValue())))
1565	replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1566	return true;
1567	}
1568
1569	return false;
1570	});
1571
1572	if (ExitingBlocks.empty())
1573	return false;
1574
1575	// Get a symbolic upper bound on the loop backedge taken count.
1576	const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1577	if (isa<SCEVCouldNotCompute>(Val: MaxBECount))
1578	return false;
1579
1580	// Visit our exit blocks in order of dominance. We know from the fact that
1581	// all exits must dominate the latch, so there is a total dominance order
1582	// between them.
1583	llvm::sort(C&: ExitingBlocks, Comp: [&](BasicBlock A, BasicBlock B) {
1584	// std::sort sorts in ascending order, so we want the inverse of
1585	// the normal dominance relation.
1586	if (A == B) return false;
1587	if (DT->properlyDominates(A, B))
1588	return true;
1589	else {
1590	assert(DT->properlyDominates(B, A) &&
1591	"expected total dominance order!");
1592	return false;
1593	}
1594	});
1595	#ifdef ASSERT
1596	for (unsigned i = `1`; i < ExitingBlocks.size(); i++) {
1597	assert(DT->dominates(ExitingBlocks[i-`1`], ExitingBlocks[i]));
1598	}
1599	#endif
1600
1601	bool Changed = false;
1602	bool SkipLastIter = false;
1603	const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1604	auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1605	if (SkipLastIter \|\| isa<SCEVCouldNotCompute>(Val: MaxExitCount))
1606	return;
1607	if (isa<SCEVCouldNotCompute>(Val: CurrMaxExit))
1608	CurrMaxExit = MaxExitCount;
1609	else
1610	CurrMaxExit = SE->getUMinFromMismatchedTypes(LHS: CurrMaxExit, RHS: MaxExitCount);
1611	// If the loop has more than 1 iteration, all further checks will be
1612	// executed 1 iteration less.
1613	if (CurrMaxExit == MaxBECount)
1614	SkipLastIter = true;
1615	};
1616	SmallSet<const SCEV *, `8`> DominatingExactExitCounts;
1617	for (BasicBlock *ExitingBB : ExitingBlocks) {
1618	const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1619	const SCEV *MaxExitCount = SE->getExitCount(
1620	L, ExitingBlock: ExitingBB, Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum);
1621	if (isa<SCEVCouldNotCompute>(Val: ExactExitCount)) {
1622	// Okay, we do not know the exit count here. Can we at least prove that it
1623	// will remain the same within iteration space?
1624	auto *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1625	auto OptimizeCond = [&](bool SkipLastIter) {
1626	return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1627	MaxIter: MaxBECount, SkipLastIter,
1628	SE, Rewriter, DeadInsts);
1629	};
1630
1631	// TODO: We might have proved that we can skip the last iteration for
1632	// this check. In this case, we only want to check the condition on the
1633	// pre-last iteration (MaxBECount - 1). However, there is a nasty
1634	// corner case:
1635	//
1636	// for (i = len; i != 0; i--) { ... check (i ult X) ... }
1637	//
1638	// If we could not prove that len != 0, then we also could not prove that
1639	// (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1640	// OptimizeCond will likely not prove anything for it, even if it could
1641	// prove the same fact for len.
1642	//
1643	// As a temporary solution, we query both last and pre-last iterations in
1644	// hope that we will be able to prove triviality for at least one of
1645	// them. We can stop querying MaxBECount for this case once SCEV
1646	// understands that (MaxBECount - 1) will not overflow here.
1647	if (OptimizeCond (false))
1648	Changed = true;
1649	else if (SkipLastIter && OptimizeCond (true))
1650	Changed = true;
1651	UpdateSkipLastIter (MaxExitCount);
1652	continue;
1653	}
1654
1655	UpdateSkipLastIter (ExactExitCount);
1656
1657	// If we know we'd exit on the first iteration, rewrite the exit to
1658	// reflect this. This does not imply the loop must exit through this
1659	// exit; there may be an earlier one taken on the first iteration.
1660	// We know that the backedge can't be taken, so we replace all
1661	// the header PHIs with values coming from the preheader.
1662	if (ExactExitCount->isZero()) {
1663	foldExit(L, ExitingBB, IsTaken: true, DeadInsts);
1664	replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1665	Changed = true;
1666	continue;
1667	}
1668
1669	assert(ExactExitCount->getType()->isIntegerTy() &&
1670	MaxBECount->getType()->isIntegerTy() &&
1671	"Exit counts must be integers");
1672
1673	Type *WiderType =
1674	SE->getWiderType(Ty1: MaxBECount->getType(), Ty2: ExactExitCount->getType());
1675	ExactExitCount = SE->getNoopOrZeroExtend(V: ExactExitCount, Ty: WiderType);
1676	MaxBECount = SE->getNoopOrZeroExtend(V: MaxBECount, Ty: WiderType);
1677	assert(MaxBECount->getType() == ExactExitCount->getType());
1678
1679	// Can we prove that some other exit must be taken strictly before this
1680	// one?
1681	if (SE->isLoopEntryGuardedByCond(L, Pred: CmpInst::ICMP_ULT, LHS: MaxBECount,
1682	RHS: ExactExitCount)) {
1683	foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1684	Changed = true;
1685	continue;
1686	}
1687
1688	// As we run, keep track of which exit counts we've encountered. If we
1689	// find a duplicate, we've found an exit which would have exited on the
1690	// exiting iteration, but (from the visit order) strictly follows another
1691	// which does the same and is thus dead.
1692	if (!DominatingExactExitCounts.insert(Ptr: ExactExitCount).second) {
1693	foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1694	Changed = true;
1695	continue;
1696	}
1697
1698	// TODO: There might be another oppurtunity to leverage SCEV's reasoning
1699	// here. If we kept track of the min of dominanting exits so far, we could
1700	// discharge exits with EC >= MDEC. This is less powerful than the existing
1701	// transform (since later exits aren't considered), but potentially more
1702	// powerful for any case where SCEV can prove a >=u b, but neither a == b
1703	// or a >u b. Such a case is not currently known.
1704	}
1705	return Changed;
1706	}
1707
1708	bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1709	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1710	L->getExitingBlocks(ExitingBlocks);
1711
1712	// Finally, see if we can rewrite our exit conditions into a loop invariant
1713	// form. If we have a read-only loop, and we can tell that we must exit down
1714	// a path which does not need any of the values computed within the loop, we
1715	// can rewrite the loop to exit on the first iteration. Note that this
1716	// doesn't either a) tell us the loop exits on the first iteration (unless
1717	// all* exits are predicateable) or b) tell us which exit might be taken.*
1718	// This transformation looks a lot like a restricted form of dead loop
1719	// elimination, but restricted to read-only loops and without neccesssarily
1720	// needing to kill the loop entirely.
1721	if (!LoopPredication)
1722	return false;
1723
1724	// Note: ExactBTC is the exact backedge taken count iff* the loop exits*
1725	// through explicit* control flow. We have to eliminate the possibility of*
1726	// implicit exits (see below) before we know it's truly exact.
1727	const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1728	if (isa<SCEVCouldNotCompute>(Val: ExactBTC) \|\| !Rewriter.isSafeToExpand(S: ExactBTC))
1729	return false;
1730
1731	assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1732	assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1733
1734	auto BadExit = [&](BasicBlock *ExitingBB) {
1735	// If our exiting block exits multiple loops, we can only rewrite the
1736	// innermost one. Otherwise, we're changing how many times the innermost
1737	// loop runs before it exits.
1738	if (LI->getLoopFor(BB: ExitingBB) != L)
1739	return true;
1740
1741	// Can't rewrite non-branch yet.
1742	BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1743	if (!BI)
1744	return true;
1745
1746	// If already constant, nothing to do.
1747	if (isa<Constant>(Val: BI->getCondition()))
1748	return true;
1749
1750	// If the exit block has phis, we need to be able to compute the values
1751	// within the loop which contains them. This assumes trivially lcssa phis
1752	// have already been removed; TODO: generalize
1753	BasicBlock *ExitBlock =
1754	BI->getSuccessor(i: L->contains(BB: BI->getSuccessor(i: `0`)) ? `1` : `0`);
1755	if (!ExitBlock->phis().empty())
1756	return true;
1757
1758	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1759	if (isa<SCEVCouldNotCompute>(Val: ExitCount) \|\|
1760	!Rewriter.isSafeToExpand(S: ExitCount))
1761	return true;
1762
1763	assert(SE->isLoopInvariant(ExitCount, L) &&
1764	"Exit count must be loop invariant");
1765	assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1766	return false;
1767	};
1768
1769	// Make sure all exits dominate the latch. This means there is a linear chain
1770	// of exits. We check this before sorting so we have a total order.
1771	BasicBlock *Latch = L->getLoopLatch();
1772	for (BasicBlock *ExitingBB : ExitingBlocks)
1773	if (!DT->dominates(A: ExitingBB, B: Latch))
1774	return false;
1775
1776	// If we have any exits which can't be predicated themselves, than we can't
1777	// predicate any exit which isn't guaranteed to execute before it. Consider
1778	// two exits (a) and (b) which would both exit on the same iteration. If we
1779	// can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1780	// we could convert a loop from exiting through (a) to one exiting through
1781	// (b). Note that this problem exists only for exits with the same exit
1782	// count, and we could be more aggressive when exit counts are known inequal.
1783	llvm::sort(C&: ExitingBlocks, Comp: [&](BasicBlock A, BasicBlock B) {
1784	// llvm::sort sorts in ascending order, so we want the inverse of
1785	// the normal dominance relation.
1786	if (A == B)
1787	return false;
1788	if (DT->properlyDominates(A, B))
1789	return true;
1790	if (DT->properlyDominates(A: B, B: A))
1791	return false;
1792	llvm_unreachable("Should have total dominance order");
1793	});
1794
1795	// Make sure our exit blocks are really a total order (i.e. a linear chain of
1796	// exits before the backedge).
1797	for (unsigned i = `1`; i < ExitingBlocks.size(); i++)
1798	assert(DT->dominates(ExitingBlocks[i - `1`], ExitingBlocks[i]) &&
1799	"Not sorted by dominance");
1800
1801	// Given our sorted total order, we know that exit[j] must be evaluated
1802	// after all exit[i] such j > i.
1803	for (unsigned i = `0`, e = ExitingBlocks.size(); i < e; i++)
1804	if (BadExit (ExitingBlocks [i])) {
1805	ExitingBlocks.resize(N: i);
1806	break;
1807	}
1808
1809	if (ExitingBlocks.empty())
1810	return false;
1811
1812	// At this point, ExitingBlocks consists of only those blocks which are
1813	// predicatable. Given that, we know we have at least one exit we can
1814	// predicate if the loop is doesn't have side effects and doesn't have any
1815	// implicit exits (because then our exact BTC isn't actually exact).
1816	// @Reviewers - As structured, this is O(I^2) for loop nests. Any
1817	// suggestions on how to improve this? I can obviously bail out for outer
1818	// loops, but that seems less than ideal. MemorySSA can find memory writes,
1819	// is that enough for all* side effects?*
1820	for (BasicBlock *BB : L->blocks())
1821	for (auto &I : *BB)
1822	// TODO:isGuaranteedToTransfer
1823	if (I.mayHaveSideEffects())
1824	return false;
1825
1826	bool Changed = false;
1827	// Finally, do the actual predication for all predicatable blocks. A couple
1828	// of notes here:
1829	// 1) We don't bother to constant fold dominated exits with identical exit
1830	// counts; that's simply a form of CSE/equality propagation and we leave
1831	// it for dedicated passes.
1832	// 2) We insert the comparison at the branch. Hoisting introduces additional
1833	// legality constraints and we leave that to dedicated logic. We want to
1834	// predicate even if we can't insert a loop invariant expression as
1835	// peeling or unrolling will likely reduce the cost of the otherwise loop
1836	// varying check.
1837	Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1838	IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1839	Value ExactBTCV = nullptr; // Lazily generated if needed.*
1840	for (BasicBlock *ExitingBB : ExitingBlocks) {
1841	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1842
1843	auto *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1844	Value *NewCond;
1845	if (ExitCount == ExactBTC) {
1846	NewCond = L->contains(BB: BI->getSuccessor(i: `0`)) ?
1847	B.getFalse() : B.getTrue();
1848	} else {
1849	Value *ECV = Rewriter.expandCodeFor(SH: ExitCount);
1850	if (!ExactBTCV)
1851	ExactBTCV = Rewriter.expandCodeFor(SH: ExactBTC);
1852	Value *RHS = ExactBTCV;
1853	if (ECV->getType() != RHS->getType()) {
1854	Type *WiderTy = SE->getWiderType(Ty1: ECV->getType(), Ty2: RHS->getType());
1855	ECV = B.CreateZExt(V: ECV, DestTy: WiderTy);
1856	RHS = B.CreateZExt(V: RHS, DestTy: WiderTy);
1857	}
1858	auto Pred = L->contains(BB: BI->getSuccessor(i: `0`)) ?
1859	ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1860	NewCond = B.CreateICmp(P: Pred, LHS: ECV, RHS);
1861	}
1862	Value *OldCond = BI->getCondition();
1863	BI->setCondition(NewCond);
1864	if (OldCond->use_empty())
1865	DeadInsts.emplace_back(Args&: OldCond);
1866	Changed = true;
1867	RunUnswitching = true;
1868	}
1869
1870	return Changed;
1871	}
1872
1873	//===----------------------------------------------------------------------===//
1874	// IndVarSimplify driver. Manage several subpasses of IV simplification.
1875	//===----------------------------------------------------------------------===//
1876
1877	bool IndVarSimplify::run(Loop *L) {
1878	// We need (and expect!) the incoming loop to be in LCSSA.
1879	assert(L->isRecursivelyLCSSAForm(DT, LI) &&
1880	"LCSSA required to run indvars!");
1881
1882	// If LoopSimplify form is not available, stay out of trouble. Some notes:
1883	// - LSR currently only supports LoopSimplify-form loops. Indvars'
1884	// canonicalization can be a pessimization without LSR to "clean up"
1885	// afterwards.
1886	// - We depend on having a preheader; in particular,
1887	// Loop::getCanonicalInductionVariable only supports loops with preheaders,
1888	// and we're in trouble if we can't find the induction variable even when
1889	// we've manually inserted one.
1890	// - LFTR relies on having a single backedge.
1891	if (!L->isLoopSimplifyForm())
1892	return false;
1893
1894	bool Changed = false;
1895	// If there are any floating-point recurrences, attempt to
1896	// transform them to use integer recurrences.
1897	Changed \|= rewriteNonIntegerIVs(L);
1898
1899	// Create a rewriter object which we'll use to transform the code with.
1900	SCEVExpander Rewriter(*SE, DL, "indvars");
1901	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
1902	Rewriter.setDebugType(DEBUG_TYPE);
1903	#endif
1904
1905	// Eliminate redundant IV users.
1906	//
1907	// Simplification works best when run before other consumers of SCEV. We
1908	// attempt to avoid evaluating SCEVs for sign/zero extend operations until
1909	// other expressions involving loop IVs have been evaluated. This helps SCEV
1910	// set no-wrap flags before normalizing sign/zero extension.
1911	Rewriter.disableCanonicalMode();
1912	Changed \|= simplifyAndExtend(L, Rewriter, LI);
1913
1914	// Check to see if we can compute the final value of any expressions
1915	// that are recurrent in the loop, and substitute the exit values from the
1916	// loop into any instructions outside of the loop that use the final values
1917	// of the current expressions.
1918	if (ReplaceExitValue != NeverRepl) {
1919	if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1920	ReplaceExitValue, DeadInsts)) {
1921	NumReplaced += Rewrites;
1922	Changed = true;
1923	}
1924	}
1925
1926	// Eliminate redundant IV cycles.
1927	NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1928
1929	// Try to convert exit conditions to unsigned and rotate computation
1930	// out of the loop. Note: Handles invalidation internally if needed.
1931	Changed \|= canonicalizeExitCondition(L);
1932
1933	// Try to eliminate loop exits based on analyzeable exit counts
1934	if (optimizeLoopExits(L, Rewriter)) {
1935	Changed = true;
1936	// Given we've changed exit counts, notify SCEV
1937	// Some nested loops may share same folded exit basic block,
1938	// thus we need to notify top most loop.
1939	SE->forgetTopmostLoop(L);
1940	}
1941
1942	// Try to form loop invariant tests for loop exits by changing how many
1943	// iterations of the loop run when that is unobservable.
1944	if (predicateLoopExits(L, Rewriter)) {
1945	Changed = true;
1946	// Given we've changed exit counts, notify SCEV
1947	SE->forgetLoop(L);
1948	}
1949
1950	// If we have a trip count expression, rewrite the loop's exit condition
1951	// using it.
1952	if (!DisableLFTR) {
1953	BasicBlock *PreHeader = L->getLoopPreheader();
1954
1955	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1956	L->getExitingBlocks(ExitingBlocks);
1957	for (BasicBlock *ExitingBB : ExitingBlocks) {
1958	// Can't rewrite non-branch yet.
1959	if (!isa<BranchInst>(Val: ExitingBB->getTerminator()))
1960	continue;
1961
1962	// If our exitting block exits multiple loops, we can only rewrite the
1963	// innermost one. Otherwise, we're changing how many times the innermost
1964	// loop runs before it exits.
1965	if (LI->getLoopFor(BB: ExitingBB) != L)
1966	continue;
1967
1968	if (!needsLFTR(L, ExitingBB))
1969	continue;
1970
1971	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1972	if (isa<SCEVCouldNotCompute>(Val: ExitCount))
1973	continue;
1974
1975	// This was handled above, but as we form SCEVs, we can sometimes refine
1976	// existing ones; this allows exit counts to be folded to zero which
1977	// weren't when optimizeLoopExits saw them. Arguably, we should iterate
1978	// until stable to handle cases like this better.
1979	if (ExitCount->isZero())
1980	continue;
1981
1982	PHINode *IndVar = FindLoopCounter(L, ExitingBB, BECount: ExitCount, SE, DT);
1983	if (!IndVar)
1984	continue;
1985
1986	// Avoid high cost expansions. Note: This heuristic is questionable in
1987	// that our definition of "high cost" is not exactly principled.
1988	if (Rewriter.isHighCostExpansion(Exprs: ExitCount, L, Budget: SCEVCheapExpansionBudget,
1989	TTI, At: PreHeader->getTerminator()))
1990	continue;
1991
1992	if (!Rewriter.isSafeToExpand(S: ExitCount))
1993	continue;
1994
1995	Changed \|= linearFunctionTestReplace(L, ExitingBB,
1996	ExitCount, IndVar,
1997	Rewriter);
1998	}
1999	}
2000	// Clear the rewriter cache, because values that are in the rewriter's cache
2001	// can be deleted in the loop below, causing the AssertingVH in the cache to
2002	// trigger.
2003	Rewriter.clear();
2004
2005	// Now that we're done iterating through lists, clean up any instructions
2006	// which are now dead.
2007	while (!DeadInsts.empty()) {
2008	Value *V = DeadInsts.pop_back_val();
2009
2010	if (PHINode *PHI = dyn_cast_or_null<PHINode>(Val: V))
2011	Changed \|= RecursivelyDeleteDeadPHINode(PN: PHI, TLI, MSSAU: MSSAU.get());
2012	else if (Instruction *Inst = dyn_cast_or_null<Instruction>(Val: V))
2013	Changed \|=
2014	RecursivelyDeleteTriviallyDeadInstructions(V: Inst, TLI, MSSAU: MSSAU.get());
2015	}
2016
2017	// The Rewriter may not be used from this point on.
2018
2019	// Loop-invariant instructions in the preheader that aren't used in the
2020	// loop may be sunk below the loop to reduce register pressure.
2021	Changed \|= sinkUnusedInvariants(L);
2022
2023	// rewriteFirstIterationLoopExitValues does not rely on the computation of
2024	// trip count and therefore can further simplify exit values in addition to
2025	// rewriteLoopExitValues.
2026	Changed \|= rewriteFirstIterationLoopExitValues(L);
2027
2028	// Clean up dead instructions.
2029	Changed \|= DeleteDeadPHIs(BB: L->getHeader(), TLI, MSSAU: MSSAU.get());
2030
2031	// Check a post-condition.
2032	assert(L->isRecursivelyLCSSAForm(DT, LI) &&
2033	"Indvars did not preserve LCSSA!");
2034	if (VerifyMemorySSA && MSSAU)
2035	MSSAU ->getMemorySSA()->verifyMemorySSA();
2036
2037	return Changed;
2038	}
2039
2040	PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2041	LoopStandardAnalysisResults &AR,
2042	LPMUpdater &) {
2043	Function *F = L.getHeader()->getParent();
2044	const DataLayout &DL = F->getDataLayout();
2045
2046	IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2047	WidenIndVars && AllowIVWidening);
2048	if (!IVS.run(L: &L))
2049	return PreservedAnalyses::all();
2050
2051	auto PA = getLoopPassPreservedAnalyses();
2052	PA.preserveSet<CFGAnalyses>();
2053	if (IVS.runUnswitching()) {
2054	AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(IR&: L, ExtraArgs&: AR);
2055	PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2056	}
2057
2058	if (AR.MSSA)
2059	PA.preserve<MemorySSAAnalysis>();
2060	return PA;
2061	}
2062

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp