IndVarSimplify.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp]

1	//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This transformation analyzes and transforms the induction variables (and
10	// computations derived from them) into simpler forms suitable for subsequent
11	// analysis and transformation.
12	//
13	// If the trip count of a loop is computable, this pass also makes the following
14	// changes:
15	// 1. The exit condition for the loop is canonicalized to compare the
16	// induction value against the exit value. This turns loops like:
17	// 'for (i = 7; ii < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'*
18	// 2. Any use outside of the loop of an expression derived from the indvar
19	// is changed to compute the derived value outside of the loop, eliminating
20	// the dependence on the exit value of the induction variable. If the only
21	// purpose of the loop is to compute the exit value of some derived
22	// expression, this transformation will make the loop dead.
23	//
24	//===----------------------------------------------------------------------===//
25
26	#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27	#include "llvm/ADT/APFloat.h"
28	#include "llvm/ADT/ArrayRef.h"
29	#include "llvm/ADT/STLExtras.h"
30	#include "llvm/ADT/SmallPtrSet.h"
31	#include "llvm/ADT/SmallVector.h"
32	#include "llvm/ADT/Statistic.h"
33	#include "llvm/ADT/iterator_range.h"
34	#include "llvm/Analysis/LoopInfo.h"
35	#include "llvm/Analysis/LoopPass.h"
36	#include "llvm/Analysis/MemorySSA.h"
37	#include "llvm/Analysis/MemorySSAUpdater.h"
38	#include "llvm/Analysis/ScalarEvolution.h"
39	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
40	#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
41	#include "llvm/Analysis/TargetLibraryInfo.h"
42	#include "llvm/Analysis/TargetTransformInfo.h"
43	#include "llvm/Analysis/ValueTracking.h"
44	#include "llvm/IR/BasicBlock.h"
45	#include "llvm/IR/Constant.h"
46	#include "llvm/IR/ConstantRange.h"
47	#include "llvm/IR/Constants.h"
48	#include "llvm/IR/DataLayout.h"
49	#include "llvm/IR/DerivedTypes.h"
50	#include "llvm/IR/Dominators.h"
51	#include "llvm/IR/Function.h"
52	#include "llvm/IR/IRBuilder.h"
53	#include "llvm/IR/InstrTypes.h"
54	#include "llvm/IR/Instruction.h"
55	#include "llvm/IR/Instructions.h"
56	#include "llvm/IR/IntrinsicInst.h"
57	#include "llvm/IR/Intrinsics.h"
58	#include "llvm/IR/PassManager.h"
59	#include "llvm/IR/PatternMatch.h"
60	#include "llvm/IR/Type.h"
61	#include "llvm/IR/Use.h"
62	#include "llvm/IR/User.h"
63	#include "llvm/IR/Value.h"
64	#include "llvm/IR/ValueHandle.h"
65	#include "llvm/Support/Casting.h"
66	#include "llvm/Support/CommandLine.h"
67	#include "llvm/Support/Debug.h"
68	#include "llvm/Support/MathExtras.h"
69	#include "llvm/Support/raw_ostream.h"
70	#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
71	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
72	#include "llvm/Transforms/Utils/Local.h"
73	#include "llvm/Transforms/Utils/LoopUtils.h"
74	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
75	#include "llvm/Transforms/Utils/SimplifyIndVar.h"
76	#include <cassert>
77	#include <cstdint>
78	#include <utility>
79
80	using namespace llvm;
81	using namespace PatternMatch;
82	using namespace SCEVPatternMatch;
83
84	#define DEBUG_TYPE "indvars"
85
86	STATISTIC(NumWidened , "Number of indvars widened");
87	STATISTIC(NumReplaced , "Number of exit values replaced");
88	STATISTIC(NumLFTR , "Number of loop exit tests replaced");
89	STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
90	STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
91
92	static cl::opt<ReplaceExitVal> ReplaceExitValue(
93	"replexitval", cl::Hidden, cl::init(Val: OnlyCheapRepl),
94	cl::desc ("Choose the strategy to replace exit value in IndVarSimplify"),
95	cl::values(
96	clEnumValN(NeverRepl, "never", "never replace exit value"),
97	clEnumValN(OnlyCheapRepl, "cheap",
98	"only replace exit value when the cost is cheap"),
99	clEnumValN(
100	UnusedIndVarInLoop, "unusedindvarinloop",
101	"only replace exit value when it is an unused "
102	"induction variable in the loop and has cheap replacement cost"),
103	clEnumValN(NoHardUse, "noharduse",
104	"only replace exit values when loop def likely dead"),
105	clEnumValN(AlwaysRepl, "always",
106	"always replace exit value whenever possible")));
107
108	static cl::opt<bool> UsePostIncrementRanges(
109	"indvars-post-increment-ranges", cl::Hidden,
110	cl::desc ("Use post increment control-dependent ranges in IndVarSimplify"),
111	cl::init(Val: true));
112
113	static cl::opt<bool>
114	DisableLFTR("disable-lftr", cl::Hidden, cl::init(Val: false),
115	cl::desc ("Disable Linear Function Test Replace optimization"));
116
117	static cl::opt<bool>
118	LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(Val: true),
119	cl::desc ("Predicate conditions in read only loops"));
120
121	static cl::opt<bool> LoopPredicationTraps(
122	"indvars-predicate-loop-traps", cl::Hidden, cl::init(Val: true),
123	cl::desc ("Predicate conditions that trap in loops with only local writes"));
124
125	static cl::opt<bool>
126	AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(Val: true),
127	cl::desc ("Allow widening of indvars to eliminate s/zext"));
128
129	namespace {
130
131	class IndVarSimplify {
132	LoopInfo *LI;
133	ScalarEvolution *SE;
134	DominatorTree *DT;
135	const DataLayout &DL;
136	TargetLibraryInfo *TLI;
137	const TargetTransformInfo *TTI;
138	std::unique_ptr<MemorySSAUpdater> MSSAU;
139
140	SmallVector<WeakTrackingVH, `16`> DeadInsts;
141	bool WidenIndVars;
142
143	bool RunUnswitching = false;
144
145	bool handleFloatingPointIV(Loop L, PHINode PH);
146	bool rewriteNonIntegerIVs(Loop *L);
147
148	bool simplifyAndExtend(Loop L, SCEVExpander &Rewriter, LoopInfo LI);
149	/// Try to improve our exit conditions by converting condition from signed
150	/// to unsigned or rotating computation out of the loop.
151	/// (See inline comment about why this is duplicated from simplifyAndExtend)
152	bool canonicalizeExitCondition(Loop *L);
153	/// Try to eliminate loop exits based on analyzeable exit counts
154	bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
155	/// Try to form loop invariant tests for loop exits by changing how many
156	/// iterations of the loop run when that is unobservable.
157	bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
158
159	bool rewriteFirstIterationLoopExitValues(Loop *L);
160
161	bool linearFunctionTestReplace(Loop L, BasicBlock ExitingBB,
162	const SCEV *ExitCount,
163	PHINode *IndVar, SCEVExpander &Rewriter);
164
165	bool sinkUnusedInvariants(Loop *L);
166
167	public:
168	IndVarSimplify(LoopInfo LI, ScalarEvolution SE, DominatorTree *DT,
169	const DataLayout &DL, TargetLibraryInfo *TLI,
170	TargetTransformInfo TTI, MemorySSA MSSA, bool WidenIndVars)
171	: LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
172	WidenIndVars(WidenIndVars) {
173	if (MSSA)
174	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
175	}
176
177	bool run(Loop *L);
178
179	bool runUnswitching() const { return RunUnswitching; }
180	};
181
182	} // end anonymous namespace
183
184	//===----------------------------------------------------------------------===//
185	// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
186	//===----------------------------------------------------------------------===//
187
188	/// Convert APF to an integer, if possible.
189	static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
190	bool isExact = false;
191	// See if we can convert this to an int64_t
192	uint64_t UIntVal;
193	if (APF.convertToInteger(Input: MutableArrayRef(UIntVal), Width: `64`, IsSigned: true,
194	RM: APFloat::rmTowardZero, IsExact: &isExact) != APFloat::opOK \|\|
195	!isExact)
196	return false;
197	IntVal = UIntVal;
198	return true;
199	}
200
201	/// Ensure we stay within the bounds of fp values that can be represented as
202	/// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and
203	/// double precision respectively (both on negative and positive side).
204	static bool isRepresentableAsExactInteger(const APFloat &FPVal,
205	int64_t IntVal) {
206	const auto &FltSema = FPVal.getSemantics();
207	if (!APFloat::isIEEELikeFP(FltSema))
208	return false;
209	return isUIntN(N: APFloat::semanticsPrecision(FltSema), x: AbsoluteValue(X: IntVal));
210	}
211
212	/// Represents a floating-point induction variable pattern that may be
213	/// convertible to integer form.
214	struct FloatingPointIV {
215	APFloat InitValue;
216	APFloat IncrValue;
217	APFloat ExitValue;
218	FCmpInst *Compare;
219	BinaryOperator *Add;
220
221	FloatingPointIV(APFloat Init, APFloat Incr, APFloat Exit, FCmpInst *Compare,
222	BinaryOperator *Add)
223	: InitValue (std::move(Init)), IncrValue (std::move(Incr)),
224	ExitValue (std::move(Exit)), Compare(Compare), Add(Add) {}
225	};
226
227	/// Represents the integer values for a converted IV.
228	struct IntegerIV {
229	int64_t InitValue;
230	int64_t IncrValue;
231	int64_t ExitValue;
232	CmpInst::Predicate NewPred;
233	};
234
235	static CmpInst::Predicate getIntegerPredicate(CmpInst::Predicate FPPred) {
236	switch (FPPred) {
237	case CmpInst::FCMP_OEQ:
238	case CmpInst::FCMP_UEQ:
239	return CmpInst::ICMP_EQ;
240	case CmpInst::FCMP_ONE:
241	case CmpInst::FCMP_UNE:
242	return CmpInst::ICMP_NE;
243	case CmpInst::FCMP_OGT:
244	case CmpInst::FCMP_UGT:
245	return CmpInst::ICMP_SGT;
246	case CmpInst::FCMP_OGE:
247	case CmpInst::FCMP_UGE:
248	return CmpInst::ICMP_SGE;
249	case CmpInst::FCMP_OLT:
250	case CmpInst::FCMP_ULT:
251	return CmpInst::ICMP_SLT;
252	case CmpInst::FCMP_OLE:
253	case CmpInst::FCMP_ULE:
254	return CmpInst::ICMP_SLE;
255	default:
256	return CmpInst::BAD_ICMP_PREDICATE;
257	}
258	}
259
260	/// Analyze a PN to determine whether it represents a simple floating-point
261	/// induction variable, with constant fp init, increment, and exit values.
262	///
263	/// Returns a FloatingPointIV struct if matched, std::nullopt otherwise.
264	static std::optional<FloatingPointIV>
265	maybeFloatingPointRecurrence(Loop L, PHINode PN) {
266	// Identify incoming and backedge for the PN.
267	unsigned IncomingEdge = L->contains(BB: PN->getIncomingBlock(i: `0`));
268	unsigned BackEdge = IncomingEdge ^ `1`;
269
270	// Check incoming value.
271	auto *InitValueVal = dyn_cast<ConstantFP>(Val: PN->getIncomingValue(i: IncomingEdge));
272	if (!InitValueVal)
273	return std::nullopt;
274
275	// Check IV increment. Reject this PN if increment operation is not
276	// an add or increment value can not be represented by an integer.
277	auto *Incr = dyn_cast<BinaryOperator>(Val: PN->getIncomingValue(i: BackEdge));
278	if (!Incr \|\| Incr->getOpcode() != Instruction::FAdd)
279	return std::nullopt;
280
281	// If this is not an add of the PHI with a constantfp, or if the constant fp
282	// is not an integer, bail out.
283	auto *IncValueVal = dyn_cast<ConstantFP>(Val: Incr->getOperand(i_nocapture: `1`));
284	if (!IncValueVal \|\| Incr->getOperand(i_nocapture: `0`) != PN)
285	return std::nullopt;
286
287	// Check Incr uses. One user is PN and the other user is an exit condition
288	// used by the conditional terminator.
289	// TODO: Should relax this, so as to allow any `fpext` that may occur.
290	if (!Incr->hasNUses(N: `2`))
291	return std::nullopt;
292
293	// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
294	// only used by a branch, we can't transform it.
295	auto It = llvm::find_if(Range: Incr->users(),
296	P: [](const User U) { return* isa<FCmpInst>(Val: U); });
297	if (It == Incr->users().end())
298	return std::nullopt;
299
300	FCmpInst Compare = cast<FCmpInst>(Val: It);
301	if (!Compare->hasOneUse())
302	return std::nullopt;
303
304	// We need to verify that the branch actually controls the iteration count
305	// of the loop. If not, the new IV can overflow and no one will notice.
306	// The branch block must be in the loop and one of the successors must be out
307	// of the loop.
308	auto *BI = dyn_cast<CondBrInst>(Val: Compare->user_back());
309	if (!BI)
310	return std::nullopt;
311
312	if (!L->contains(BB: BI->getParent()) \|\|
313	(L->contains(BB: BI->getSuccessor(i: `0`)) && L->contains(BB: BI->getSuccessor(i: `1`))))
314	return std::nullopt;
315
316	// If it isn't a comparison with an integer-as-fp (the exit value), we can't
317	// transform it.
318	auto *ExitValueVal = dyn_cast<ConstantFP>(Val: Compare->getOperand(i_nocapture: `1`));
319	if (!ExitValueVal)
320	return std::nullopt;
321
322	return FloatingPointIV (InitValueVal->getValueAPF(),
323	IncValueVal->getValueAPF(),
324	ExitValueVal->getValueAPF(), Compare, Incr);
325	}
326
327	/// Ensure that the floating-point IV can be converted to a semantics-preserving
328	/// signed 32-bit integer IV.
329	///
330	/// Returns a IntegerIV struct if possible, std::nullopt otherwise.
331	static std::optional<IntegerIV>
332	tryConvertToIntegerIV(const FloatingPointIV &FPIV) {
333	// Convert floating-point predicate to integer.
334	auto NewPred = getIntegerPredicate(FPPred: FPIV.Compare->getPredicate());
335	if (NewPred == CmpInst::BAD_ICMP_PREDICATE)
336	return std::nullopt;
337
338	// Convert APFloat values to signed integers.
339	int64_t InitValue, IncrValue, ExitValue;
340	if (!ConvertToSInt(APF: FPIV.InitValue, IntVal&: InitValue) \|\|
341	!ConvertToSInt(APF: FPIV.IncrValue, IntVal&: IncrValue) \|\|
342	!ConvertToSInt(APF: FPIV.ExitValue, IntVal&: ExitValue))
343	return std::nullopt;
344
345	// Bail out if integers cannot be represented exactly.
346	if (!isRepresentableAsExactInteger(FPVal: FPIV.InitValue, IntVal: InitValue) \|\|
347	!isRepresentableAsExactInteger(FPVal: FPIV.ExitValue, IntVal: ExitValue))
348	return std::nullopt;
349
350	// We convert the floating point induction variable to a signed i32 value if
351	// we can. This is only safe if the comparison will not overflow in a way that
352	// won't be trapped by the integer equivalent operations. Check for this now.
353	// TODO: We could use i64 if it is native and the range requires it.
354
355	// The start/stride/exit values must all fit in signed i32.
356	if (!isInt<`32`>(x: InitValue) \|\| !isInt<`32`>(x: IncrValue) \|\| !isInt<`32`>(x: ExitValue))
357	return std::nullopt;
358
359	// If not actually striding (add x, 0.0), avoid touching the code.
360	if (IncrValue == `0`)
361	return std::nullopt;
362
363	// Positive and negative strides have different safety conditions.
364	if (IncrValue > `0`) {
365	// If we have a positive stride, we require the init to be less than the
366	// exit value.
367	if (InitValue >= ExitValue)
368	return std::nullopt;
369
370	uint32_t Range = uint32_t(ExitValue - InitValue);
371	// Check for infinite loop, either:
372	// while (i <= Exit) or until (i > Exit)
373	if (NewPred == CmpInst::ICMP_SLE \|\| NewPred == CmpInst::ICMP_SGT) {
374	if (++Range == `0`)
375	return std::nullopt; // Range overflows.
376	}
377
378	unsigned Leftover = Range % uint32_t(IncrValue);
379
380	// If this is an equality comparison, we require that the strided value
381	// exactly land on the exit value, otherwise the IV condition will wrap
382	// around and do things the fp IV wouldn't.
383	if ((NewPred == CmpInst::ICMP_EQ \|\| NewPred == CmpInst::ICMP_NE) &&
384	Leftover != `0`)
385	return std::nullopt;
386
387	// If the stride would wrap around the i32 before exiting, we can't
388	// transform the IV.
389	if (Leftover != `0` && int32_t(ExitValue + IncrValue) < ExitValue)
390	return std::nullopt;
391	} else {
392	// If we have a negative stride, we require the init to be greater than the
393	// exit value.
394	if (InitValue <= ExitValue)
395	return std::nullopt;
396
397	uint32_t Range = uint32_t(InitValue - ExitValue);
398	// Check for infinite loop, either:
399	// while (i >= Exit) or until (i < Exit)
400	if (NewPred == CmpInst::ICMP_SGE \|\| NewPred == CmpInst::ICMP_SLT) {
401	if (++Range == `0`)
402	return std::nullopt; // Range overflows.
403	}
404
405	unsigned Leftover = Range % uint32_t(-IncrValue);
406
407	// If this is an equality comparison, we require that the strided value
408	// exactly land on the exit value, otherwise the IV condition will wrap
409	// around and do things the fp IV wouldn't.
410	if ((NewPred == CmpInst::ICMP_EQ \|\| NewPred == CmpInst::ICMP_NE) &&
411	Leftover != `0`)
412	return std::nullopt;
413
414	// If the stride would wrap around the i32 before exiting, we can't
415	// transform the IV.
416	if (Leftover != `0` && int32_t(ExitValue + IncrValue) > ExitValue)
417	return std::nullopt;
418	}
419
420	return IntegerIV{.InitValue: InitValue, .IncrValue: IncrValue, .ExitValue: ExitValue, .NewPred: NewPred};
421	}
422
423	/// Rewrite the floating-point IV as an integer IV.
424	static void canonicalizeToIntegerIV(Loop L, PHINode PN,
425	const FloatingPointIV &FPIV,
426	const IntegerIV &IIV,
427	const TargetLibraryInfo *TLI,
428	std::unique_ptr<MemorySSAUpdater> &MSSAU) {
429	unsigned IncomingEdge = L->contains(BB: PN->getIncomingBlock(i: `0`));
430	unsigned BackEdge = IncomingEdge ^ `1`;
431
432	IntegerType *Int32Ty = Type::getInt32Ty(C&: PN->getContext());
433	auto *Incr = cast<BinaryOperator>(Val: PN->getIncomingValue(i: BackEdge));
434	auto *BI = cast<CondBrInst>(Val: FPIV.Compare->user_back());
435
436	LLVM_DEBUG(dbgs() << "INDVARS: Rewriting floating-point IV to integer IV:\n"
437	<< " Init: " << IIV.InitValue << "\n"
438	<< " Incr: " << IIV.IncrValue << "\n"
439	<< " Exit: " << IIV.ExitValue << "\n"
440	<< " Pred: " << CmpInst::getPredicateName(IIV.NewPred)
441	<< "\n"
442	<< " Original PN: " << *PN << "\n");
443
444	// Insert new integer induction variable.
445	PHINode *NewPHI =
446	PHINode::Create(Ty: Int32Ty, NumReservedValues: `2`, NameStr: PN->getName() + ".int", InsertBefore: PN->getIterator());
447	NewPHI->addIncoming(V: ConstantInt::getSigned(Ty: Int32Ty, V: IIV.InitValue),
448	BB: PN->getIncomingBlock(i: IncomingEdge));
449	NewPHI->setDebugLoc(PN->getDebugLoc());
450
451	Instruction *NewAdd = BinaryOperator::CreateAdd(
452	V1: NewPHI, V2: ConstantInt::getSigned(Ty: Int32Ty, V: IIV.IncrValue),
453	Name: Incr->getName() + ".int", InsertBefore: Incr->getIterator());
454	NewAdd->setDebugLoc(Incr->getDebugLoc());
455	NewPHI->addIncoming(V: NewAdd, BB: PN->getIncomingBlock(i: BackEdge));
456
457	ICmpInst NewCompare = new* ICmpInst (
458	BI->getIterator(), IIV.NewPred, NewAdd,
459	ConstantInt::getSigned(Ty: Int32Ty, V: IIV.ExitValue), FPIV.Compare->getName());
460	NewCompare->setDebugLoc(FPIV.Compare->getDebugLoc());
461
462	// In the following deletions, PN may become dead and may be deleted.
463	// Use a WeakTrackingVH to observe whether this happens.
464	WeakTrackingVH WeakPH = PN;
465
466	// Delete the old floating point exit comparison. The branch starts using the
467	// new comparison.
468	NewCompare->takeName(V: FPIV.Compare);
469	FPIV.Compare->replaceAllUsesWith(V: NewCompare);
470	RecursivelyDeleteTriviallyDeadInstructions(V: FPIV.Compare, TLI, MSSAU: MSSAU.get());
471
472	// Delete the old floating point increment.
473	Incr->replaceAllUsesWith(V: PoisonValue::get(T: Incr->getType()));
474	RecursivelyDeleteTriviallyDeadInstructions(V: Incr, TLI, MSSAU: MSSAU.get());
475
476	// If the FP induction variable still has uses, this is because something else
477	// in the loop uses its value. In order to canonicalize the induction
478	// variable, we chose to eliminate the IV and rewrite it in terms of an
479	// int->fp cast.
480	//
481	// We give preference to sitofp over uitofp because it is faster on most
482	// platforms.
483	if (WeakPH) {
484	Instruction Conv = new* SIToFPInst (NewPHI, PN->getType(), "indvar.conv",
485	PN->getParent()->getFirstInsertionPt());
486	Conv->setDebugLoc(PN->getDebugLoc());
487	PN->replaceAllUsesWith(V: Conv);
488	RecursivelyDeleteTriviallyDeadInstructions(V: PN, TLI, MSSAU: MSSAU.get());
489	}
490	}
491
492	/// If the loop has a floating induction variable, then insert corresponding
493	/// integer induction variable if possible. For example, the following:
494	/// for(double i = 0; i < 10000; ++i)
495	/// bar(i)
496	/// is converted into
497	/// for(int i = 0; i < 10000; ++i)
498	/// bar((double)i);
499	bool IndVarSimplify::handleFloatingPointIV(Loop L, PHINode PN) {
500	// See if the PN matches a floating-point IV pattern.
501	auto FPIV = maybeFloatingPointRecurrence(L, PN);
502	if (!FPIV)
503	return false;
504
505	// Can we safely convert the floating-point values to integer ones?
506	auto IIV = tryConvertToIntegerIV(FPIV: *FPIV);
507	if (!IIV)
508	return false;
509
510	// Perform the rewriting.
511	canonicalizeToIntegerIV(L, PN, FPIV: FPIV, IIV: IIV, TLI, MSSAU);
512	return true;
513	}
514
515	bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
516	// First step. Check to see if there are any floating-point recurrences.
517	// If there are, change them into integer recurrences, permitting analysis by
518	// the SCEV routines.
519	BasicBlock *Header = L->getHeader();
520
521	SmallVector<WeakTrackingVH, `8`> PHIs(llvm::make_pointer_range(Range: Header->phis()));
522
523	bool Changed = false;
524	for (WeakTrackingVH &PHI : PHIs)
525	if (PHINode PN = dyn_cast_or_null<PHINode>(Val: &PHI))
526	Changed \|= handleFloatingPointIV(L, PN);
527
528	// If the loop previously had floating-point IV, ScalarEvolution
529	// may not have been able to compute a trip count. Now that we've done some
530	// re-writing, the trip count may be computable.
531	if (Changed)
532	SE->forgetLoop(L);
533	return Changed;
534	}
535
536	//===---------------------------------------------------------------------===//
537	// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
538	// they will exit at the first iteration.
539	//===---------------------------------------------------------------------===//
540
541	/// Check to see if this loop has loop invariant conditions which lead to loop
542	/// exits. If so, we know that if the exit path is taken, it is at the first
543	/// loop iteration. This lets us predict exit values of PHI nodes that live in
544	/// loop header.
545	bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
546	// Verify the input to the pass is already in LCSSA form.
547	assert(L->isLCSSAForm(*DT));
548
549	SmallVector<BasicBlock *, `8`> ExitBlocks;
550	L->getUniqueExitBlocks(ExitBlocks);
551
552	bool MadeAnyChanges = false;
553	for (auto *ExitBB : ExitBlocks) {
554	// If there are no more PHI nodes in this exit block, then no more
555	// values defined inside the loop are used on this path.
556	for (PHINode &PN : ExitBB->phis()) {
557	for (unsigned IncomingValIdx = `0`, E = PN.getNumIncomingValues();
558	IncomingValIdx != E; ++IncomingValIdx) {
559	auto *IncomingBB = PN.getIncomingBlock(i: IncomingValIdx);
560
561	// Can we prove that the exit must run on the first iteration if it
562	// runs at all? (i.e. early exits are fine for our purposes, but
563	// traces which lead to this exit being taken on the 2nd iteration
564	// aren't.) Note that this is about whether the exit branch is
565	// executed, not about whether it is taken.
566	if (!L->getLoopLatch() \|\|
567	!DT->dominates(A: IncomingBB, B: L->getLoopLatch()))
568	continue;
569
570	// Get condition that leads to the exit path.
571	auto *TermInst = IncomingBB->getTerminator();
572
573	Value Cond = nullptr*;
574	if (auto *BI = dyn_cast<CondBrInst>(Val: TermInst)) {
575	// Must be a conditional branch, otherwise the block
576	// should not be in the loop.
577	Cond = BI->getCondition();
578	} else if (auto *SI = dyn_cast<SwitchInst>(Val: TermInst))
579	Cond = SI->getCondition();
580	else
581	continue;
582
583	if (!L->isLoopInvariant(V: Cond))
584	continue;
585
586	auto *ExitVal = dyn_cast<PHINode>(Val: PN.getIncomingValue(i: IncomingValIdx));
587
588	// Only deal with PHIs in the loop header.
589	if (!ExitVal \|\| ExitVal->getParent() != L->getHeader())
590	continue;
591
592	// If ExitVal is a PHI on the loop header, then we know its
593	// value along this exit because the exit can only be taken
594	// on the first iteration.
595	auto *LoopPreheader = L->getLoopPreheader();
596	assert(LoopPreheader && "Invalid loop");
597	int PreheaderIdx = ExitVal->getBasicBlockIndex(BB: LoopPreheader);
598	if (PreheaderIdx != -`1`) {
599	assert(ExitVal->getParent() == L->getHeader() &&
600	"ExitVal must be in loop header");
601	MadeAnyChanges = true;
602	PN.setIncomingValue(i: IncomingValIdx,
603	V: ExitVal->getIncomingValue(i: PreheaderIdx));
604	SE->forgetValue(V: &PN);
605	}
606	}
607	}
608	}
609	return MadeAnyChanges;
610	}
611
612	//===----------------------------------------------------------------------===//
613	// IV Widening - Extend the width of an IV to cover its widest uses.
614	//===----------------------------------------------------------------------===//
615
616	/// Update information about the induction variable that is extended by this
617	/// sign or zero extend operation. This is used to determine the final width of
618	/// the IV before actually widening it.
619	static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
620	ScalarEvolution *SE,
621	const TargetTransformInfo *TTI) {
622	bool IsSigned = Cast->getOpcode() == Instruction::SExt;
623	if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
624	return;
625
626	Type *Ty = Cast->getType();
627	uint64_t Width = SE->getTypeSizeInBits(Ty);
628	if (!Cast->getDataLayout().isLegalInteger(Width))
629	return;
630
631	// Check that `Cast` actually extends the induction variable (we rely on this
632	// later). This takes care of cases where `Cast` is extending a truncation of
633	// the narrow induction variable, and thus can end up being narrower than the
634	// "narrow" induction variable.
635	uint64_t NarrowIVWidth = SE->getTypeSizeInBits(Ty: WI.NarrowIV->getType());
636	if (NarrowIVWidth >= Width)
637	return;
638
639	// Cast is either an sext or zext up to this point.
640	// We should not widen an indvar if arithmetics on the wider indvar are more
641	// expensive than those on the narrower indvar. We check only the cost of ADD
642	// because at least an ADD is required to increment the induction variable. We
643	// could compute more comprehensively the cost of all instructions on the
644	// induction variable when necessary.
645	if (TTI &&
646	TTI->getArithmeticInstrCost(Opcode: Instruction::Add, Ty) >
647	TTI->getArithmeticInstrCost(Opcode: Instruction::Add,
648	Ty: Cast->getOperand(i_nocapture: `0`)->getType())) {
649	return;
650	}
651
652	if (!WI.WidestNativeType \|\|
653	Width > SE->getTypeSizeInBits(Ty: WI.WidestNativeType)) {
654	WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
655	WI.IsSigned = IsSigned;
656	return;
657	}
658
659	// We extend the IV to satisfy the sign of its user(s), or 'signed'
660	// if there are multiple users with both sign- and zero extensions,
661	// in order not to introduce nondeterministic behaviour based on the
662	// unspecified order of a PHI nodes' users-iterator.
663	WI.IsSigned \|= IsSigned;
664	}
665
666	//===----------------------------------------------------------------------===//
667	// Live IV Reduction - Minimize IVs live across the loop.
668	//===----------------------------------------------------------------------===//
669
670	//===----------------------------------------------------------------------===//
671	// Simplification of IV users based on SCEV evaluation.
672	//===----------------------------------------------------------------------===//
673
674	namespace {
675
676	class IndVarSimplifyVisitor : public IVVisitor {
677	ScalarEvolution *SE;
678	const TargetTransformInfo *TTI;
679	PHINode *IVPhi;
680
681	public:
682	WideIVInfo WI;
683
684	IndVarSimplifyVisitor(PHINode IV, ScalarEvolution SCEV,
685	const TargetTransformInfo *TTI,
686	const DominatorTree *DTree)
687	: SE(SCEV), TTI(TTI), IVPhi(IV) {
688	DT = DTree;
689	WI.NarrowIV = IVPhi;
690	}
691
692	// Implement the interface used by simplifyUsersOfIV.
693	void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
694	};
695
696	} // end anonymous namespace
697
698	/// Iteratively perform simplification on a worklist of IV users. Each
699	/// successive simplification may push more users which may themselves be
700	/// candidates for simplification.
701	///
702	/// Sign/Zero extend elimination is interleaved with IV simplification.
703	bool IndVarSimplify::simplifyAndExtend(Loop *L,
704	SCEVExpander &Rewriter,
705	LoopInfo *LI) {
706	SmallVector<WideIVInfo, `8`> WideIVs;
707
708	auto *GuardDecl = Intrinsic::getDeclarationIfExists(
709	M: L->getBlocks()[`0`]->getModule(), id: Intrinsic::experimental_guard);
710	bool HasGuards = GuardDecl && !GuardDecl->use_empty();
711
712	SmallVector<PHINode *, `8`> LoopPhis(
713	llvm::make_pointer_range(Range: L->getHeader()->phis()));
714
715	// Each round of simplification iterates through the SimplifyIVUsers worklist
716	// for all current phis, then determines whether any IVs can be
717	// widened. Widening adds new phis to LoopPhis, inducing another round of
718	// simplification on the wide IVs.
719	bool Changed = false;
720	while (!LoopPhis.empty()) {
721	// Evaluate as many IV expressions as possible before widening any IVs. This
722	// forces SCEV to set no-wrap flags before evaluating sign/zero
723	// extension. The first time SCEV attempts to normalize sign/zero extension,
724	// the result becomes final. So for the most predictable results, we delay
725	// evaluation of sign/zero extend evaluation until needed, and avoid running
726	// other SCEV based analysis prior to simplifyAndExtend.
727	do {
728	PHINode *CurrIV = LoopPhis.pop_back_val();
729
730	// Information about sign/zero extensions of CurrIV.
731	IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
732
733	const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, Dead&: DeadInsts,
734	Rewriter, V: &Visitor);
735
736	Changed \|= C;
737	RunUnswitching \|= U;
738	if (Visitor.WI.WidestNativeType) {
739	WideIVs.push_back(Elt: Visitor.WI);
740	}
741	} while(!LoopPhis.empty());
742
743	// Continue if we disallowed widening.
744	if (!WidenIndVars)
745	continue;
746
747	for (; !WideIVs.empty(); WideIVs.pop_back()) {
748	unsigned ElimExt;
749	unsigned Widened;
750	if (PHINode *WidePhi = createWideIV(WI: WideIVs.back(), LI, SE, Rewriter,
751	DT, DeadInsts, NumElimExt&: ElimExt, NumWidened&: Widened,
752	HasGuards, UsePostIncrementRanges)) {
753	NumElimExt += ElimExt;
754	NumWidened += Widened;
755	Changed = true;
756	LoopPhis.push_back(Elt: WidePhi);
757	}
758	}
759	}
760	return Changed;
761	}
762
763	//===----------------------------------------------------------------------===//
764	// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
765	//===----------------------------------------------------------------------===//
766
767	/// Given an Value which is hoped to be part of an add recurance in the given
768	/// loop, return the associated Phi node if so. Otherwise, return null. Note
769	/// that this is less general than SCEVs AddRec checking.
770	static PHINode getLoopPhiForCounter(Value IncV, Loop *L) {
771	Instruction *IncI = dyn_cast<Instruction>(Val: IncV);
772	if (!IncI)
773	return nullptr;
774
775	switch (IncI->getOpcode()) {
776	case Instruction::Add:
777	case Instruction::Sub:
778	break;
779	case Instruction::GetElementPtr:
780	// An IV counter must preserve its type.
781	if (IncI->getNumOperands() == `2`)
782	break;
783	[[fallthrough]];
784	default:
785	return nullptr;
786	}
787
788	PHINode *Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: `0`));
789	if (Phi && Phi->getParent() == L->getHeader()) {
790	if (L->isLoopInvariant(V: IncI->getOperand(i: `1`)))
791	return Phi;
792	return nullptr;
793	}
794	if (IncI->getOpcode() == Instruction::GetElementPtr)
795	return nullptr;
796
797	// Allow add/sub to be commuted.
798	Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: `1`));
799	if (Phi && Phi->getParent() == L->getHeader()) {
800	if (L->isLoopInvariant(V: IncI->getOperand(i: `0`)))
801	return Phi;
802	}
803	return nullptr;
804	}
805
806	/// Whether the current loop exit test is based on this value. Currently this
807	/// is limited to a direct use in the loop condition.
808	static bool isLoopExitTestBasedOn(Value V, BasicBlock ExitingBB) {
809	CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
810	ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
811	// TODO: Allow non-icmp loop test.
812	if (!ICmp)
813	return false;
814
815	// TODO: Allow indirect use.
816	return ICmp->getOperand(i_nocapture: `0`) == V \|\| ICmp->getOperand(i_nocapture: `1`) == V;
817	}
818
819	/// linearFunctionTestReplace policy. Return true unless we can show that the
820	/// current exit test is already sufficiently canonical.
821	static bool needsLFTR(Loop L, BasicBlock ExitingBB) {
822	assert(L->getLoopLatch() && "Must be in simplified form");
823
824	// Avoid converting a constant or loop invariant test back to a runtime
825	// test. This is critical for when SCEV's cached ExitCount is less precise
826	// than the current IR (such as after we've proven a particular exit is
827	// actually dead and thus the BE count never reaches our ExitCount.)
828	CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
829	if (L->isLoopInvariant(V: BI->getCondition()))
830	return false;
831
832	// Do LFTR to simplify the exit condition to an ICMP.
833	ICmpInst *Cond = dyn_cast<ICmpInst>(Val: BI->getCondition());
834	if (!Cond)
835	return true;
836
837	// Do LFTR to simplify the exit ICMP to EQ/NE
838	ICmpInst::Predicate Pred = Cond->getPredicate();
839	if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
840	return true;
841
842	// Look for a loop invariant RHS
843	Value *LHS = Cond->getOperand(i_nocapture: `0`);
844	Value *RHS = Cond->getOperand(i_nocapture: `1`);
845	if (!L->isLoopInvariant(V: RHS)) {
846	if (!L->isLoopInvariant(V: LHS))
847	return true;
848	std::swap(a&: LHS, b&: RHS);
849	}
850	// Look for a simple IV counter LHS
851	PHINode *Phi = dyn_cast<PHINode>(Val: LHS);
852	if (!Phi)
853	Phi = getLoopPhiForCounter(IncV: LHS, L);
854
855	if (!Phi)
856	return true;
857
858	// Do LFTR if PHI node is defined in the loop, but is not* a counter.*
859	int Idx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
860	if (Idx < `0`)
861	return true;
862
863	// Do LFTR if the exit condition's IV is not* a simple counter.*
864	Value *IncV = Phi->getIncomingValue(i: Idx);
865	return Phi != getLoopPhiForCounter(IncV, L);
866	}
867
868	/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
869	/// down to checking that all operands are constant and listing instructions
870	/// that may hide undef.
871	static bool hasConcreteDefImpl(Value V, SmallPtrSetImpl<Value> &Visited,
872	unsigned Depth) {
873	if (isa<Constant>(Val: V))
874	return !isa<UndefValue>(Val: V);
875
876	if (Depth >= `6`)
877	return false;
878
879	// Conservatively handle non-constant non-instructions. For example, Arguments
880	// may be undef.
881	Instruction *I = dyn_cast<Instruction>(Val: V);
882	if (!I)
883	return false;
884
885	// Load and return values may be undef.
886	if(I->mayReadFromMemory() \|\| isa<CallInst>(Val: I) \|\| isa<InvokeInst>(Val: I))
887	return false;
888
889	// Optimistically handle other instructions.
890	for (Value *Op : I->operands()) {
891	if (!Visited.insert(Ptr: Op).second)
892	continue;
893	if (!hasConcreteDefImpl(V: Op, Visited, Depth: Depth+`1`))
894	return false;
895	}
896	return true;
897	}
898
899	/// Return true if the given value is concrete. We must prove that undef can
900	/// never reach it.
901	///
902	/// TODO: If we decide that this is a good approach to checking for undef, we
903	/// may factor it into a common location.
904	static bool hasConcreteDef(Value *V) {
905	SmallPtrSet<Value*, `8`> Visited;
906	Visited.insert(Ptr: V);
907	return hasConcreteDefImpl(V, Visited, Depth: `0`);
908	}
909
910	/// Return true if the given phi is a "counter" in L. A counter is an
911	/// add recurance (of integer or pointer type) with an arbitrary start, and a
912	/// step of 1. Note that L must have exactly one latch.
913	static bool isLoopCounter(PHINode* Phi, Loop *L,
914	ScalarEvolution *SE) {
915	assert(Phi->getParent() == L->getHeader());
916	assert(L->getLoopLatch());
917
918	if (!SE->isSCEVable(Ty: Phi->getType()))
919	return false;
920
921	const SCEV *S = SE->getSCEV(V: Phi);
922	if (!match(S, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_scev_One(), L: m_SpecificLoop(L))))
923	return false;
924
925	int LatchIdx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
926	Value *IncV = Phi->getIncomingValue(i: LatchIdx);
927	return (getLoopPhiForCounter(IncV, L) == Phi &&
928	isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncV)));
929	}
930
931	/// Search the loop header for a loop counter (anadd rec w/step of one)
932	/// suitable for use by LFTR. If multiple counters are available, select the
933	/// "best" one based profitable heuristics.
934	///
935	/// BECount may be an i8 pointer type. The pointer difference is already*
936	/// valid count without scaling the address stride, so it remains a pointer
937	/// expression as far as SCEV is concerned.
938	static PHINode FindLoopCounter(Loop L, BasicBlock *ExitingBB,
939	const SCEV *BECount,
940	ScalarEvolution SE, DominatorTree DT) {
941	uint64_t BCWidth = SE->getTypeSizeInBits(Ty: BECount->getType());
942
943	Value *Cond = cast<CondBrInst>(Val: ExitingBB->getTerminator())->getCondition();
944
945	// Loop over all of the PHI nodes, looking for a simple counter.
946	PHINode BestPhi = nullptr*;
947	const SCEV BestInit = nullptr*;
948	BasicBlock *LatchBlock = L->getLoopLatch();
949	assert(LatchBlock && "Must be in simplified form");
950	const DataLayout &DL = L->getHeader()->getDataLayout();
951
952	for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(Val: I); ++I) {
953	PHINode *Phi = cast<PHINode>(Val&: I);
954	if (!isLoopCounter(Phi, L, SE))
955	continue;
956
957	const auto *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: Phi));
958
959	// AR may be a pointer type, while BECount is an integer type.
960	// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
961	// AR may not be a narrower type, or we may never exit.
962	uint64_t PhiWidth = SE->getTypeSizeInBits(Ty: AR->getType());
963	if (PhiWidth < BCWidth \|\| !DL.isLegalInteger(Width: PhiWidth))
964	continue;
965
966	// Avoid reusing a potentially undef value to compute other values that may
967	// have originally had a concrete definition.
968	if (!hasConcreteDef(V: Phi)) {
969	// We explicitly allow unknown phis as long as they are already used by
970	// the loop exit test. This is legal since performing LFTR could not
971	// increase the number of undef users.
972	Value *IncPhi = Phi->getIncomingValueForBlock(BB: LatchBlock);
973	if (!isLoopExitTestBasedOn(V: Phi, ExitingBB) &&
974	!isLoopExitTestBasedOn(V: IncPhi, ExitingBB))
975	continue;
976	}
977
978	// Avoid introducing undefined behavior due to poison which didn't exist in
979	// the original program. (Annoyingly, the rules for poison and undef
980	// propagation are distinct, so this does NOT cover the undef case above.)
981	// We have to ensure that we don't introduce UB by introducing a use on an
982	// iteration where said IV produces poison. Our strategy here differs for
983	// pointers and integer IVs. For integers, we strip and reinfer as needed,
984	// see code in linearFunctionTestReplace. For pointers, we restrict
985	// transforms as there is no good way to reinfer inbounds once lost.
986	if (!Phi->getType()->isIntegerTy() &&
987	!mustExecuteUBIfPoisonOnPathTo(Root: Phi, OnPathTo: ExitingBB->getTerminator(), DT))
988	continue;
989
990	const SCEV *Init = AR->getStart();
991
992	if (BestPhi && !isAlmostDeadIV(IV: BestPhi, LatchBlock, Cond)) {
993	// Don't force a live loop counter if another IV can be used.
994	if (isAlmostDeadIV(IV: Phi, LatchBlock, Cond))
995	continue;
996
997	// Prefer to count-from-zero. This is a more "canonical" counter form. It
998	// also prefers integer to pointer IVs.
999	if (BestInit->isZero() != Init->isZero()) {
1000	if (BestInit->isZero())
1001	continue;
1002	}
1003	// If two IVs both count from zero or both count from nonzero then the
1004	// narrower is likely a dead phi that has been widened. Use the wider phi
1005	// to allow the other to be eliminated.
1006	else if (PhiWidth <= SE->getTypeSizeInBits(Ty: BestPhi->getType()))
1007	continue;
1008	}
1009	BestPhi = Phi;
1010	BestInit = Init;
1011	}
1012	return BestPhi;
1013	}
1014
1015	/// Insert an IR expression which computes the value held by the IV IndVar
1016	/// (which must be an loop counter w/unit stride) after the backedge of loop L
1017	/// is taken ExitCount times.
1018	static Value genLoopLimit(PHINode IndVar, BasicBlock *ExitingBB,
1019	const SCEV ExitCount, bool* UsePostInc, Loop *L,
1020	SCEVExpander &Rewriter, ScalarEvolution *SE) {
1021	assert(isLoopCounter(IndVar, L, SE));
1022	assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
1023	const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IndVar));
1024	assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
1025
1026	// For integer IVs, truncate the IV before computing the limit unless we
1027	// know apriori that the limit must be a constant when evaluated in the
1028	// bitwidth of the IV. We prefer (potentially) keeping a truncate of the
1029	// IV in the loop over a (potentially) expensive expansion of the widened
1030	// exit count add(zext(add)) expression.
1031	if (IndVar->getType()->isIntegerTy() &&
1032	SE->getTypeSizeInBits(Ty: AR->getType()) >
1033	SE->getTypeSizeInBits(Ty: ExitCount->getType())) {
1034	const SCEV *IVInit = AR->getStart();
1035	if (!isa<SCEVConstant>(Val: IVInit) \|\| !isa<SCEVConstant>(Val: ExitCount)) {
1036	const SCEV *TruncExpr = SE->getTruncateExpr(Op: AR, Ty: ExitCount->getType());
1037
1038	// The following bailout is necessary due to the interaction with
1039	// the depth limit in SCEV analysis.
1040	if (!isa<SCEVAddRecExpr>(Val: TruncExpr))
1041	return nullptr;
1042	AR = cast<SCEVAddRecExpr>(Val: TruncExpr);
1043	}
1044	}
1045
1046	const SCEVAddRecExpr ARBase = UsePostInc ? AR->getPostIncExpr(SE&: SE) : AR;
1047	const SCEV IVLimit = ARBase->evaluateAtIteration(It: ExitCount, SE&: SE);
1048	assert(SE->isLoopInvariant(IVLimit, L) &&
1049	"Computed iteration count is not loop invariant!");
1050	return Rewriter.expandCodeFor(SH: IVLimit, Ty: ARBase->getType(),
1051	I: ExitingBB->getTerminator());
1052	}
1053
1054	/// This method rewrites the exit condition of the loop to be a canonical !=
1055	/// comparison against the incremented loop induction variable. This pass is
1056	/// able to rewrite the exit tests of any loop where the SCEV analysis can
1057	/// determine a loop-invariant trip count of the loop, which is actually a much
1058	/// broader range than just linear tests.
1059	bool IndVarSimplify::
1060	linearFunctionTestReplace(Loop L, BasicBlock ExitingBB,
1061	const SCEV *ExitCount,
1062	PHINode *IndVar, SCEVExpander &Rewriter) {
1063	assert(L->getLoopLatch() && "Loop no longer in simplified form?");
1064	assert(isLoopCounter(IndVar, L, SE));
1065	Instruction * const IncVar =
1066	cast<Instruction>(Val: IndVar->getIncomingValueForBlock(BB: L->getLoopLatch()));
1067
1068	// Initialize CmpIndVar to the preincremented IV.
1069	Value *CmpIndVar = IndVar;
1070	bool UsePostInc = false;
1071
1072	// If the exiting block is the same as the backedge block, we prefer to
1073	// compare against the post-incremented value, otherwise we must compare
1074	// against the preincremented value.
1075	if (ExitingBB == L->getLoopLatch()) {
1076	// For pointer IVs, we chose to not strip inbounds which requires us not
1077	// to add a potentially UB introducing use. We need to either a) show
1078	// the loop test we're modifying is already in post-inc form, or b) show
1079	// that adding a use must not introduce UB.
1080	bool SafeToPostInc =
1081	IndVar->getType()->isIntegerTy() \|\|
1082	isLoopExitTestBasedOn(V: IncVar, ExitingBB) \|\|
1083	mustExecuteUBIfPoisonOnPathTo(Root: IncVar, OnPathTo: ExitingBB->getTerminator(), DT);
1084	if (SafeToPostInc) {
1085	UsePostInc = true;
1086	CmpIndVar = IncVar;
1087	}
1088	}
1089
1090	Value *ExitCnt =
1091	genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1092	if (!ExitCnt)
1093	return false;
1094
1095	assert(ExitCnt->getType()->isPointerTy() ==
1096	IndVar->getType()->isPointerTy() &&
1097	"genLoopLimit missed a cast");
1098
1099	// It may be necessary to drop nowrap flags on the incrementing instruction
1100	// if either LFTR moves from a pre-inc check to a post-inc check (in which
1101	// case the increment might have previously been poison on the last iteration
1102	// only) or if LFTR switches to a different IV that was previously dynamically
1103	// dead (and as such may be arbitrarily poison). We remove any nowrap flags
1104	// that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
1105	// check), because the pre-inc addrec flags may be adopted from the original
1106	// instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
1107	// TODO: This handling is inaccurate for one case: If we switch to a
1108	// dynamically dead IV that wraps on the first loop iteration only, which is
1109	// not covered by the post-inc addrec. (If the new IV was not dynamically
1110	// dead, it could not be poison on the first iteration in the first place.)
1111	if (auto *BO = dyn_cast<BinaryOperator>(Val: IncVar)) {
1112	const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncVar));
1113	if (BO->hasNoUnsignedWrap())
1114	BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
1115	if (BO->hasNoSignedWrap())
1116	BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
1117	}
1118
1119	// Insert a new icmp_ne or icmp_eq instruction before the branch.
1120	CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1121	ICmpInst::Predicate P;
1122	if (L->contains(BB: BI->getSuccessor(i: `0`)))
1123	P = ICmpInst::ICMP_NE;
1124	else
1125	P = ICmpInst::ICMP_EQ;
1126
1127	IRBuilder<> Builder(BI);
1128
1129	// The new loop exit condition should reuse the debug location of the
1130	// original loop exit condition.
1131	if (auto *Cond = dyn_cast<Instruction>(Val: BI->getCondition()))
1132	Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1133
1134	// For integer IVs, if we evaluated the limit in the narrower bitwidth to
1135	// avoid the expensive expansion of the limit expression in the wider type,
1136	// emit a truncate to narrow the IV to the ExitCount type. This is safe
1137	// since we know (from the exit count bitwidth), that we can't self-wrap in
1138	// the narrower type.
1139	unsigned CmpIndVarSize = SE->getTypeSizeInBits(Ty: CmpIndVar->getType());
1140	unsigned ExitCntSize = SE->getTypeSizeInBits(Ty: ExitCnt->getType());
1141	if (CmpIndVarSize > ExitCntSize) {
1142	assert(!CmpIndVar->getType()->isPointerTy() &&
1143	!ExitCnt->getType()->isPointerTy());
1144
1145	// Before resorting to actually inserting the truncate, use the same
1146	// reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1147	// the other side of the comparison instead. We still evaluate the limit
1148	// in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1149	// a truncate within in.
1150	bool Extended = false;
1151	const SCEV *IV = SE->getSCEV(V: CmpIndVar);
1152	const SCEV *TruncatedIV = SE->getTruncateExpr(Op: IV, Ty: ExitCnt->getType());
1153	const SCEV *ZExtTrunc =
1154	SE->getZeroExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());
1155
1156	if (ZExtTrunc == IV) {
1157	Extended = true;
1158	ExitCnt = Builder.CreateZExt(V: ExitCnt, DestTy: IndVar->getType(),
1159	Name: "wide.trip.count");
1160	} else {
1161	const SCEV *SExtTrunc =
1162	SE->getSignExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());
1163	if (SExtTrunc == IV) {
1164	Extended = true;
1165	ExitCnt = Builder.CreateSExt(V: ExitCnt, DestTy: IndVar->getType(),
1166	Name: "wide.trip.count");
1167	}
1168	}
1169
1170	if (Extended) {
1171	bool Discard;
1172	L->makeLoopInvariant(V: ExitCnt, Changed&: Discard);
1173	} else
1174	CmpIndVar = Builder.CreateTrunc(V: CmpIndVar, DestTy: ExitCnt->getType(),
1175	Name: "lftr.wideiv");
1176	}
1177	LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1178	<< " LHS:" << *CmpIndVar << `'\n'`
1179	<< " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1180	<< "\n"
1181	<< " RHS:\t" << *ExitCnt << "\n"
1182	<< "ExitCount:\t" << *ExitCount << "\n"
1183	<< " was: " << *BI->getCondition() << "\n");
1184
1185	Value *Cond = Builder.CreateICmp(P, LHS: CmpIndVar, RHS: ExitCnt, Name: "exitcond");
1186	Value *OrigCond = BI->getCondition();
1187	// It's tempting to use replaceAllUsesWith here to fully replace the old
1188	// comparison, but that's not immediately safe, since users of the old
1189	// comparison may not be dominated by the new comparison. Instead, just
1190	// update the branch to use the new comparison; in the common case this
1191	// will make old comparison dead.
1192	BI->setCondition(Cond);
1193	DeadInsts.emplace_back(Args&: OrigCond);
1194
1195	++NumLFTR;
1196	return true;
1197	}
1198
1199	//===----------------------------------------------------------------------===//
1200	// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1201	//===----------------------------------------------------------------------===//
1202
1203	/// If there's a single exit block, sink any loop-invariant values that
1204	/// were defined in the preheader but not used inside the loop into the
1205	/// exit block to reduce register pressure in the loop.
1206	bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1207	BasicBlock *ExitBlock = L->getExitBlock();
1208	if (!ExitBlock) return false;
1209
1210	BasicBlock *Preheader = L->getLoopPreheader();
1211	if (!Preheader) return false;
1212
1213	bool MadeAnyChanges = false;
1214	for (Instruction &I : llvm::make_early_inc_range(Range: llvm::reverse(C&: *Preheader))) {
1215
1216	// Skip BB Terminator.
1217	if (Preheader->getTerminator() == &I)
1218	continue;
1219
1220	// New instructions were inserted at the end of the preheader.
1221	if (isa<PHINode>(Val: I))
1222	break;
1223
1224	// Don't move instructions which might have side effects, since the side
1225	// effects need to complete before instructions inside the loop. Also don't
1226	// move instructions which might read memory, since the loop may modify
1227	// memory. Note that it's okay if the instruction might have undefined
1228	// behavior: LoopSimplify guarantees that the preheader dominates the exit
1229	// block.
1230	if (I.mayHaveSideEffects() \|\| I.mayReadFromMemory())
1231	continue;
1232
1233	// Skip debug or pseudo instructions.
1234	if (I.isDebugOrPseudoInst())
1235	continue;
1236
1237	// Skip eh pad instructions.
1238	if (I.isEHPad())
1239	continue;
1240
1241	// Don't sink alloca: we never want to sink static alloca's out of the
1242	// entry block, and correctly sinking dynamic alloca's requires
1243	// checks for stacksave/stackrestore intrinsics.
1244	// FIXME: Refactor this check somehow?
1245	if (isa<AllocaInst>(Val: &I))
1246	continue;
1247
1248	// Determine if there is a use in or before the loop (direct or
1249	// otherwise).
1250	bool UsedInLoop = false;
1251	for (Use &U : I.uses()) {
1252	Instruction *User = cast<Instruction>(Val: U.getUser());
1253	BasicBlock *UseBB = User->getParent();
1254	if (PHINode *P = dyn_cast<PHINode>(Val: User)) {
1255	unsigned i =
1256	PHINode::getIncomingValueNumForOperand(i: U.getOperandNo());
1257	UseBB = P->getIncomingBlock(i);
1258	}
1259	if (UseBB == Preheader \|\| L->contains(BB: UseBB)) {
1260	UsedInLoop = true;
1261	break;
1262	}
1263	}
1264
1265	// If there is, the def must remain in the preheader.
1266	if (UsedInLoop)
1267	continue;
1268
1269	// Otherwise, sink it to the exit block.
1270	I.moveBefore(InsertPos: ExitBlock->getFirstInsertionPt());
1271	SE->forgetValue(V: &I);
1272	MadeAnyChanges = true;
1273	}
1274
1275	return MadeAnyChanges;
1276	}
1277
1278	static void replaceExitCond(CondBrInst BI, Value NewCond,
1279	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1280	auto *OldCond = BI->getCondition();
1281	LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1282	<< " with " << *NewCond << "\n");
1283	BI->setCondition(NewCond);
1284	if (OldCond->use_empty())
1285	DeadInsts.emplace_back(Args&: OldCond);
1286	}
1287
1288	static Constant createFoldedExitCond(const* Loop L, BasicBlock ExitingBB,
1289	bool IsTaken) {
1290	CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1291	bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1292	auto *OldCond = BI->getCondition();
1293	return ConstantInt::get(Ty: OldCond->getType(),
1294	V: IsTaken ? ExitIfTrue : !ExitIfTrue);
1295	}
1296
1297	static void foldExit(const Loop L, BasicBlock ExitingBB, bool IsTaken,
1298	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1299	CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1300	auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1301	replaceExitCond(BI, NewCond, DeadInsts);
1302	}
1303
1304	static void replaceLoopPHINodesWithPreheaderValues(
1305	LoopInfo LI, Loop L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1306	ScalarEvolution &SE) {
1307	assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1308	auto *LoopPreheader = L->getLoopPreheader();
1309	auto *LoopHeader = L->getHeader();
1310	SmallVector<Instruction *> Worklist;
1311	for (auto &PN : LoopHeader->phis()) {
1312	auto *PreheaderIncoming = PN.getIncomingValueForBlock(BB: LoopPreheader);
1313	for (User *U : PN.users())
1314	Worklist.push_back(Elt: cast<Instruction>(Val: U));
1315	SE.forgetValue(V: &PN);
1316	PN.replaceAllUsesWith(V: PreheaderIncoming);
1317	DeadInsts.emplace_back(Args: &PN);
1318	}
1319
1320	// Replacing with the preheader value will often allow IV users to simplify
1321	// (especially if the preheader value is a constant).
1322	SmallPtrSet<Instruction *, `16`> Visited;
1323	while (!Worklist.empty()) {
1324	auto *I = cast<Instruction>(Val: Worklist.pop_back_val());
1325	if (!Visited.insert(Ptr: I).second)
1326	continue;
1327
1328	// Don't simplify instructions outside the loop.
1329	if (!L->contains(Inst: I))
1330	continue;
1331
1332	Value *Res = simplifyInstruction(I, Q: I->getDataLayout());
1333	if (Res && LI->replacementPreservesLCSSAForm(From: I, To: Res)) {
1334	for (User *U : I->users())
1335	Worklist.push_back(Elt: cast<Instruction>(Val: U));
1336	I->replaceAllUsesWith(V: Res);
1337	DeadInsts.emplace_back(Args&: I);
1338	}
1339	}
1340	}
1341
1342	static Value *
1343	createInvariantCond(const Loop L, BasicBlock ExitingBB,
1344	const ScalarEvolution::LoopInvariantPredicate &LIP,
1345	SCEVExpander &Rewriter) {
1346	ICmpInst::Predicate InvariantPred = LIP.Pred;
1347	BasicBlock *Preheader = L->getLoopPreheader();
1348	assert(Preheader && "Preheader doesn't exist");
1349	Rewriter.setInsertPoint(Preheader->getTerminator());
1350	auto *LHSV = Rewriter.expandCodeFor(SH: LIP.LHS);
1351	auto *RHSV = Rewriter.expandCodeFor(SH: LIP.RHS);
1352	bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1353	if (ExitIfTrue)
1354	InvariantPred = ICmpInst::getInversePredicate(pred: InvariantPred);
1355	IRBuilder<> Builder(Preheader->getTerminator());
1356	CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1357	return Builder.CreateICmp(P: InvariantPred, LHS: LHSV, RHS: RHSV,
1358	Name: BI->getCondition()->getName());
1359	}
1360
1361	static std::optional<Value *>
1362	createReplacement(ICmpInst ICmp, const* Loop L, BasicBlock ExitingBB,
1363	const SCEV MaxIter, bool* Inverted, bool SkipLastIter,
1364	ScalarEvolution *SE, SCEVExpander &Rewriter) {
1365	CmpPredicate Pred = ICmp->getCmpPredicate();
1366	Value *LHS = ICmp->getOperand(i_nocapture: `0`);
1367	Value *RHS = ICmp->getOperand(i_nocapture: `1`);
1368
1369	// 'LHS pred RHS' should now mean that we stay in loop.
1370	auto *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1371	if (Inverted)
1372	Pred = ICmpInst::getInverseCmpPredicate(Pred);
1373
1374	const SCEV *LHSS = SE->getSCEVAtScope(V: LHS, L);
1375	const SCEV *RHSS = SE->getSCEVAtScope(V: RHS, L);
1376	// Can we prove it to be trivially true or false?
1377	if (auto EV = SE->evaluatePredicateAt(Pred, LHS: LHSS, RHS: RHSS, CtxI: BI))
1378	return createFoldedExitCond(L, ExitingBB, /IsTaken/ !*EV);
1379
1380	auto *ARTy = LHSS->getType();
1381	auto *MaxIterTy = MaxIter->getType();
1382	// If possible, adjust types.
1383	if (SE->getTypeSizeInBits(Ty: ARTy) > SE->getTypeSizeInBits(Ty: MaxIterTy))
1384	MaxIter = SE->getZeroExtendExpr(Op: MaxIter, Ty: ARTy);
1385	else if (SE->getTypeSizeInBits(Ty: ARTy) < SE->getTypeSizeInBits(Ty: MaxIterTy)) {
1386	const SCEV *MinusOne = SE->getMinusOne(Ty: ARTy);
1387	const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(Op: MinusOne, Ty: MaxIterTy);
1388	if (SE->isKnownPredicateAt(Pred: ICmpInst::ICMP_ULE, LHS: MaxIter, RHS: MaxAllowedIter, CtxI: BI))
1389	MaxIter = SE->getTruncateExpr(Op: MaxIter, Ty: ARTy);
1390	}
1391
1392	if (SkipLastIter) {
1393	// Semantically skip last iter is "subtract 1, do not bother about unsigned
1394	// wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1395	// with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1396	// So we manually construct umin(a - 1, b - 1).
1397	SmallVector<SCEVUse, `4`> Elements;
1398	if (auto *UMin = dyn_cast<SCEVUMinExpr>(Val: MaxIter)) {
1399	for (SCEVUse Op : UMin->operands())
1400	Elements.push_back(Elt: SE->getMinusSCEV(LHS: Op, RHS: SE->getOne(Ty: Op ->getType())));
1401	MaxIter = SE->getUMinFromMismatchedTypes(Ops&: Elements);
1402	} else
1403	MaxIter = SE->getMinusSCEV(LHS: MaxIter, RHS: SE->getOne(Ty: MaxIter->getType()));
1404	}
1405
1406	// Check if there is a loop-invariant predicate equivalent to our check.
1407	auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHS: LHSS, RHS: RHSS,
1408	L, CtxI: BI, MaxIter);
1409	if (!LIP)
1410	return std::nullopt;
1411
1412	// Can we prove it to be trivially true?
1413	if (SE->isKnownPredicateAt(Pred: LIP ->Pred, LHS: LIP ->LHS, RHS: LIP ->RHS, CtxI: BI))
1414	return createFoldedExitCond(L, ExitingBB, /IsTaken/ false);
1415	else
1416	return createInvariantCond(L, ExitingBB, LIP: *LIP, Rewriter);
1417	}
1418
1419	static bool optimizeLoopExitWithUnknownExitCount(
1420	const Loop L, CondBrInst BI, BasicBlock ExitingBB, const* SCEV *MaxIter,
1421	bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1422	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1423	assert(
1424	(L->contains(BI->getSuccessor(`0`)) != L->contains(BI->getSuccessor(`1`))) &&
1425	"Not a loop exit!");
1426
1427	// For branch that stays in loop by TRUE condition, go through AND. For branch
1428	// that stays in loop by FALSE condition, go through OR. Both gives the
1429	// similar logic: "stay in loop iff all conditions are true(false)".
1430	bool Inverted = L->contains(BB: BI->getSuccessor(i: `1`));
1431	SmallVector<ICmpInst *, `4`> LeafConditions;
1432	SmallVector<Value *, `4`> Worklist;
1433	SmallPtrSet<Value *, `4`> Visited;
1434	Value *OldCond = BI->getCondition();
1435	Visited.insert(Ptr: OldCond);
1436	Worklist.push_back(Elt: OldCond);
1437
1438	auto GoThrough = [&](Value *V) {
1439	Value LHS = nullptr, RHS = nullptr;
1440	if (Inverted) {
1441	if (!match(V, P: m_LogicalOr(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
1442	return false;
1443	} else {
1444	if (!match(V, P: m_LogicalAnd(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
1445	return false;
1446	}
1447	if (Visited.insert(Ptr: LHS).second)
1448	Worklist.push_back(Elt: LHS);
1449	if (Visited.insert(Ptr: RHS).second)
1450	Worklist.push_back(Elt: RHS);
1451	return true;
1452	};
1453
1454	do {
1455	Value *Curr = Worklist.pop_back_val();
1456	// Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1457	// those with one use, to avoid instruction duplication.
1458	if (Curr->hasOneUse())
1459	if (!GoThrough (Curr))
1460	if (auto *ICmp = dyn_cast<ICmpInst>(Val: Curr))
1461	LeafConditions.push_back(Elt: ICmp);
1462	} while (!Worklist.empty());
1463
1464	// If the current basic block has the same exit count as the whole loop, and
1465	// it consists of multiple icmp's, try to collect all icmp's that give exact
1466	// same exit count. For all other icmp's, we could use one less iteration,
1467	// because their value on the last iteration doesn't really matter.
1468	SmallPtrSet<ICmpInst *, `4`> ICmpsFailingOnLastIter;
1469	if (!SkipLastIter && LeafConditions.size() > `1` &&
1470	SE->getExitCount(L, ExitingBlock: ExitingBB,
1471	Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1472	MaxIter)
1473	for (auto *ICmp : LeafConditions) {
1474	auto EL = SE->computeExitLimitFromCond(L, ExitCond: ICmp, ExitIfTrue: Inverted,
1475	/ControlsExit/ ControlsOnlyExit: false);
1476	const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
1477	if (isa<SCEVCouldNotCompute>(Val: ExitMax))
1478	continue;
1479	// They could be of different types (specifically this happens after
1480	// IV widening).
1481	auto *WiderType =
1482	SE->getWiderType(Ty1: ExitMax->getType(), Ty2: MaxIter->getType());
1483	const SCEV *WideExitMax = SE->getNoopOrZeroExtend(V: ExitMax, Ty: WiderType);
1484	const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(V: MaxIter, Ty: WiderType);
1485	if (WideExitMax == WideMaxIter)
1486	ICmpsFailingOnLastIter.insert(Ptr: ICmp);
1487	}
1488
1489	bool Changed = false;
1490	for (auto *OldCond : LeafConditions) {
1491	// Skip last iteration for this icmp under one of two conditions:
1492	// - We do it for all conditions;
1493	// - There is another ICmp that would fail on last iter, so this one doesn't
1494	// really matter.
1495	bool OptimisticSkipLastIter = SkipLastIter;
1496	if (!OptimisticSkipLastIter) {
1497	if (ICmpsFailingOnLastIter.size() > `1`)
1498	OptimisticSkipLastIter = true;
1499	else if (ICmpsFailingOnLastIter.size() == `1`)
1500	OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(Ptr: OldCond);
1501	}
1502	if (auto Replaced =
1503	createReplacement(ICmp: OldCond, L, ExitingBB, MaxIter, Inverted,
1504	SkipLastIter: OptimisticSkipLastIter, SE, Rewriter)) {
1505	Changed = true;
1506	auto NewCond = Replaced;
1507	if (auto *NCI = dyn_cast<Instruction>(Val: NewCond)) {
1508	NCI->setName(OldCond->getName() + ".first_iter");
1509	}
1510	LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1511	<< " with " << *NewCond << "\n");
1512	assert(OldCond->hasOneUse() && "Must be!");
1513	OldCond->replaceAllUsesWith(V: NewCond);
1514	DeadInsts.push_back(Elt: OldCond);
1515	// Make sure we no longer consider this condition as failing on last
1516	// iteration.
1517	ICmpsFailingOnLastIter.erase(Ptr: OldCond);
1518	}
1519	}
1520	return Changed;
1521	}
1522
1523	bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1524	// Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1525	// We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1526	// never reaches the icmp since the zext doesn't fold to an AddRec unless
1527	// it already has flags. The alternative to this would be to extending the
1528	// set of "interesting" IV users to include the icmp, but doing that
1529	// regresses results in practice by querying SCEVs before trip counts which
1530	// rely on them which results in SCEV caching sub-optimal answers. The
1531	// concern about caching sub-optimal results is why we only query SCEVs of
1532	// the loop invariant RHS here.
1533	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1534	L->getExitingBlocks(ExitingBlocks);
1535	bool Changed = false;
1536	for (auto *ExitingBB : ExitingBlocks) {
1537	auto *BI = dyn_cast<CondBrInst>(Val: ExitingBB->getTerminator());
1538	if (!BI)
1539	continue;
1540
1541	auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
1542	if (!ICmp \|\| !ICmp->hasOneUse())
1543	continue;
1544
1545	auto *LHS = ICmp->getOperand(i_nocapture: `0`);
1546	auto *RHS = ICmp->getOperand(i_nocapture: `1`);
1547	// For the range reasoning, avoid computing SCEVs in the loop to avoid
1548	// poisoning cache with sub-optimal results. For the must-execute case,
1549	// this is a neccessary precondition for correctness.
1550	if (!L->isLoopInvariant(V: RHS)) {
1551	if (!L->isLoopInvariant(V: LHS))
1552	continue;
1553	// Same logic applies for the inverse case
1554	std::swap(a&: LHS, b&: RHS);
1555	}
1556
1557	// Match (icmp signed-cond zext, RHS)
1558	Value LHSOp = nullptr*;
1559	if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))) \|\| !ICmp->isSigned())
1560	continue;
1561
1562	const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
1563	const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
1564	auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
1565	FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
1566	auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
1567	if (FullCR.contains(CR: RHSCR)) {
1568	// We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1569	// replace the signed condition with the unsigned version.
1570	ICmp->setPredicate(ICmp->getUnsignedPredicate());
1571	Changed = true;
1572	// Note: No SCEV invalidation needed. We've changed the predicate, but
1573	// have not changed exit counts, or the values produced by the compare.
1574	continue;
1575	}
1576	}
1577
1578	// Now that we've canonicalized the condition to match the extend,
1579	// see if we can rotate the extend out of the loop.
1580	for (auto *ExitingBB : ExitingBlocks) {
1581	auto *BI = dyn_cast<CondBrInst>(Val: ExitingBB->getTerminator());
1582	if (!BI)
1583	continue;
1584
1585	auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
1586	if (!ICmp \|\| !ICmp->hasOneUse() \|\| !ICmp->isUnsigned())
1587	continue;
1588
1589	bool Swapped = false;
1590	auto *LHS = ICmp->getOperand(i_nocapture: `0`);
1591	auto *RHS = ICmp->getOperand(i_nocapture: `1`);
1592	if (L->isLoopInvariant(V: LHS) == L->isLoopInvariant(V: RHS))
1593	// Nothing to rotate
1594	continue;
1595	if (L->isLoopInvariant(V: LHS)) {
1596	// Same logic applies for the inverse case until we actually pick
1597	// which operand of the compare to update.
1598	Swapped = true;
1599	std::swap(a&: LHS, b&: RHS);
1600	}
1601	assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1602
1603	// Match (icmp unsigned-cond zext, RHS)
1604	// TODO: Extend to handle corresponding sext/signed-cmp case
1605	// TODO: Extend to other invertible functions
1606	Value LHSOp = nullptr*;
1607	if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))))
1608	continue;
1609
1610	// In general, we only rotate if we can do so without increasing the number
1611	// of instructions. The exception is when we have an zext(add-rec). The
1612	// reason for allowing this exception is that we know we need to get rid
1613	// of the zext for SCEV to be able to compute a trip count for said loops;
1614	// we consider the new trip count valuable enough to increase instruction
1615	// count by one.
1616	if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: LHSOp)))
1617	continue;
1618
1619	// Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1620	// replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1621	// when zext is loop varying and RHS is loop invariant. This converts
1622	// loop varying work to loop-invariant work.
1623	auto doRotateTransform = [&]() {
1624	assert(ICmp->isUnsigned() && "must have proven unsigned already");
1625	auto *NewRHS = CastInst::Create(
1626	Instruction::Trunc, S: RHS, Ty: LHSOp->getType(), Name: "",
1627	InsertBefore: L->getLoopPreheader()->getTerminator()->getIterator());
1628	// NewRHS is an operation that has been hoisted out of the loop, and
1629	// therefore should have a dropped location.
1630	NewRHS->setDebugLoc(DebugLoc::getDropped());
1631	ICmp->setOperand(i_nocapture: Swapped ? `1` : `0`, Val_nocapture: LHSOp);
1632	ICmp->setOperand(i_nocapture: Swapped ? `0` : `1`, Val_nocapture: NewRHS);
1633	// Samesign flag cannot be preserved after narrowing the compare.
1634	ICmp->setSameSign(false);
1635	if (LHS->use_empty())
1636	DeadInsts.push_back(Elt: LHS);
1637	};
1638
1639	const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
1640	const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
1641	auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
1642	FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
1643	auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
1644	if (FullCR.contains(CR: RHSCR)) {
1645	doRotateTransform ();
1646	Changed = true;
1647	// Note, we are leaving SCEV in an unfortunately imprecise case here
1648	// as rotation tends to reveal information about trip counts not
1649	// previously visible.
1650	continue;
1651	}
1652	}
1653
1654	return Changed;
1655	}
1656
1657	bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1658	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1659	L->getExitingBlocks(ExitingBlocks);
1660
1661	// Remove all exits which aren't both rewriteable and execute on every
1662	// iteration.
1663	llvm::erase_if(C&: ExitingBlocks, P: [&](BasicBlock *ExitingBB) {
1664	// If our exitting block exits multiple loops, we can only rewrite the
1665	// innermost one. Otherwise, we're changing how many times the innermost
1666	// loop runs before it exits.
1667	if (LI->getLoopFor(BB: ExitingBB) != L)
1668	return true;
1669
1670	// Can't rewrite non-branch yet.
1671	CondBrInst *BI = dyn_cast<CondBrInst>(Val: ExitingBB->getTerminator());
1672	if (!BI)
1673	return true;
1674
1675	// Likewise, the loop latch must be dominated by the exiting BB.
1676	if (!DT->dominates(A: ExitingBB, B: L->getLoopLatch()))
1677	return true;
1678
1679	if (auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
1680	// If already constant, nothing to do. However, if this is an
1681	// unconditional exit, we can still replace header phis with their
1682	// preheader value.
1683	if (!L->contains(BB: BI->getSuccessor(i: CI->isNullValue())))
1684	replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1685	return true;
1686	}
1687
1688	return false;
1689	});
1690
1691	if (ExitingBlocks.empty())
1692	return false;
1693
1694	// Get a symbolic upper bound on the loop backedge taken count.
1695	const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1696	if (isa<SCEVCouldNotCompute>(Val: MaxBECount))
1697	return false;
1698
1699	// Visit our exit blocks in order of dominance. We know from the fact that
1700	// all exits must dominate the latch, so there is a total dominance order
1701	// between them.
1702	llvm::sort(C&: ExitingBlocks, Comp: [&](BasicBlock A, BasicBlock B) {
1703	// std::sort sorts in ascending order, so we want the inverse of
1704	// the normal dominance relation.
1705	if (A == B) return false;
1706	if (DT->properlyDominates(A, B))
1707	return true;
1708	else {
1709	assert(DT->properlyDominates(B, A) &&
1710	"expected total dominance order!");
1711	return false;
1712	}
1713	});
1714	#ifdef ASSERT
1715	for (unsigned i = `1`; i < ExitingBlocks.size(); i++) {
1716	assert(DT->dominates(ExitingBlocks[i-`1`], ExitingBlocks[i]));
1717	}
1718	#endif
1719
1720	bool Changed = false;
1721	bool SkipLastIter = false;
1722	const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1723	auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1724	if (SkipLastIter \|\| isa<SCEVCouldNotCompute>(Val: MaxExitCount))
1725	return;
1726	if (isa<SCEVCouldNotCompute>(Val: CurrMaxExit))
1727	CurrMaxExit = MaxExitCount;
1728	else
1729	CurrMaxExit = SE->getUMinFromMismatchedTypes(LHS: CurrMaxExit, RHS: MaxExitCount);
1730	// If the loop has more than 1 iteration, all further checks will be
1731	// executed 1 iteration less.
1732	if (CurrMaxExit == MaxBECount)
1733	SkipLastIter = true;
1734	};
1735	SmallPtrSet<const SCEV *, `8`> DominatingExactExitCounts;
1736	for (BasicBlock *ExitingBB : ExitingBlocks) {
1737	const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1738	const SCEV *MaxExitCount = SE->getExitCount(
1739	L, ExitingBlock: ExitingBB, Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum);
1740	if (isa<SCEVCouldNotCompute>(Val: ExactExitCount)) {
1741	// Okay, we do not know the exit count here. Can we at least prove that it
1742	// will remain the same within iteration space?
1743	auto *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1744	auto OptimizeCond = [&](bool SkipLastIter) {
1745	return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1746	MaxIter: MaxBECount, SkipLastIter,
1747	SE, Rewriter, DeadInsts);
1748	};
1749
1750	// TODO: We might have proved that we can skip the last iteration for
1751	// this check. In this case, we only want to check the condition on the
1752	// pre-last iteration (MaxBECount - 1). However, there is a nasty
1753	// corner case:
1754	//
1755	// for (i = len; i != 0; i--) { ... check (i ult X) ... }
1756	//
1757	// If we could not prove that len != 0, then we also could not prove that
1758	// (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1759	// OptimizeCond will likely not prove anything for it, even if it could
1760	// prove the same fact for len.
1761	//
1762	// As a temporary solution, we query both last and pre-last iterations in
1763	// hope that we will be able to prove triviality for at least one of
1764	// them. We can stop querying MaxBECount for this case once SCEV
1765	// understands that (MaxBECount - 1) will not overflow here.
1766	if (OptimizeCond (false))
1767	Changed = true;
1768	else if (SkipLastIter && OptimizeCond (true))
1769	Changed = true;
1770	UpdateSkipLastIter (MaxExitCount);
1771	continue;
1772	}
1773
1774	UpdateSkipLastIter (ExactExitCount);
1775
1776	// If we know we'd exit on the first iteration, rewrite the exit to
1777	// reflect this. This does not imply the loop must exit through this
1778	// exit; there may be an earlier one taken on the first iteration.
1779	// We know that the backedge can't be taken, so we replace all
1780	// the header PHIs with values coming from the preheader.
1781	if (ExactExitCount->isZero()) {
1782	foldExit(L, ExitingBB, IsTaken: true, DeadInsts);
1783	replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1784	Changed = true;
1785	continue;
1786	}
1787
1788	assert(ExactExitCount->getType()->isIntegerTy() &&
1789	MaxBECount->getType()->isIntegerTy() &&
1790	"Exit counts must be integers");
1791
1792	Type *WiderType =
1793	SE->getWiderType(Ty1: MaxBECount->getType(), Ty2: ExactExitCount->getType());
1794	ExactExitCount = SE->getNoopOrZeroExtend(V: ExactExitCount, Ty: WiderType);
1795	MaxBECount = SE->getNoopOrZeroExtend(V: MaxBECount, Ty: WiderType);
1796	assert(MaxBECount->getType() == ExactExitCount->getType());
1797
1798	// Can we prove that some other exit must be taken strictly before this
1799	// one?
1800	if (SE->isLoopEntryGuardedByCond(L, Pred: CmpInst::ICMP_ULT, LHS: MaxBECount,
1801	RHS: ExactExitCount)) {
1802	foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1803	Changed = true;
1804	continue;
1805	}
1806
1807	// As we run, keep track of which exit counts we've encountered. If we
1808	// find a duplicate, we've found an exit which would have exited on the
1809	// exiting iteration, but (from the visit order) strictly follows another
1810	// which does the same and is thus dead.
1811	if (!DominatingExactExitCounts.insert(Ptr: ExactExitCount).second) {
1812	foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1813	Changed = true;
1814	continue;
1815	}
1816
1817	// TODO: There might be another oppurtunity to leverage SCEV's reasoning
1818	// here. If we kept track of the min of dominanting exits so far, we could
1819	// discharge exits with EC >= MDEC. This is less powerful than the existing
1820	// transform (since later exits aren't considered), but potentially more
1821	// powerful for any case where SCEV can prove a >=u b, but neither a == b
1822	// or a >u b. Such a case is not currently known.
1823	}
1824	return Changed;
1825	}
1826
1827	static bool crashingBBWithoutEffect(const BasicBlock &BB) {
1828	return llvm::all_of(Range: BB, P: [](const Instruction &I) {
1829	// TODO: for now this is overly restrictive, to make sure nothing in this
1830	// BB can depend on the loop body.
1831	// It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
1832	// load does not have a side effect, but we could have
1833	// %a = load ptr, ptr %ptr
1834	// %b = load i32, ptr %a
1835	// Now if the loop stored a non-nullptr to %a, we could cause a nullptr
1836	// dereference by skipping over loop iterations.
1837	if (const auto *CB = dyn_cast<CallBase>(Val: &I)) {
1838	if (CB->onlyAccessesInaccessibleMemory())
1839	return true;
1840	}
1841	return isa<UnreachableInst>(Val: I);
1842	});
1843	}
1844
1845	bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1846	SmallVector<BasicBlock*, `16`> ExitingBlocks;
1847	L->getExitingBlocks(ExitingBlocks);
1848
1849	// Finally, see if we can rewrite our exit conditions into a loop invariant
1850	// form. If we have a read-only loop, and we can tell that we must exit down
1851	// a path which does not need any of the values computed within the loop, we
1852	// can rewrite the loop to exit on the first iteration. Note that this
1853	// doesn't either a) tell us the loop exits on the first iteration (unless
1854	// all* exits are predicateable) or b) tell us which exit might be taken.*
1855	// This transformation looks a lot like a restricted form of dead loop
1856	// elimination, but restricted to read-only loops and without neccesssarily
1857	// needing to kill the loop entirely.
1858	if (!LoopPredication)
1859	return false;
1860
1861	// Note: ExactBTC is the exact backedge taken count iff* the loop exits*
1862	// through explicit* control flow. We have to eliminate the possibility of*
1863	// implicit exits (see below) before we know it's truly exact.
1864	const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1865	if (isa<SCEVCouldNotCompute>(Val: ExactBTC) \|\| !Rewriter.isSafeToExpand(S: ExactBTC))
1866	return false;
1867
1868	assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1869	assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1870
1871	auto BadExit = [&](BasicBlock *ExitingBB) {
1872	// If our exiting block exits multiple loops, we can only rewrite the
1873	// innermost one. Otherwise, we're changing how many times the innermost
1874	// loop runs before it exits.
1875	if (LI->getLoopFor(BB: ExitingBB) != L)
1876	return true;
1877
1878	// Can't rewrite non-branch yet.
1879	CondBrInst *BI = dyn_cast<CondBrInst>(Val: ExitingBB->getTerminator());
1880	if (!BI)
1881	return true;
1882
1883	// If already constant, nothing to do.
1884	if (isa<Constant>(Val: BI->getCondition()))
1885	return true;
1886
1887	// If the exit block has phis, we need to be able to compute the values
1888	// within the loop which contains them. This assumes trivially lcssa phis
1889	// have already been removed; TODO: generalize
1890	BasicBlock *ExitBlock =
1891	BI->getSuccessor(i: L->contains(BB: BI->getSuccessor(i: `0`)) ? `1` : `0`);
1892	if (!ExitBlock->phis().empty())
1893	return true;
1894
1895	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1896	if (isa<SCEVCouldNotCompute>(Val: ExitCount) \|\|
1897	!Rewriter.isSafeToExpand(S: ExitCount))
1898	return true;
1899
1900	assert(SE->isLoopInvariant(ExitCount, L) &&
1901	"Exit count must be loop invariant");
1902	assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1903	return false;
1904	};
1905
1906	// Make sure all exits dominate the latch. This means there is a linear chain
1907	// of exits. We check this before sorting so we have a total order.
1908	BasicBlock *Latch = L->getLoopLatch();
1909	for (BasicBlock *ExitingBB : ExitingBlocks)
1910	if (!DT->dominates(A: ExitingBB, B: Latch))
1911	return false;
1912
1913	// If we have any exits which can't be predicated themselves, than we can't
1914	// predicate any exit which isn't guaranteed to execute before it. Consider
1915	// two exits (a) and (b) which would both exit on the same iteration. If we
1916	// can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1917	// we could convert a loop from exiting through (a) to one exiting through
1918	// (b). Note that this problem exists only for exits with the same exit
1919	// count, and we could be more aggressive when exit counts are known inequal.
1920	llvm::sort(C&: ExitingBlocks, Comp: [&](BasicBlock A, BasicBlock B) {
1921	// llvm::sort sorts in ascending order, so we want the inverse of
1922	// the normal dominance relation.
1923	if (A == B)
1924	return false;
1925	if (DT->properlyDominates(A, B))
1926	return true;
1927	if (DT->properlyDominates(A: B, B: A))
1928	return false;
1929	llvm_unreachable("Should have total dominance order");
1930	});
1931
1932	// Make sure our exit blocks are really a total order (i.e. a linear chain of
1933	// exits before the backedge).
1934	for (unsigned i = `1`; i < ExitingBlocks.size(); i++)
1935	assert(DT->dominates(ExitingBlocks[i - `1`], ExitingBlocks[i]) &&
1936	"Not sorted by dominance");
1937
1938	// Given our sorted total order, we know that exit[j] must be evaluated
1939	// after all exit[i] such j > i.
1940	for (unsigned i = `0`, e = ExitingBlocks.size(); i < e; i++)
1941	if (BadExit (ExitingBlocks [i])) {
1942	ExitingBlocks.resize(N: i);
1943	break;
1944	}
1945
1946	if (ExitingBlocks.empty())
1947	return false;
1948
1949	// At this point, ExitingBlocks consists of only those blocks which are
1950	// predicatable. Given that, we know we have at least one exit we can
1951	// predicate if the loop is doesn't have side effects and doesn't have any
1952	// implicit exits (because then our exact BTC isn't actually exact).
1953	// @Reviewers - As structured, this is O(I^2) for loop nests. Any
1954	// suggestions on how to improve this? I can obviously bail out for outer
1955	// loops, but that seems less than ideal. MemorySSA can find memory writes,
1956	// is that enough for all* side effects?*
1957	bool HasThreadLocalSideEffects = false;
1958	for (BasicBlock *BB : L->blocks())
1959	for (auto &I : *BB) {
1960	// TODO:isGuaranteedToTransfer
1961	if (I.mayHaveSideEffects()) {
1962	if (!LoopPredicationTraps)
1963	return false;
1964	HasThreadLocalSideEffects = true;
1965	if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I)) {
1966	// Simple stores cannot be observed by other threads.
1967	// If HasThreadLocalSideEffects is set, we check
1968	// crashingBBWithoutEffect to make sure that the crashing BB cannot
1969	// observe them either.
1970	if (!SI->isSimple())
1971	return false;
1972	} else {
1973	return false;
1974	}
1975	}
1976
1977	// Skip if the loop has tokens referenced outside the loop to avoid
1978	// changing convergence behavior.
1979	if (I.getType()->isTokenTy()) {
1980	for (User *U : I.users()) {
1981	Instruction *UserInst = dyn_cast<Instruction>(Val: U);
1982	if (UserInst && !L->contains(Inst: UserInst)) {
1983	return false;
1984	}
1985	}
1986	}
1987	}
1988
1989	bool Changed = false;
1990	// Finally, do the actual predication for all predicatable blocks. A couple
1991	// of notes here:
1992	// 1) We don't bother to constant fold dominated exits with identical exit
1993	// counts; that's simply a form of CSE/equality propagation and we leave
1994	// it for dedicated passes.
1995	// 2) We insert the comparison at the branch. Hoisting introduces additional
1996	// legality constraints and we leave that to dedicated logic. We want to
1997	// predicate even if we can't insert a loop invariant expression as
1998	// peeling or unrolling will likely reduce the cost of the otherwise loop
1999	// varying check.
2000	Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2001	IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2002	Value ExactBTCV = nullptr; // Lazily generated if needed.*
2003	for (BasicBlock *ExitingBB : ExitingBlocks) {
2004	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
2005
2006	auto *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
2007	if (HasThreadLocalSideEffects) {
2008	const BasicBlock Unreachable = nullptr*;
2009	for (const BasicBlock *Succ : BI->successors()) {
2010	if (isa<UnreachableInst>(Val: Succ->getTerminator()))
2011	Unreachable = Succ;
2012	}
2013	// Exit BB which have one branch back into the loop and another one to
2014	// a trap can still be optimized, because local side effects cannot
2015	// be observed in the exit case (the trap). We could be smarter about
2016	// this, but for now lets pattern match common cases that directly trap.
2017	if (Unreachable == nullptr \|\| !crashingBBWithoutEffect(BB: *Unreachable))
2018	return Changed;
2019	}
2020	Value *NewCond;
2021	if (ExitCount == ExactBTC) {
2022	NewCond = L->contains(BB: BI->getSuccessor(i: `0`)) ?
2023	B.getFalse() : B.getTrue();
2024	} else {
2025	Value *ECV = Rewriter.expandCodeFor(SH: ExitCount);
2026	if (!ExactBTCV)
2027	ExactBTCV = Rewriter.expandCodeFor(SH: ExactBTC);
2028	Value *RHS = ExactBTCV;
2029	if (ECV->getType() != RHS->getType()) {
2030	Type *WiderTy = SE->getWiderType(Ty1: ECV->getType(), Ty2: RHS->getType());
2031	ECV = B.CreateZExt(V: ECV, DestTy: WiderTy);
2032	RHS = B.CreateZExt(V: RHS, DestTy: WiderTy);
2033	}
2034	auto Pred = L->contains(BB: BI->getSuccessor(i: `0`)) ?
2035	ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2036	NewCond = B.CreateICmp(P: Pred, LHS: ECV, RHS);
2037	}
2038	Value *OldCond = BI->getCondition();
2039	BI->setCondition(NewCond);
2040	if (OldCond->use_empty())
2041	DeadInsts.emplace_back(Args&: OldCond);
2042	Changed = true;
2043	RunUnswitching = true;
2044	}
2045
2046	return Changed;
2047	}
2048
2049	//===----------------------------------------------------------------------===//
2050	// IndVarSimplify driver. Manage several subpasses of IV simplification.
2051	//===----------------------------------------------------------------------===//
2052
2053	bool IndVarSimplify::run(Loop *L) {
2054	// We need (and expect!) the incoming loop to be in LCSSA.
2055	assert(L->isRecursivelyLCSSAForm(DT, LI) &&
2056	"LCSSA required to run indvars!");
2057
2058	// If LoopSimplify form is not available, stay out of trouble. Some notes:
2059	// - LSR currently only supports LoopSimplify-form loops. Indvars'
2060	// canonicalization can be a pessimization without LSR to "clean up"
2061	// afterwards.
2062	// - We depend on having a preheader; in particular,
2063	// Loop::getCanonicalInductionVariable only supports loops with preheaders,
2064	// and we're in trouble if we can't find the induction variable even when
2065	// we've manually inserted one.
2066	// - LFTR relies on having a single backedge.
2067	if (!L->isLoopSimplifyForm())
2068	return false;
2069
2070	bool Changed = false;
2071	// If there are any floating-point recurrences, attempt to
2072	// transform them to use integer recurrences.
2073	Changed \|= rewriteNonIntegerIVs(L);
2074
2075	// Create a rewriter object which we'll use to transform the code with.
2076	SCEVExpander Rewriter(*SE, "indvars");
2077	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2078	Rewriter.setDebugType(DEBUG_TYPE);
2079	#endif
2080
2081	// Eliminate redundant IV users.
2082	//
2083	// Simplification works best when run before other consumers of SCEV. We
2084	// attempt to avoid evaluating SCEVs for sign/zero extend operations until
2085	// other expressions involving loop IVs have been evaluated. This helps SCEV
2086	// set no-wrap flags before normalizing sign/zero extension.
2087	Rewriter.disableCanonicalMode();
2088	Changed \|= simplifyAndExtend(L, Rewriter, LI);
2089
2090	// Check to see if we can compute the final value of any expressions
2091	// that are recurrent in the loop, and substitute the exit values from the
2092	// loop into any instructions outside of the loop that use the final values
2093	// of the current expressions.
2094	if (ReplaceExitValue != NeverRepl) {
2095	if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2096	ReplaceExitValue, DeadInsts)) {
2097	NumReplaced += Rewrites;
2098	Changed = true;
2099	}
2100	}
2101
2102	// Eliminate redundant IV cycles.
2103	NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
2104
2105	// Try to convert exit conditions to unsigned and rotate computation
2106	// out of the loop. Note: Handles invalidation internally if needed.
2107	Changed \|= canonicalizeExitCondition(L);
2108
2109	// Try to eliminate loop exits based on analyzeable exit counts
2110	if (optimizeLoopExits(L, Rewriter)) {
2111	Changed = true;
2112	// Given we've changed exit counts, notify SCEV
2113	// Some nested loops may share same folded exit basic block,
2114	// thus we need to notify top most loop.
2115	SE->forgetTopmostLoop(L);
2116	}
2117
2118	// Try to form loop invariant tests for loop exits by changing how many
2119	// iterations of the loop run when that is unobservable.
2120	if (predicateLoopExits(L, Rewriter)) {
2121	Changed = true;
2122	// Given we've changed exit counts, notify SCEV
2123	SE->forgetLoop(L);
2124	}
2125
2126	// If we have a trip count expression, rewrite the loop's exit condition
2127	// using it.
2128	if (!DisableLFTR) {
2129	BasicBlock *PreHeader = L->getLoopPreheader();
2130
2131	SmallVector<BasicBlock*, `16`> ExitingBlocks;
2132	L->getExitingBlocks(ExitingBlocks);
2133	for (BasicBlock *ExitingBB : ExitingBlocks) {
2134	// Can't rewrite non-branch yet.
2135	if (!isa<CondBrInst>(Val: ExitingBB->getTerminator()))
2136	continue;
2137
2138	// If our exitting block exits multiple loops, we can only rewrite the
2139	// innermost one. Otherwise, we're changing how many times the innermost
2140	// loop runs before it exits.
2141	if (LI->getLoopFor(BB: ExitingBB) != L)
2142	continue;
2143
2144	if (!needsLFTR(L, ExitingBB))
2145	continue;
2146
2147	const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
2148	if (isa<SCEVCouldNotCompute>(Val: ExitCount))
2149	continue;
2150
2151	// This was handled above, but as we form SCEVs, we can sometimes refine
2152	// existing ones; this allows exit counts to be folded to zero which
2153	// weren't when optimizeLoopExits saw them. Arguably, we should iterate
2154	// until stable to handle cases like this better.
2155	if (ExitCount->isZero())
2156	continue;
2157
2158	PHINode *IndVar = FindLoopCounter(L, ExitingBB, BECount: ExitCount, SE, DT);
2159	if (!IndVar)
2160	continue;
2161
2162	// Avoid high cost expansions. Note: This heuristic is questionable in
2163	// that our definition of "high cost" is not exactly principled.
2164	if (Rewriter.isHighCostExpansion(Exprs: ExitCount, L, Budget: SCEVCheapExpansionBudget,
2165	TTI, At: PreHeader->getTerminator()))
2166	continue;
2167
2168	if (!Rewriter.isSafeToExpand(S: ExitCount))
2169	continue;
2170
2171	Changed \|= linearFunctionTestReplace(L, ExitingBB,
2172	ExitCount, IndVar,
2173	Rewriter);
2174	}
2175	}
2176	// Clear the rewriter cache, because values that are in the rewriter's cache
2177	// can be deleted in the loop below, causing the AssertingVH in the cache to
2178	// trigger.
2179	Rewriter.clear();
2180
2181	// Now that we're done iterating through lists, clean up any instructions
2182	// which are now dead.
2183	while (!DeadInsts.empty()) {
2184	Value *V = DeadInsts.pop_back_val();
2185
2186	if (PHINode *PHI = dyn_cast_or_null<PHINode>(Val: V))
2187	Changed \|= RecursivelyDeleteDeadPHINode(PN: PHI, TLI, MSSAU: MSSAU.get());
2188	else if (Instruction *Inst = dyn_cast_or_null<Instruction>(Val: V))
2189	Changed \|=
2190	RecursivelyDeleteTriviallyDeadInstructions(V: Inst, TLI, MSSAU: MSSAU.get());
2191	}
2192
2193	// The Rewriter may not be used from this point on.
2194
2195	// Loop-invariant instructions in the preheader that aren't used in the
2196	// loop may be sunk below the loop to reduce register pressure.
2197	Changed \|= sinkUnusedInvariants(L);
2198
2199	// rewriteFirstIterationLoopExitValues does not rely on the computation of
2200	// trip count and therefore can further simplify exit values in addition to
2201	// rewriteLoopExitValues.
2202	Changed \|= rewriteFirstIterationLoopExitValues(L);
2203
2204	// Clean up dead instructions.
2205	Changed \|= DeleteDeadPHIs(BB: L->getHeader(), TLI, MSSAU: MSSAU.get());
2206
2207	// Check a post-condition.
2208	assert(L->isRecursivelyLCSSAForm(DT, LI) &&
2209	"Indvars did not preserve LCSSA!");
2210	if (VerifyMemorySSA && MSSAU)
2211	MSSAU ->getMemorySSA()->verifyMemorySSA();
2212
2213	return Changed;
2214	}
2215
2216	PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2217	LoopStandardAnalysisResults &AR,
2218	LPMUpdater &) {
2219	Function *F = L.getHeader()->getParent();
2220	const DataLayout &DL = F->getDataLayout();
2221
2222	IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2223	WidenIndVars && AllowIVWidening);
2224	if (!IVS.run(L: &L))
2225	return PreservedAnalyses::all();
2226
2227	auto PA = getLoopPassPreservedAnalyses();
2228	PA.preserveSet<CFGAnalyses>();
2229	if (IVS.runUnswitching()) {
2230	AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(IR&: L, ExtraArgs&: AR);
2231	PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2232	}
2233
2234	if (AR.MSSA)
2235	PA.preserve<MemorySSAAnalysis>();
2236	return PA;
2237	}
2238

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp