ScalarEvolutionExpander.cpp source code [llvm_projects/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp]

1	//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains the implementation of the scalar evolution expander,
10	// which is used to generate the code corresponding to a given scalar evolution
11	// expression.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
16	#include "llvm/ADT/STLExtras.h"
17	#include "llvm/ADT/ScopeExit.h"
18	#include "llvm/ADT/SmallSet.h"
19	#include "llvm/Analysis/InstructionSimplify.h"
20	#include "llvm/Analysis/LoopInfo.h"
21	#include "llvm/Analysis/TargetTransformInfo.h"
22	#include "llvm/Analysis/ValueTracking.h"
23	#include "llvm/IR/DataLayout.h"
24	#include "llvm/IR/Dominators.h"
25	#include "llvm/IR/IntrinsicInst.h"
26	#include "llvm/IR/PatternMatch.h"
27	#include "llvm/Support/CommandLine.h"
28	#include "llvm/Support/raw_ostream.h"
29	#include "llvm/Transforms/Utils/LoopUtils.h"
30
31	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
32	#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
33	#else
34	#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
35	#endif
36
37	using namespace llvm;
38
39	cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
40	"scev-cheap-expansion-budget", cl::Hidden, cl::init(Val: `4`),
41	cl::desc ("When performing SCEV expansion only if it is cheap to do, this "
42	"controls the budget that is considered cheap (default = 4)"));
43
44	using namespace PatternMatch;
45
46	PoisonFlags::PoisonFlags(const Instruction *I) {
47	NUW = false;
48	NSW = false;
49	Exact = false;
50	Disjoint = false;
51	NNeg = false;
52	SameSign = false;
53	GEPNW = GEPNoWrapFlags::none();
54	if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I)) {
55	NUW = OBO->hasNoUnsignedWrap();
56	NSW = OBO->hasNoSignedWrap();
57	}
58	if (auto *PEO = dyn_cast<PossiblyExactOperator>(Val: I))
59	Exact = PEO->isExact();
60	if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
61	Disjoint = PDI->isDisjoint();
62	if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
63	NNeg = PNI->hasNonNeg();
64	if (auto *TI = dyn_cast<TruncInst>(Val: I)) {
65	NUW = TI->hasNoUnsignedWrap();
66	NSW = TI->hasNoSignedWrap();
67	}
68	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I))
69	GEPNW = GEP->getNoWrapFlags();
70	if (auto *ICmp = dyn_cast<ICmpInst>(Val: I))
71	SameSign = ICmp->hasSameSign();
72	}
73
74	void PoisonFlags::apply(Instruction *I) {
75	if (isa<OverflowingBinaryOperator>(Val: I)) {
76	I->setHasNoUnsignedWrap(NUW);
77	I->setHasNoSignedWrap(NSW);
78	}
79	if (isa<PossiblyExactOperator>(Val: I))
80	I->setIsExact(Exact);
81	if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
82	PDI->setIsDisjoint(Disjoint);
83	if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
84	PNI->setNonNeg(NNeg);
85	if (isa<TruncInst>(Val: I)) {
86	I->setHasNoUnsignedWrap(NUW);
87	I->setHasNoSignedWrap(NSW);
88	}
89	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I))
90	GEP->setNoWrapFlags(GEPNW);
91	if (auto *ICmp = dyn_cast<ICmpInst>(Val: I))
92	ICmp->setSameSign(SameSign);
93	}
94
95	/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
96	/// reusing an existing cast if a suitable one (= dominating IP) exists, or
97	/// creating a new one.
98	Value SCEVExpander::ReuseOrCreateCast(Value V, Type *Ty,
99	Instruction::CastOps Op,
100	BasicBlock::iterator IP) {
101	// This function must be called with the builder having a valid insertion
102	// point. It doesn't need to be the actual IP where the uses of the returned
103	// cast will be added, but it must dominate such IP.
104	// We use this precondition to produce a cast that will dominate all its
105	// uses. In particular, this is crucial for the case where the builder's
106	// insertion point is* the point where we were asked to put the cast.*
107	// Since we don't know the builder's insertion point is actually
108	// where the uses will be added (only that it dominates it), we are
109	// not allowed to move it.
110	BasicBlock::iterator BIP = Builder.GetInsertPoint();
111
112	Value Ret = nullptr*;
113
114	if (!isa<Constant>(Val: V)) {
115	// Check to see if there is already a cast!
116	for (User *U : V->users()) {
117	if (U->getType() != Ty)
118	continue;
119	CastInst *CI = dyn_cast<CastInst>(Val: U);
120	if (!CI \|\| CI->getOpcode() != Op)
121	continue;
122
123	// Found a suitable cast that is at IP or comes before IP. Use it. Note
124	// that the cast must also properly dominate the Builder's insertion
125	// point.
126	if (IP ->getParent() == CI->getParent() && &*BIP != CI &&
127	(&IP == CI \|\| CI->comesBefore(Other: &IP))) {
128	Ret = CI;
129	break;
130	}
131	}
132	}
133
134	// Create a new cast.
135	if (!Ret) {
136	SCEVInsertPointGuard Guard(Builder, this);
137	Builder.SetInsertPoint(&*IP);
138	Ret = Builder.CreateCast(Op, V, DestTy: Ty, Name: V->getName());
139	}
140
141	// We assert at the end of the function since IP might point to an
142	// instruction with different dominance properties than a cast
143	// (an invoke for example) and not dominate BIP (but the cast does).
144	assert(!isa<Instruction>(Ret) \|\|
145	SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
146
147	return Ret;
148	}
149
150	BasicBlock::iterator
151	SCEVExpander::findInsertPointAfter(Instruction *I,
152	Instruction MustDominate) const* {
153	BasicBlock::iterator IP = ++I->getIterator();
154	if (auto *II = dyn_cast<InvokeInst>(Val: I))
155	IP = II->getNormalDest()->begin();
156
157	while (isa<PHINode>(Val: IP))
158	++IP;
159
160	if (isa<FuncletPadInst>(Val: IP) \|\| isa<LandingPadInst>(Val: IP)) {
161	++IP;
162	} else if (isa<CatchSwitchInst>(Val: IP)) {
163	IP = MustDominate->getParent()->getFirstInsertionPt();
164	} else {
165	assert(!IP->isEHPad() && "unexpected eh pad!");
166	}
167
168	// Adjust insert point to be after instructions inserted by the expander, so
169	// we can re-use already inserted instructions. Avoid skipping past the
170	// original \p MustDominate, in case it is an inserted instruction.
171	while (isInsertedInstruction(I: &IP) && &IP != MustDominate)
172	++IP;
173
174	return IP;
175	}
176
177	BasicBlock::iterator
178	SCEVExpander::GetOptimalInsertionPointForCastOf(Value V) const* {
179	// Cast the argument at the beginning of the entry block, after
180	// any bitcasts of other arguments.
181	if (Argument *A = dyn_cast<Argument>(Val: V)) {
182	BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
183	while ((isa<BitCastInst>(Val: IP) &&
184	isa<Argument>(Val: cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: `0`)) &&
185	cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: `0`) != A))
186	++IP;
187	return IP;
188	}
189
190	// Cast the instruction immediately after the instruction.
191	if (Instruction *I = dyn_cast<Instruction>(Val: V))
192	return findInsertPointAfter(I, MustDominate: &*Builder.GetInsertPoint());
193
194	// Otherwise, this must be some kind of a constant,
195	// so let's plop this cast into the function's entry block.
196	assert(isa<Constant>(V) &&
197	"Expected the cast argument to be a global/constant");
198	return Builder.GetInsertBlock()
199	->getParent()
200	->getEntryBlock()
201	.getFirstInsertionPt();
202	}
203
204	/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
205	/// which must be possible with a noop cast, doing what we can to share
206	/// the casts.
207	Value SCEVExpander::InsertNoopCastOfTo(Value V, Type *Ty) {
208	Instruction::CastOps Op = CastInst::getCastOpcode(Val: V, SrcIsSigned: false, Ty, DstIsSigned: false);
209	assert((Op == Instruction::BitCast \|\|
210	Op == Instruction::PtrToInt \|\|
211	Op == Instruction::IntToPtr) &&
212	"InsertNoopCastOfTo cannot perform non-noop casts!");
213	assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
214	"InsertNoopCastOfTo cannot change sizes!");
215
216	// inttoptr only works for integral pointers. For non-integral pointers, we
217	// can create a GEP on null with the integral value as index. Note that
218	// it is safe to use GEP of null instead of inttoptr here, because only
219	// expressions already based on a GEP of null should be converted to pointers
220	// during expansion.
221	if (Op == Instruction::IntToPtr) {
222	auto *PtrTy = cast<PointerType>(Val: Ty);
223	if (DL.isNonIntegralPointerType(PT: PtrTy))
224	return Builder.CreatePtrAdd(Ptr: Constant::getNullValue(Ty: PtrTy), Offset: V, Name: "scevgep");
225	}
226	// Short-circuit unnecessary bitcasts.
227	if (Op == Instruction::BitCast) {
228	if (V->getType() == Ty)
229	return V;
230	if (CastInst *CI = dyn_cast<CastInst>(Val: V)) {
231	if (CI->getOperand(i_nocapture: `0`)->getType() == Ty)
232	return CI->getOperand(i_nocapture: `0`);
233	}
234	}
235	// Short-circuit unnecessary inttoptr<->ptrtoint casts.
236	if ((Op == Instruction::PtrToInt \|\| Op == Instruction::IntToPtr) &&
237	SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(Ty: V->getType())) {
238	if (CastInst *CI = dyn_cast<CastInst>(Val: V))
239	if ((CI->getOpcode() == Instruction::PtrToInt \|\|
240	CI->getOpcode() == Instruction::IntToPtr) &&
241	SE.getTypeSizeInBits(Ty: CI->getType()) ==
242	SE.getTypeSizeInBits(Ty: CI->getOperand(i_nocapture: `0`)->getType()))
243	return CI->getOperand(i_nocapture: `0`);
244	if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
245	if ((CE->getOpcode() == Instruction::PtrToInt \|\|
246	CE->getOpcode() == Instruction::IntToPtr) &&
247	SE.getTypeSizeInBits(Ty: CE->getType()) ==
248	SE.getTypeSizeInBits(Ty: CE->getOperand(i_nocapture: `0`)->getType()))
249	return CE->getOperand(i_nocapture: `0`);
250	}
251
252	// Fold a cast of a constant.
253	if (Constant *C = dyn_cast<Constant>(Val: V))
254	return ConstantExpr::getCast(ops: Op, C, Ty);
255
256	// Try to reuse existing cast, or insert one.
257	return ReuseOrCreateCast(V, Ty, Op, IP: GetOptimalInsertionPointForCastOf(V));
258	}
259
260	/// InsertBinop - Insert the specified binary operator, doing a small amount
261	/// of work to avoid inserting an obviously redundant operation, and hoisting
262	/// to an outer loop when the opportunity is there and it is safe.
263	Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
264	Value LHS, Value RHS,
265	SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
266	// Fold a binop with constant operands.
267	if (Constant *CLHS = dyn_cast<Constant>(Val: LHS))
268	if (Constant *CRHS = dyn_cast<Constant>(Val: RHS))
269	if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, LHS: CLHS, RHS: CRHS, DL))
270	return Res;
271
272	// Do a quick scan to see if we have this binop nearby. If so, reuse it.
273	unsigned ScanLimit = `6`;
274	BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
275	// Scanning starts from the last instruction before the insertion point.
276	BasicBlock::iterator IP = Builder.GetInsertPoint();
277	if (IP != BlockBegin) {
278	--IP;
279	for (; ScanLimit; --IP, --ScanLimit) {
280	auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
281	// Ensure that no-wrap flags match.
282	if (isa<OverflowingBinaryOperator>(Val: I)) {
283	if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
284	return true;
285	if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
286	return true;
287	}
288	// Conservatively, do not use any instruction which has any of exact
289	// flags installed.
290	if (isa<PossiblyExactOperator>(Val: I) && I->isExact())
291	return true;
292	return false;
293	};
294	if (IP ->getOpcode() == (unsigned)Opcode && IP ->getOperand(i: `0`) == LHS &&
295	IP ->getOperand(i: `1`) == RHS && !canGenerateIncompatiblePoison (&*IP))
296	return &*IP;
297	if (IP == BlockBegin) break;
298	}
299	}
300
301	// Save the original insertion point so we can restore it when we're done.
302	DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
303	SCEVInsertPointGuard Guard(Builder, this);
304
305	if (IsSafeToHoist) {
306	// Move the insertion point out of as many loops as we can.
307	while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
308	if (!L->isLoopInvariant(V: LHS) \|\| !L->isLoopInvariant(V: RHS)) break;
309	BasicBlock *Preheader = L->getLoopPreheader();
310	if (!Preheader) break;
311
312	// Ok, move up a level.
313	Builder.SetInsertPoint(Preheader->getTerminator());
314	}
315	}
316
317	// If we haven't found this binop, insert it.
318	// TODO: Use the Builder, which will make CreateBinOp below fold with
319	// InstSimplifyFolder.
320	Instruction *BO = Builder.Insert(I: BinaryOperator::Create(Op: Opcode, S1: LHS, S2: RHS));
321	BO->setDebugLoc(Loc);
322	if (Flags & SCEV::FlagNUW)
323	BO->setHasNoUnsignedWrap();
324	if (Flags & SCEV::FlagNSW)
325	BO->setHasNoSignedWrap();
326
327	return BO;
328	}
329
330	/// expandAddToGEP - Expand an addition expression with a pointer type into
331	/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
332	/// BasicAliasAnalysis and other passes analyze the result. See the rules
333	/// for getelementptr vs. inttoptr in
334	/// http://llvm.org/docs/LangRef.html#pointeraliasing
335	/// for details.
336	///
337	/// Design note: The correctness of using getelementptr here depends on
338	/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
339	/// they may introduce pointer arithmetic which may not be safely converted
340	/// into getelementptr.
341	///
342	/// Design note: It might seem desirable for this function to be more
343	/// loop-aware. If some of the indices are loop-invariant while others
344	/// aren't, it might seem desirable to emit multiple GEPs, keeping the
345	/// loop-invariant portions of the overall computation outside the loop.
346	/// However, there are a few reasons this is not done here. Hoisting simple
347	/// arithmetic is a low-level optimization that often isn't very
348	/// important until late in the optimization process. In fact, passes
349	/// like InstructionCombining will combine GEPs, even if it means
350	/// pushing loop-invariant computation down into loops, so even if the
351	/// GEPs were split here, the work would quickly be undone. The
352	/// LoopStrengthReduction pass, which is usually run quite late (and
353	/// after the last InstructionCombining pass), takes care of hoisting
354	/// loop-invariant portions of expressions, after considering what
355	/// can be folded using target addressing modes.
356	///
357	Value SCEVExpander::expandAddToGEP(const* SCEV Offset, Value V,
358	SCEV::NoWrapFlags Flags) {
359	assert(!isa<Instruction>(V) \|\|
360	SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
361
362	Value *Idx = expand(S: Offset);
363	GEPNoWrapFlags NW = (Flags & SCEV::FlagNUW) ? GEPNoWrapFlags::noUnsignedWrap()
364	: GEPNoWrapFlags::none();
365
366	// Fold a GEP with constant operands.
367	if (Constant *CLHS = dyn_cast<Constant>(Val: V))
368	if (Constant *CRHS = dyn_cast<Constant>(Val: Idx))
369	return Builder.CreatePtrAdd(Ptr: CLHS, Offset: CRHS, Name: "", NW);
370
371	// Do a quick scan to see if we have this GEP nearby. If so, reuse it.
372	unsigned ScanLimit = `6`;
373	BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
374	// Scanning starts from the last instruction before the insertion point.
375	BasicBlock::iterator IP = Builder.GetInsertPoint();
376	if (IP != BlockBegin) {
377	--IP;
378	for (; ScanLimit; --IP, --ScanLimit) {
379	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val&: IP)) {
380	if (GEP->getPointerOperand() == V &&
381	GEP->getSourceElementType() == Builder.getInt8Ty() &&
382	GEP->getOperand(i_nocapture: `1`) == Idx) {
383	rememberFlags(I: GEP);
384	GEP->setNoWrapFlags(GEP->getNoWrapFlags() & NW);
385	return &*IP;
386	}
387	}
388	if (IP == BlockBegin) break;
389	}
390	}
391
392	// Save the original insertion point so we can restore it when we're done.
393	SCEVInsertPointGuard Guard(Builder, this);
394
395	// Move the insertion point out of as many loops as we can.
396	while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
397	if (!L->isLoopInvariant(V) \|\| !L->isLoopInvariant(V: Idx)) break;
398	BasicBlock *Preheader = L->getLoopPreheader();
399	if (!Preheader) break;
400
401	// Ok, move up a level.
402	Builder.SetInsertPoint(Preheader->getTerminator());
403	}
404
405	// Emit a GEP.
406	return Builder.CreatePtrAdd(Ptr: V, Offset: Idx, Name: "scevgep", NW);
407	}
408
409	/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
410	/// SCEV expansion. If they are nested, this is the most nested. If they are
411	/// neighboring, pick the later.
412	static const Loop PickMostRelevantLoop(const* Loop A, const* Loop *B,
413	DominatorTree &DT) {
414	if (!A) return B;
415	if (!B) return A;
416	if (A->contains(L: B)) return B;
417	if (B->contains(L: A)) return A;
418	if (DT.dominates(A: A->getHeader(), B: B->getHeader())) return B;
419	if (DT.dominates(A: B->getHeader(), B: A->getHeader())) return A;
420	return A; // Arbitrarily break the tie.
421	}
422
423	/// getRelevantLoop - Get the most relevant loop associated with the given
424	/// expression, according to PickMostRelevantLoop.
425	const Loop SCEVExpander::getRelevantLoop(const* SCEV *S) {
426	// Test whether we've already computed the most relevant loop for this SCEV.
427	auto Pair = RelevantLoops.try_emplace(Key: S);
428	if (!Pair.second)
429	return Pair.first ->second;
430
431	switch (S->getSCEVType()) {
432	case scConstant:
433	case scVScale:
434	return nullptr; // A constant has no relevant loops.
435	case scTruncate:
436	case scZeroExtend:
437	case scSignExtend:
438	case scPtrToInt:
439	case scAddExpr:
440	case scMulExpr:
441	case scUDivExpr:
442	case scAddRecExpr:
443	case scUMaxExpr:
444	case scSMaxExpr:
445	case scUMinExpr:
446	case scSMinExpr:
447	case scSequentialUMinExpr: {
448	const Loop L = nullptr*;
449	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S))
450	L = AR->getLoop();
451	for (const SCEV *Op : S->operands())
452	L = PickMostRelevantLoop(A: L, B: getRelevantLoop(S: Op), DT&: SE.DT);
453	return RelevantLoops [S] = L;
454	}
455	case scUnknown: {
456	const SCEVUnknown *U = cast<SCEVUnknown>(Val: S);
457	if (const Instruction *I = dyn_cast<Instruction>(Val: U->getValue()))
458	return Pair.first ->second = SE.LI.getLoopFor(BB: I->getParent());
459	// A non-instruction has no relevant loops.
460	return nullptr;
461	}
462	case scCouldNotCompute:
463	llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
464	}
465	llvm_unreachable("Unexpected SCEV type!");
466	}
467
468	namespace {
469
470	/// LoopCompare - Compare loops by PickMostRelevantLoop.
471	class LoopCompare {
472	DominatorTree &DT;
473	public:
474	explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
475
476	bool operator()(std::pair<const Loop , const* SCEV *> LHS,
477	std::pair<const Loop , const* SCEV > RHS) const* {
478	// Keep pointer operands sorted at the end.
479	if (LHS.second->getType()->isPointerTy() !=
480	RHS.second->getType()->isPointerTy())
481	return LHS.second->getType()->isPointerTy();
482
483	// Compare loops with PickMostRelevantLoop.
484	if (LHS.first != RHS.first)
485	return PickMostRelevantLoop(A: LHS.first, B: RHS.first, DT) != LHS.first;
486
487	// If one operand is a non-constant negative and the other is not,
488	// put the non-constant negative on the right so that a sub can
489	// be used instead of a negate and add.
490	if (LHS.second->isNonConstantNegative()) {
491	if (!RHS.second->isNonConstantNegative())
492	return false;
493	} else if (RHS.second->isNonConstantNegative())
494	return true;
495
496	// Otherwise they are equivalent according to this comparison.
497	return false;
498	}
499	};
500
501	}
502
503	Value SCEVExpander::visitAddExpr(const* SCEVAddExpr *S) {
504	// Recognize the canonical representation of an unsimplifed urem.
505	const SCEV URemLHS = nullptr*;
506	const SCEV URemRHS = nullptr*;
507	if (SE.matchURem(Expr: S, LHS&: URemLHS, RHS&: URemRHS)) {
508	Value *LHS = expand(S: URemLHS);
509	Value *RHS = expand(S: URemRHS);
510	return InsertBinop(Opcode: Instruction::URem, LHS, RHS, Flags: SCEV::FlagAnyWrap,
511	/IsSafeToHoist/ false);
512	}
513
514	// Collect all the add operands in a loop, along with their associated loops.
515	// Iterate in reverse so that constants are emitted last, all else equal, and
516	// so that pointer operands are inserted first, which the code below relies on
517	// to form more involved GEPs.
518	SmallVector<std::pair<const Loop , const* SCEV *>, `8`> OpsAndLoops;
519	for (const SCEV *Op : reverse(C: S->operands()))
520	OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));
521
522	// Sort by loop. Use a stable sort so that constants follow non-constants and
523	// pointer operands precede non-pointer operands.
524	llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare (SE.DT));
525
526	// Emit instructions to add all the operands. Hoist as much as possible
527	// out of loops, and form meaningful getelementptrs where possible.
528	Value Sum = nullptr*;
529	for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
530	const Loop *CurLoop = I->first;
531	const SCEV *Op = I->second;
532	if (!Sum) {
533	// This is the first operand. Just expand it.
534	Sum = expand(S: Op);
535	++I;
536	continue;
537	}
538
539	assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
540	if (isa<PointerType>(Val: Sum->getType())) {
541	// The running sum expression is a pointer. Try to form a getelementptr
542	// at this level with that as the base.
543	SmallVector<const SCEV *, `4`> NewOps;
544	for (; I != E && I->first == CurLoop; ++I) {
545	// If the operand is SCEVUnknown and not instructions, peek through
546	// it, to enable more of it to be folded into the GEP.
547	const SCEV *X = I->second;
548	if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: X))
549	if (!isa<Instruction>(Val: U->getValue()))
550	X = SE.getSCEV(V: U->getValue());
551	NewOps.push_back(Elt: X);
552	}
553	Sum = expandAddToGEP(Offset: SE.getAddExpr(Ops&: NewOps), V: Sum, Flags: S->getNoWrapFlags());
554	} else if (Op->isNonConstantNegative()) {
555	// Instead of doing a negate and add, just do a subtract.
556	Value *W = expand(S: SE.getNegativeSCEV(V: Op));
557	Sum = InsertBinop(Opcode: Instruction::Sub, LHS: Sum, RHS: W, Flags: SCEV::FlagAnyWrap,
558	/IsSafeToHoist/ true);
559	++I;
560	} else {
561	// A simple add.
562	Value *W = expand(S: Op);
563	// Canonicalize a constant to the RHS.
564	if (isa<Constant>(Val: Sum))
565	std::swap(a&: Sum, b&: W);
566	Sum = InsertBinop(Opcode: Instruction::Add, LHS: Sum, RHS: W, Flags: S->getNoWrapFlags(),
567	/IsSafeToHoist/ true);
568	++I;
569	}
570	}
571
572	return Sum;
573	}
574
575	Value SCEVExpander::visitMulExpr(const* SCEVMulExpr *S) {
576	Type *Ty = S->getType();
577
578	// Collect all the mul operands in a loop, along with their associated loops.
579	// Iterate in reverse so that constants are emitted last, all else equal.
580	SmallVector<std::pair<const Loop , const* SCEV *>, `8`> OpsAndLoops;
581	for (const SCEV *Op : reverse(C: S->operands()))
582	OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));
583
584	// Sort by loop. Use a stable sort so that constants follow non-constants.
585	llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare (SE.DT));
586
587	// Emit instructions to mul all the operands. Hoist as much as possible
588	// out of loops.
589	Value Prod = nullptr*;
590	auto I = OpsAndLoops.begin();
591
592	// Expand the calculation of X pow N in the following manner:
593	// Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
594	// X pow N = (X pow P1) (X pow P2) * ... * (X pow PK).*
595	const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
596	auto E = I;
597	// Calculate how many times the same operand from the same loop is included
598	// into this power.
599	uint64_t Exponent = `0`;
600	const uint64_t MaxExponent = UINT64_MAX >> `1`;
601	// No one sane will ever try to calculate such huge exponents, but if we
602	// need this, we stop on UINT64_MAX / 2 because we need to exit the loop
603	// below when the power of 2 exceeds our Exponent, and we want it to be
604	// 1u << 31 at most to not deal with unsigned overflow.
605	while (E != OpsAndLoops.end() && I == E && Exponent != MaxExponent) {
606	++Exponent;
607	++E;
608	}
609	assert(Exponent > `0` && "Trying to calculate a zeroth exponent of operand?");
610
611	// Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
612	// that are needed into the result.
613	Value *P = expand(S: I->second);
614	Value Result = nullptr*;
615	if (Exponent & `1`)
616	Result = P;
617	for (uint64_t BinExp = `2`; BinExp <= Exponent; BinExp <<= `1`) {
618	P = InsertBinop(Opcode: Instruction::Mul, LHS: P, RHS: P, Flags: SCEV::FlagAnyWrap,
619	/IsSafeToHoist/ true);
620	if (Exponent & BinExp)
621	Result = Result ? InsertBinop(Opcode: Instruction::Mul, LHS: Result, RHS: P,
622	Flags: SCEV::FlagAnyWrap,
623	/IsSafeToHoist/ true)
624	: P;
625	}
626
627	I = E;
628	assert(Result && "Nothing was expanded?");
629	return Result;
630	};
631
632	while (I != OpsAndLoops.end()) {
633	if (!Prod) {
634	// This is the first operand. Just expand it.
635	Prod = ExpandOpBinPowN ();
636	} else if (I->second->isAllOnesValue()) {
637	// Instead of doing a multiply by negative one, just do a negate.
638	Prod = InsertBinop(Opcode: Instruction::Sub, LHS: Constant::getNullValue(Ty), RHS: Prod,
639	Flags: SCEV::FlagAnyWrap, /IsSafeToHoist/ true);
640	++I;
641	} else {
642	// A simple mul.
643	Value *W = ExpandOpBinPowN ();
644	// Canonicalize a constant to the RHS.
645	if (isa<Constant>(Val: Prod)) std::swap(a&: Prod, b&: W);
646	const APInt *RHS;
647	if (match(V: W, P: m_Power2(V&: RHS))) {
648	// Canonicalize Prod(1<<C) to Prod<<C.*
649	assert(!Ty->isVectorTy() && "vector types are not SCEVable");
650	auto NWFlags = S->getNoWrapFlags();
651	// clear nsw flag if shl will produce poison value.
652	if (RHS->logBase2() == RHS->getBitWidth() - `1`)
653	NWFlags = ScalarEvolution::clearFlags(Flags: NWFlags, OffFlags: SCEV::FlagNSW);
654	Prod = InsertBinop(Opcode: Instruction::Shl, LHS: Prod,
655	RHS: ConstantInt::get(Ty, V: RHS->logBase2()), Flags: NWFlags,
656	/IsSafeToHoist/ true);
657	} else {
658	Prod = InsertBinop(Opcode: Instruction::Mul, LHS: Prod, RHS: W, Flags: S->getNoWrapFlags(),
659	/IsSafeToHoist/ true);
660	}
661	}
662	}
663
664	return Prod;
665	}
666
667	Value SCEVExpander::visitUDivExpr(const* SCEVUDivExpr *S) {
668	Value *LHS = expand(S: S->getLHS());
669	if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Val: S->getRHS())) {
670	const APInt &RHS = SC->getAPInt();
671	if (RHS.isPowerOf2())
672	return InsertBinop(Opcode: Instruction::LShr, LHS,
673	RHS: ConstantInt::get(Ty: SC->getType(), V: RHS.logBase2()),
674	Flags: SCEV::FlagAnyWrap, /IsSafeToHoist/ true);
675	}
676
677	const SCEV *RHSExpr = S->getRHS();
678	Value *RHS = expand(S: RHSExpr);
679	if (SafeUDivMode) {
680	bool GuaranteedNotPoison =
681	ScalarEvolution::isGuaranteedNotToBePoison(Op: RHSExpr);
682	if (!GuaranteedNotPoison)
683	RHS = Builder.CreateFreeze(V: RHS);
684
685	// We need an umax if either RHSExpr is not known to be zero, or if it is
686	// not guaranteed to be non-poison. In the later case, the frozen poison may
687	// be 0.
688	if (!SE.isKnownNonZero(S: RHSExpr) \|\| !GuaranteedNotPoison)
689	RHS = Builder.CreateIntrinsic(RetTy: RHS->getType(), ID: Intrinsic::umax,
690	Args: {RHS, ConstantInt::get(Ty: RHS->getType(), V: `1`)});
691	}
692	return InsertBinop(Opcode: Instruction::UDiv, LHS, RHS, Flags: SCEV::FlagAnyWrap,
693	/IsSafeToHoist/ SE.isKnownNonZero(S: S->getRHS()));
694	}
695
696	/// Determine if this is a well-behaved chain of instructions leading back to
697	/// the PHI. If so, it may be reused by expanded expressions.
698	bool SCEVExpander::isNormalAddRecExprPHI(PHINode PN, Instruction IncV,
699	const Loop *L) {
700	if (IncV->getNumOperands() == `0` \|\| isa<PHINode>(Val: IncV) \|\|
701	(isa<CastInst>(Val: IncV) && !isa<BitCastInst>(Val: IncV)))
702	return false;
703	// If any of the operands don't dominate the insert position, bail.
704	// Addrec operands are always loop-invariant, so this can only happen
705	// if there are instructions which haven't been hoisted.
706	if (L == IVIncInsertLoop) {
707	for (Use &Op : llvm::drop_begin(RangeOrContainer: IncV->operands()))
708	if (Instruction *OInst = dyn_cast<Instruction>(Val&: Op))
709	if (!SE.DT.dominates(Def: OInst, User: IVIncInsertPos))
710	return false;
711	}
712	// Advance to the next instruction.
713	IncV = dyn_cast<Instruction>(Val: IncV->getOperand(i: `0`));
714	if (!IncV)
715	return false;
716
717	if (IncV->mayHaveSideEffects())
718	return false;
719
720	if (IncV == PN)
721	return true;
722
723	return isNormalAddRecExprPHI(PN, IncV, L);
724	}
725
726	/// getIVIncOperand returns an induction variable increment's induction
727	/// variable operand.
728	///
729	/// If allowScale is set, any type of GEP is allowed as long as the nonIV
730	/// operands dominate InsertPos.
731	///
732	/// If allowScale is not set, ensure that a GEP increment conforms to one of the
733	/// simple patterns generated by getAddRecExprPHILiterally and
734	/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
735	Instruction SCEVExpander::getIVIncOperand(Instruction IncV,
736	Instruction *InsertPos,
737	bool allowScale) {
738	if (IncV == InsertPos)
739	return nullptr;
740
741	switch (IncV->getOpcode()) {
742	default:
743	return nullptr;
744	// Check for a simple Add/Sub or GEP of a loop invariant step.
745	case Instruction::Add:
746	case Instruction::Sub: {
747	Instruction *OInst = dyn_cast<Instruction>(Val: IncV->getOperand(i: `1`));
748	if (!OInst \|\| SE.DT.dominates(Def: OInst, User: InsertPos))
749	return dyn_cast<Instruction>(Val: IncV->getOperand(i: `0`));
750	return nullptr;
751	}
752	case Instruction::BitCast:
753	return dyn_cast<Instruction>(Val: IncV->getOperand(i: `0`));
754	case Instruction::GetElementPtr:
755	for (Use &U : llvm::drop_begin(RangeOrContainer: IncV->operands())) {
756	if (isa<Constant>(Val: U))
757	continue;
758	if (Instruction *OInst = dyn_cast<Instruction>(Val&: U)) {
759	if (!SE.DT.dominates(Def: OInst, User: InsertPos))
760	return nullptr;
761	}
762	if (allowScale) {
763	// allow any kind of GEP as long as it can be hoisted.
764	continue;
765	}
766	// GEPs produced by SCEVExpander use i8 element type.
767	if (!cast<GEPOperator>(Val: IncV)->getSourceElementType()->isIntegerTy(Bitwidth: `8`))
768	return nullptr;
769	break;
770	}
771	return dyn_cast<Instruction>(Val: IncV->getOperand(i: `0`));
772	}
773	}
774
775	/// If the insert point of the current builder or any of the builders on the
776	/// stack of saved builders has 'I' as its insert point, update it to point to
777	/// the instruction after 'I'. This is intended to be used when the instruction
778	/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
779	/// different block, the inconsistent insert point (with a mismatched
780	/// Instruction and Block) can lead to an instruction being inserted in a block
781	/// other than its parent.
782	void SCEVExpander::fixupInsertPoints(Instruction *I) {
783	BasicBlock::iterator It(*I);
784	BasicBlock::iterator NewInsertPt = std::next(x: It);
785	if (Builder.GetInsertPoint() == It)
786	Builder.SetInsertPoint(&*NewInsertPt);
787	for (auto *InsertPtGuard : InsertPointGuards)
788	if (InsertPtGuard->GetInsertPoint() == It)
789	InsertPtGuard->SetInsertPoint(NewInsertPt);
790	}
791
792	/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
793	/// it available to other uses in this loop. Recursively hoist any operands,
794	/// until we reach a value that dominates InsertPos.
795	bool SCEVExpander::hoistIVInc(Instruction IncV, Instruction InsertPos,
796	bool RecomputePoisonFlags) {
797	auto FixupPoisonFlags = [this](Instruction *I) {
798	// Drop flags that are potentially inferred from old context and infer flags
799	// in new context.
800	rememberFlags(I);
801	I->dropPoisonGeneratingFlags();
802	if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
803	if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
804	auto *BO = cast<BinaryOperator>(Val: I);
805	BO->setHasNoUnsignedWrap(
806	ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
807	BO->setHasNoSignedWrap(
808	ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
809	}
810	};
811
812	if (SE.DT.dominates(Def: IncV, User: InsertPos)) {
813	if (RecomputePoisonFlags)
814	FixupPoisonFlags (IncV);
815	return true;
816	}
817
818	// InsertPos must itself dominate IncV so that IncV's new position satisfies
819	// its existing users.
820	if (isa<PHINode>(Val: InsertPos) \|\|
821	!SE.DT.dominates(A: InsertPos->getParent(), B: IncV->getParent()))
822	return false;
823
824	if (!SE.LI.movementPreservesLCSSAForm(Inst: IncV, NewLoc: InsertPos))
825	return false;
826
827	// Check that the chain of IV operands leading back to Phi can be hoisted.
828	SmallVector<Instruction*, `4`> IVIncs;
829	for(;;) {
830	Instruction Oper = getIVIncOperand(IncV, InsertPos, /allowScale/*true);
831	if (!Oper)
832	return false;
833	// IncV is safe to hoist.
834	IVIncs.push_back(Elt: IncV);
835	IncV = Oper;
836	if (SE.DT.dominates(Def: IncV, User: InsertPos))
837	break;
838	}
839	for (Instruction *I : llvm::reverse(C&: IVIncs)) {
840	fixupInsertPoints(I);
841	I->moveBefore(InsertPos: InsertPos->getIterator());
842	if (RecomputePoisonFlags)
843	FixupPoisonFlags (I);
844	}
845	return true;
846	}
847
848	bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
849	PHINode *WidePhi,
850	Instruction *OrigInc,
851	Instruction *WideInc) {
852	return match(V: OrigInc, P: m_c_BinOp(L: m_Specific(V: OrigPhi), R: m_Value())) &&
853	match(V: WideInc, P: m_c_BinOp(L: m_Specific(V: WidePhi), R: m_Value())) &&
854	OrigInc->getOpcode() == WideInc->getOpcode();
855	}
856
857	/// Determine if this cyclic phi is in a form that would have been generated by
858	/// LSR. We don't care if the phi was actually expanded in this pass, as long
859	/// as it is in a low-cost form, for example, no implied multiplication. This
860	/// should match any patterns generated by getAddRecExprPHILiterally and
861	/// expandAddtoGEP.
862	bool SCEVExpander::isExpandedAddRecExprPHI(PHINode PN, Instruction IncV,
863	const Loop *L) {
864	for(Instruction *IVOper = IncV;
865	(IVOper = getIVIncOperand(IncV: IVOper, InsertPos: L->getLoopPreheader()->getTerminator(),
866	/allowScale=/false));) {
867	if (IVOper == PN)
868	return true;
869	}
870	return false;
871	}
872
873	/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
874	/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
875	/// need to materialize IV increments elsewhere to handle difficult situations.
876	Value SCEVExpander::expandIVInc(PHINode PN, Value StepV, const* Loop *L,
877	bool useSubtract) {
878	Value *IncV;
879	// If the PHI is a pointer, use a GEP, otherwise use an add or sub.
880	if (PN->getType()->isPointerTy()) {
881	// TODO: Change name to IVName.iv.next.
882	IncV = Builder.CreatePtrAdd(Ptr: PN, Offset: StepV, Name: "scevgep");
883	} else {
884	IncV = useSubtract ?
885	Builder.CreateSub(LHS: PN, RHS: StepV, Name: Twine (IVName) + ".iv.next") :
886	Builder.CreateAdd(LHS: PN, RHS: StepV, Name: Twine (IVName) + ".iv.next");
887	}
888	return IncV;
889	}
890
891	/// Check whether we can cheaply express the requested SCEV in terms of
892	/// the available PHI SCEV by truncation and/or inversion of the step.
893	static bool canBeCheaplyTransformed(ScalarEvolution &SE,
894	const SCEVAddRecExpr *Phi,
895	const SCEVAddRecExpr *Requested,
896	bool &InvertStep) {
897	// We can't transform to match a pointer PHI.
898	Type *PhiTy = Phi->getType();
899	Type *RequestedTy = Requested->getType();
900	if (PhiTy->isPointerTy() \|\| RequestedTy->isPointerTy())
901	return false;
902
903	if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
904	return false;
905
906	// Try truncate it if necessary.
907	Phi = dyn_cast<SCEVAddRecExpr>(Val: SE.getTruncateOrNoop(V: Phi, Ty: RequestedTy));
908	if (!Phi)
909	return false;
910
911	// Check whether truncation will help.
912	if (Phi == Requested) {
913	InvertStep = false;
914	return true;
915	}
916
917	// Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
918	if (SE.getMinusSCEV(LHS: Requested->getStart(), RHS: Requested) == Phi) {
919	InvertStep = true;
920	return true;
921	}
922
923	return false;
924	}
925
926	static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
927	if (!isa<IntegerType>(Val: AR->getType()))
928	return false;
929
930	unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
931	Type WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth `2`);
932	const SCEV *Step = AR->getStepRecurrence(SE);
933	const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getSignExtendExpr(Op: Step, Ty: WideTy),
934	RHS: SE.getSignExtendExpr(Op: AR, Ty: WideTy));
935	const SCEV *ExtendAfterOp =
936	SE.getSignExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
937	return ExtendAfterOp == OpAfterExtend;
938	}
939
940	static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
941	if (!isa<IntegerType>(Val: AR->getType()))
942	return false;
943
944	unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
945	Type WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth `2`);
946	const SCEV *Step = AR->getStepRecurrence(SE);
947	const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getZeroExtendExpr(Op: Step, Ty: WideTy),
948	RHS: SE.getZeroExtendExpr(Op: AR, Ty: WideTy));
949	const SCEV *ExtendAfterOp =
950	SE.getZeroExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
951	return ExtendAfterOp == OpAfterExtend;
952	}
953
954	/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
955	/// the base addrec, which is the addrec without any non-loop-dominating
956	/// values, and return the PHI.
957	PHINode *
958	SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
959	const Loop L, Type &TruncTy,
960	bool &InvertStep) {
961	assert((!IVIncInsertLoop \|\| IVIncInsertPos) &&
962	"Uninitialized insert position");
963
964	// Reuse a previously-inserted PHI, if present.
965	BasicBlock *LatchBlock = L->getLoopLatch();
966	if (LatchBlock) {
967	PHINode AddRecPhiMatch = nullptr*;
968	Instruction IncV = nullptr*;
969	TruncTy = nullptr;
970	InvertStep = false;
971
972	// Only try partially matching scevs that need truncation and/or
973	// step-inversion if we know this loop is outside the current loop.
974	bool TryNonMatchingSCEV =
975	IVIncInsertLoop &&
976	SE.DT.properlyDominates(A: LatchBlock, B: IVIncInsertLoop->getHeader());
977
978	for (PHINode &PN : L->getHeader()->phis()) {
979	if (!SE.isSCEVable(Ty: PN.getType()))
980	continue;
981
982	// We should not look for a incomplete PHI. Getting SCEV for a incomplete
983	// PHI has no meaning at all.
984	if (!PN.isComplete()) {
985	SCEV_DEBUG_WITH_TYPE(
986	DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
987	continue;
988	}
989
990	const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: &PN));
991	if (!PhiSCEV)
992	continue;
993
994	bool IsMatchingSCEV = PhiSCEV == Normalized;
995	// We only handle truncation and inversion of phi recurrences for the
996	// expanded expression if the expanded expression's loop dominates the
997	// loop we insert to. Check now, so we can bail out early.
998	if (!IsMatchingSCEV && !TryNonMatchingSCEV)
999	continue;
1000
1001	// TODO: this possibly can be reworked to avoid this cast at all.
1002	Instruction *TempIncV =
1003	dyn_cast<Instruction>(Val: PN.getIncomingValueForBlock(BB: LatchBlock));
1004	if (!TempIncV)
1005	continue;
1006
1007	// Check whether we can reuse this PHI node.
1008	if (LSRMode) {
1009	if (!isExpandedAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
1010	continue;
1011	} else {
1012	if (!isNormalAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
1013	continue;
1014	}
1015
1016	// Stop if we have found an exact match SCEV.
1017	if (IsMatchingSCEV) {
1018	IncV = TempIncV;
1019	TruncTy = nullptr;
1020	InvertStep = false;
1021	AddRecPhiMatch = &PN;
1022	break;
1023	}
1024
1025	// Try whether the phi can be translated into the requested form
1026	// (truncated and/or offset by a constant).
1027	if ((!TruncTy \|\| InvertStep) &&
1028	canBeCheaplyTransformed(SE, Phi: PhiSCEV, Requested: Normalized, InvertStep)) {
1029	// Record the phi node. But don't stop we might find an exact match
1030	// later.
1031	AddRecPhiMatch = &PN;
1032	IncV = TempIncV;
1033	TruncTy = Normalized->getType();
1034	}
1035	}
1036
1037	if (AddRecPhiMatch) {
1038	// Ok, the add recurrence looks usable.
1039	// Remember this PHI, even in post-inc mode.
1040	InsertedValues.insert(V: AddRecPhiMatch);
1041	// Remember the increment.
1042	rememberInstruction(I: IncV);
1043	// Those values were not actually inserted but re-used.
1044	ReusedValues.insert(Ptr: AddRecPhiMatch);
1045	ReusedValues.insert(Ptr: IncV);
1046	return AddRecPhiMatch;
1047	}
1048	}
1049
1050	// Save the original insertion point so we can restore it when we're done.
1051	SCEVInsertPointGuard Guard(Builder, this);
1052
1053	// Another AddRec may need to be recursively expanded below. For example, if
1054	// this AddRec is quadratic, the StepV may itself be an AddRec in this
1055	// loop. Remove this loop from the PostIncLoops set before expanding such
1056	// AddRecs. Otherwise, we cannot find a valid position for the step
1057	// (i.e. StepV can never dominate its loop header). Ideally, we could do
1058	// SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1059	// so it's not worth implementing SmallPtrSet::swap.
1060	PostIncLoopSet SavedPostIncLoops = PostIncLoops;
1061	PostIncLoops.clear();
1062
1063	// Expand code for the start value into the loop preheader.
1064	assert(L->getLoopPreheader() &&
1065	"Can't expand add recurrences without a loop preheader!");
1066	Value *StartV =
1067	expand(S: Normalized->getStart(), I: L->getLoopPreheader()->getTerminator());
1068
1069	// StartV must have been be inserted into L's preheader to dominate the new
1070	// phi.
1071	assert(!isa<Instruction>(StartV) \|\|
1072	SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
1073	L->getHeader()));
1074
1075	// Expand code for the step value. Do this before creating the PHI so that PHI
1076	// reuse code doesn't see an incomplete PHI.
1077	const SCEV *Step = Normalized->getStepRecurrence(SE);
1078	Type *ExpandTy = Normalized->getType();
1079	// If the stride is negative, insert a sub instead of an add for the increment
1080	// (unless it's a constant, because subtracts of constants are canonicalized
1081	// to adds).
1082	bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1083	if (useSubtract)
1084	Step = SE.getNegativeSCEV(V: Step);
1085	// Expand the step somewhere that dominates the loop header.
1086	Value *StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());
1087
1088	// The no-wrap behavior proved by IsIncrement(NUW\|NSW) is only applicable if
1089	// we actually do emit an addition. It does not apply if we emit a
1090	// subtraction.
1091	bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, AR: Normalized);
1092	bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, AR: Normalized);
1093
1094	// Create the PHI.
1095	BasicBlock *Header = L->getHeader();
1096	Builder.SetInsertPoint(TheBB: Header, IP: Header->begin());
1097	PHINode *PN =
1098	Builder.CreatePHI(Ty: ExpandTy, NumReservedValues: pred_size(BB: Header), Name: Twine (IVName) + ".iv");
1099
1100	// Create the step instructions and populate the PHI.
1101	for (BasicBlock *Pred : predecessors(BB: Header)) {
1102	// Add a start value.
1103	if (!L->contains(BB: Pred)) {
1104	PN->addIncoming(V: StartV, BB: Pred);
1105	continue;
1106	}
1107
1108	// Create a step value and add it to the PHI.
1109	// If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1110	// instructions at IVIncInsertPos.
1111	Instruction *InsertPos = L == IVIncInsertLoop ?
1112	IVIncInsertPos : Pred->getTerminator();
1113	Builder.SetInsertPoint(InsertPos);
1114	Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1115
1116	if (isa<OverflowingBinaryOperator>(Val: IncV)) {
1117	if (IncrementIsNUW)
1118	cast<BinaryOperator>(Val: IncV)->setHasNoUnsignedWrap();
1119	if (IncrementIsNSW)
1120	cast<BinaryOperator>(Val: IncV)->setHasNoSignedWrap();
1121	}
1122	PN->addIncoming(V: IncV, BB: Pred);
1123	}
1124
1125	// After expanding subexpressions, restore the PostIncLoops set so the caller
1126	// can ensure that IVIncrement dominates the current uses.
1127	PostIncLoops = SavedPostIncLoops;
1128
1129	// Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1130	// effective when we are able to use an IV inserted here, so record it.
1131	InsertedValues.insert(V: PN);
1132	InsertedIVs.push_back(Elt: PN);
1133	return PN;
1134	}
1135
1136	Value SCEVExpander::expandAddRecExprLiterally(const* SCEVAddRecExpr *S) {
1137	const Loop *L = S->getLoop();
1138
1139	// Determine a normalized form of this expression, which is the expression
1140	// before any post-inc adjustment is made.
1141	const SCEVAddRecExpr *Normalized = S;
1142	if (PostIncLoops.count(Ptr: L)) {
1143	PostIncLoopSet Loops;
1144	Loops.insert(Ptr: L);
1145	Normalized = cast<SCEVAddRecExpr>(
1146	Val: normalizeForPostIncUse(S, Loops, SE, /CheckInvertible=/false));
1147	}
1148
1149	[[maybe_unused]] const SCEV *Start = Normalized->getStart();
1150	const SCEV *Step = Normalized->getStepRecurrence(SE);
1151	assert(SE.properlyDominates(Start, L->getHeader()) &&
1152	"Start does not properly dominate loop header");
1153	assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");
1154
1155	// In some cases, we decide to reuse an existing phi node but need to truncate
1156	// it and/or invert the step.
1157	Type TruncTy = nullptr*;
1158	bool InvertStep = false;
1159	PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1160
1161	// Accommodate post-inc mode, if necessary.
1162	Value *Result;
1163	if (!PostIncLoops.count(Ptr: L))
1164	Result = PN;
1165	else {
1166	// In PostInc mode, use the post-incremented value.
1167	BasicBlock *LatchBlock = L->getLoopLatch();
1168	assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1169	Result = PN->getIncomingValueForBlock(BB: LatchBlock);
1170
1171	// We might be introducing a new use of the post-inc IV that is not poison
1172	// safe, in which case we should drop poison generating flags. Only keep
1173	// those flags for which SCEV has proven that they always hold.
1174	if (isa<OverflowingBinaryOperator>(Val: Result)) {
1175	auto *I = cast<Instruction>(Val: Result);
1176	if (!S->hasNoUnsignedWrap())
1177	I->setHasNoUnsignedWrap(false);
1178	if (!S->hasNoSignedWrap())
1179	I->setHasNoSignedWrap(false);
1180	}
1181
1182	// For an expansion to use the postinc form, the client must call
1183	// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1184	// or dominated by IVIncInsertPos.
1185	if (isa<Instruction>(Val: Result) &&
1186	!SE.DT.dominates(Def: cast<Instruction>(Val: Result),
1187	User: &*Builder.GetInsertPoint())) {
1188	// The induction variable's postinc expansion does not dominate this use.
1189	// IVUsers tries to prevent this case, so it is rare. However, it can
1190	// happen when an IVUser outside the loop is not dominated by the latch
1191	// block. Adjusting IVIncInsertPos before expansion begins cannot handle
1192	// all cases. Consider a phi outside whose operand is replaced during
1193	// expansion with the value of the postinc user. Without fundamentally
1194	// changing the way postinc users are tracked, the only remedy is
1195	// inserting an extra IV increment. StepV might fold into PostLoopOffset,
1196	// but hopefully expandCodeFor handles that.
1197	bool useSubtract =
1198	!S->getType()->isPointerTy() && Step->isNonConstantNegative();
1199	if (useSubtract)
1200	Step = SE.getNegativeSCEV(V: Step);
1201	Value *StepV;
1202	{
1203	// Expand the step somewhere that dominates the loop header.
1204	SCEVInsertPointGuard Guard(Builder, this);
1205	StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());
1206	}
1207	Result = expandIVInc(PN, StepV, L, useSubtract);
1208	}
1209	}
1210
1211	// We have decided to reuse an induction variable of a dominating loop. Apply
1212	// truncation and/or inversion of the step.
1213	if (TruncTy) {
1214	// Truncate the result.
1215	if (TruncTy != Result->getType())
1216	Result = Builder.CreateTrunc(V: Result, DestTy: TruncTy);
1217
1218	// Invert the result.
1219	if (InvertStep)
1220	Result = Builder.CreateSub(LHS: expand(S: Normalized->getStart()), RHS: Result);
1221	}
1222
1223	return Result;
1224	}
1225
1226	Value SCEVExpander::visitAddRecExpr(const* SCEVAddRecExpr *S) {
1227	// In canonical mode we compute the addrec as an expression of a canonical IV
1228	// using evaluateAtIteration and expand the resulting SCEV expression. This
1229	// way we avoid introducing new IVs to carry on the computation of the addrec
1230	// throughout the loop.
1231	//
1232	// For nested addrecs evaluateAtIteration might need a canonical IV of a
1233	// type wider than the addrec itself. Emitting a canonical IV of the
1234	// proper type might produce non-legal types, for example expanding an i64
1235	// {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1236	// back to non-canonical mode for nested addrecs.
1237	if (!CanonicalMode \|\| (S->getNumOperands() > `2`))
1238	return expandAddRecExprLiterally(S);
1239
1240	Type *Ty = SE.getEffectiveSCEVType(Ty: S->getType());
1241	const Loop *L = S->getLoop();
1242
1243	// First check for an existing canonical IV in a suitable type.
1244	PHINode CanonicalIV = nullptr*;
1245	if (PHINode *PN = L->getCanonicalInductionVariable())
1246	if (SE.getTypeSizeInBits(Ty: PN->getType()) >= SE.getTypeSizeInBits(Ty))
1247	CanonicalIV = PN;
1248
1249	// Rewrite an AddRec in terms of the canonical induction variable, if
1250	// its type is more narrow.
1251	if (CanonicalIV &&
1252	SE.getTypeSizeInBits(Ty: CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1253	!S->getType()->isPointerTy()) {
1254	SmallVector<const SCEV *, `4`> NewOps(S->getNumOperands());
1255	for (unsigned i = `0`, e = S->getNumOperands(); i != e; ++i)
1256	NewOps [i] = SE.getAnyExtendExpr(Op: S->getOperand(i), Ty: CanonicalIV->getType());
1257	Value *V = expand(S: SE.getAddRecExpr(Operands&: NewOps, L: S->getLoop(),
1258	Flags: S->getNoWrapFlags(Mask: SCEV::FlagNW)));
1259	BasicBlock::iterator NewInsertPt =
1260	findInsertPointAfter(I: cast<Instruction>(Val: V), MustDominate: &*Builder.GetInsertPoint());
1261	V = expand(S: SE.getTruncateExpr(Op: SE.getUnknown(V), Ty), I: NewInsertPt);
1262	return V;
1263	}
1264
1265	// {X,+,F} --> X + {0,+,F}
1266	if (!S->getStart()->isZero()) {
1267	if (isa<PointerType>(Val: S->getType())) {
1268	Value *StartV = expand(S: SE.getPointerBase(V: S));
1269	return expandAddToGEP(Offset: SE.removePointerBase(S), V: StartV,
1270	Flags: S->getNoWrapFlags(Mask: SCEV::FlagNUW));
1271	}
1272
1273	SmallVector<const SCEV *, `4`> NewOps(S->operands());
1274	NewOps [`0`] = SE.getConstant(Ty, V: `0`);
1275	const SCEV *Rest = SE.getAddRecExpr(Operands&: NewOps, L,
1276	Flags: S->getNoWrapFlags(Mask: SCEV::FlagNW));
1277
1278	// Just do a normal add. Pre-expand the operands to suppress folding.
1279	//
1280	// The LHS and RHS values are factored out of the expand call to make the
1281	// output independent of the argument evaluation order.
1282	const SCEV *AddExprLHS = SE.getUnknown(V: expand(S: S->getStart()));
1283	const SCEV *AddExprRHS = SE.getUnknown(V: expand(S: Rest));
1284	return expand(S: SE.getAddExpr(LHS: AddExprLHS, RHS: AddExprRHS));
1285	}
1286
1287	// If we don't yet have a canonical IV, create one.
1288	if (!CanonicalIV) {
1289	// Create and insert the PHI node for the induction variable in the
1290	// specified loop.
1291	BasicBlock *Header = L->getHeader();
1292	pred_iterator HPB = pred_begin(BB: Header), HPE = pred_end(BB: Header);
1293	CanonicalIV = PHINode::Create(Ty, NumReservedValues: std::distance(first: HPB, last: HPE), NameStr: "indvar");
1294	CanonicalIV->insertBefore(InsertPos: Header->begin());
1295	rememberInstruction(I: CanonicalIV);
1296
1297	SmallSet<BasicBlock *, `4`> PredSeen;
1298	Constant *One = ConstantInt::get(Ty, V: `1`);
1299	for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1300	BasicBlock HP = HPI;
1301	if (!PredSeen.insert(Ptr: HP).second) {
1302	// There must be an incoming value for each predecessor, even the
1303	// duplicates!
1304	CanonicalIV->addIncoming(V: CanonicalIV->getIncomingValueForBlock(BB: HP), BB: HP);
1305	continue;
1306	}
1307
1308	if (L->contains(BB: HP)) {
1309	// Insert a unit add instruction right before the terminator
1310	// corresponding to the back-edge.
1311	Instruction *Add = BinaryOperator::CreateAdd(V1: CanonicalIV, V2: One,
1312	Name: "indvar.next",
1313	InsertBefore: HP->getTerminator()->getIterator());
1314	Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1315	rememberInstruction(I: Add);
1316	CanonicalIV->addIncoming(V: Add, BB: HP);
1317	} else {
1318	CanonicalIV->addIncoming(V: Constant::getNullValue(Ty), BB: HP);
1319	}
1320	}
1321	}
1322
1323	// {0,+,1} --> Insert a canonical induction variable into the loop!
1324	if (S->isAffine() && S->getOperand(i: `1`)->isOne()) {
1325	assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1326	"IVs with types different from the canonical IV should "
1327	"already have been handled!");
1328	return CanonicalIV;
1329	}
1330
1331	// {0,+,F} --> {0,+,1} F*
1332
1333	// If this is a simple linear addrec, emit it now as a special case.
1334	if (S->isAffine()) // {0,+,F} --> iF*
1335	return
1336	expand(S: SE.getTruncateOrNoop(
1337	V: SE.getMulExpr(LHS: SE.getUnknown(V: CanonicalIV),
1338	RHS: SE.getNoopOrAnyExtend(V: S->getOperand(i: `1`),
1339	Ty: CanonicalIV->getType())),
1340	Ty));
1341
1342	// If this is a chain of recurrences, turn it into a closed form, using the
1343	// folders, then expandCodeFor the closed form. This allows the folders to
1344	// simplify the expression without having to build a bunch of special code
1345	// into this folder.
1346	const SCEV IH = SE.getUnknown(V: CanonicalIV); // Get I as a "symbolic" SCEV.*
1347
1348	// Promote S up to the canonical IV type, if the cast is foldable.
1349	const SCEV *NewS = S;
1350	const SCEV *Ext = SE.getNoopOrAnyExtend(V: S, Ty: CanonicalIV->getType());
1351	if (isa<SCEVAddRecExpr>(Val: Ext))
1352	NewS = Ext;
1353
1354	const SCEV *V = cast<SCEVAddRecExpr>(Val: NewS)->evaluateAtIteration(It: IH, SE);
1355
1356	// Truncate the result down to the original type, if needed.
1357	const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1358	return expand(S: T);
1359	}
1360
1361	Value SCEVExpander::visitPtrToIntExpr(const* SCEVPtrToIntExpr *S) {
1362	Value *V = expand(S: S->getOperand());
1363	return ReuseOrCreateCast(V, Ty: S->getType(), Op: CastInst::PtrToInt,
1364	IP: GetOptimalInsertionPointForCastOf(V));
1365	}
1366
1367	Value SCEVExpander::visitTruncateExpr(const* SCEVTruncateExpr *S) {
1368	Value *V = expand(S: S->getOperand());
1369	return Builder.CreateTrunc(V, DestTy: S->getType());
1370	}
1371
1372	Value SCEVExpander::visitZeroExtendExpr(const* SCEVZeroExtendExpr *S) {
1373	Value *V = expand(S: S->getOperand());
1374	return Builder.CreateZExt(V, DestTy: S->getType(), Name: "",
1375	IsNonNeg: SE.isKnownNonNegative(S: S->getOperand()));
1376	}
1377
1378	Value SCEVExpander::visitSignExtendExpr(const* SCEVSignExtendExpr *S) {
1379	Value *V = expand(S: S->getOperand());
1380	return Builder.CreateSExt(V, DestTy: S->getType());
1381	}
1382
1383	Value SCEVExpander::expandMinMaxExpr(const* SCEVNAryExpr *S,
1384	Intrinsic::ID IntrinID, Twine Name,
1385	bool IsSequential) {
1386	bool PrevSafeMode = SafeUDivMode;
1387	SafeUDivMode \|= IsSequential;
1388	Value *LHS = expand(S: S->getOperand(i: S->getNumOperands() - `1`));
1389	Type *Ty = LHS->getType();
1390	if (IsSequential)
1391	LHS = Builder.CreateFreeze(V: LHS);
1392	for (int i = S->getNumOperands() - `2`; i >= `0`; --i) {
1393	SafeUDivMode = (IsSequential && i != `0`) \|\| PrevSafeMode;
1394	Value *RHS = expand(S: S->getOperand(i));
1395	if (IsSequential && i != `0`)
1396	RHS = Builder.CreateFreeze(V: RHS);
1397	Value *Sel;
1398	if (Ty->isIntegerTy())
1399	Sel = Builder.CreateIntrinsic(ID: IntrinID, Types: {Ty}, Args: {LHS, RHS},
1400	/FMFSource=/nullptr, Name);
1401	else {
1402	Value *ICmp =
1403	Builder.CreateICmp(P: MinMaxIntrinsic::getPredicate(ID: IntrinID), LHS, RHS);
1404	Sel = Builder.CreateSelect(C: ICmp, True: LHS, False: RHS, Name);
1405	}
1406	LHS = Sel;
1407	}
1408	SafeUDivMode = PrevSafeMode;
1409	return LHS;
1410	}
1411
1412	Value SCEVExpander::visitSMaxExpr(const* SCEVSMaxExpr *S) {
1413	return expandMinMaxExpr(S, IntrinID: Intrinsic::smax, Name: "smax");
1414	}
1415
1416	Value SCEVExpander::visitUMaxExpr(const* SCEVUMaxExpr *S) {
1417	return expandMinMaxExpr(S, IntrinID: Intrinsic::umax, Name: "umax");
1418	}
1419
1420	Value SCEVExpander::visitSMinExpr(const* SCEVSMinExpr *S) {
1421	return expandMinMaxExpr(S, IntrinID: Intrinsic::smin, Name: "smin");
1422	}
1423
1424	Value SCEVExpander::visitUMinExpr(const* SCEVUMinExpr *S) {
1425	return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin");
1426	}
1427
1428	Value SCEVExpander::visitSequentialUMinExpr(const* SCEVSequentialUMinExpr *S) {
1429	return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin", /IsSequential/true);
1430	}
1431
1432	Value SCEVExpander::visitVScale(const* SCEVVScale *S) {
1433	return Builder.CreateVScale(Ty: S->getType());
1434	}
1435
1436	Value SCEVExpander::expandCodeFor(const* SCEV SH, Type Ty,
1437	BasicBlock::iterator IP) {
1438	setInsertPoint(IP);
1439	Value *V = expandCodeFor(SH, Ty);
1440	return V;
1441	}
1442
1443	Value SCEVExpander::expandCodeFor(const* SCEV SH, Type Ty) {
1444	// Expand the code for this SCEV.
1445	Value *V = expand(S: SH);
1446
1447	if (Ty && Ty != V->getType()) {
1448	assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1449	"non-trivial casts should be done with the SCEVs directly!");
1450	V = InsertNoopCastOfTo(V, Ty);
1451	}
1452	return V;
1453	}
1454
1455	Value *SCEVExpander::FindValueInExprValueMap(
1456	const SCEV S, const* Instruction *InsertPt,
1457	SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1458	// If the expansion is not in CanonicalMode, and the SCEV contains any
1459	// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1460	if (!CanonicalMode && SE.containsAddRecurrence(S))
1461	return nullptr;
1462
1463	// If S is a constant or unknown, it may be worse to reuse an existing Value.
1464	if (isa<SCEVConstant>(Val: S) \|\| isa<SCEVUnknown>(Val: S))
1465	return nullptr;
1466
1467	for (Value *V : SE.getSCEVValues(S)) {
1468	Instruction *EntInst = dyn_cast<Instruction>(Val: V);
1469	if (!EntInst)
1470	continue;
1471
1472	// Choose a Value from the set which dominates the InsertPt.
1473	// InsertPt should be inside the Value's parent loop so as not to break
1474	// the LCSSA form.
1475	assert(EntInst->getFunction() == InsertPt->getFunction());
1476	if (S->getType() != V->getType() \|\| !SE.DT.dominates(Def: EntInst, User: InsertPt) \|\|
1477	!(SE.LI.getLoopFor(BB: EntInst->getParent()) == nullptr \|\|
1478	SE.LI.getLoopFor(BB: EntInst->getParent())->contains(Inst: InsertPt)))
1479	continue;
1480
1481	// Make sure reusing the instruction is poison-safe.
1482	if (SE.canReuseInstruction(S, I: EntInst, DropPoisonGeneratingInsts))
1483	return V;
1484	DropPoisonGeneratingInsts.clear();
1485	}
1486	return nullptr;
1487	}
1488
1489	// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1490	// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1491	// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1492	// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1493	// the expansion will try to reuse Value from ExprValueMap, and only when it
1494	// fails, expand the SCEV literally.
1495	Value SCEVExpander::expand(const* SCEV *S) {
1496	// Compute an insertion point for this SCEV object. Hoist the instructions
1497	// as far out in the loop nest as possible.
1498	BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1499
1500	// We can move insertion point only if there is no div or rem operations
1501	// otherwise we are risky to move it over the check for zero denominator.
1502	auto SafeToHoist = [](const SCEV *S) {
1503	return !SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
1504	if (const auto *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
1505	if (const auto *SC = dyn_cast<SCEVConstant>(Val: D->getRHS()))
1506	// Division by non-zero constants can be hoisted.
1507	return SC->getValue()->isZero();
1508	// All other divisions should not be moved as they may be
1509	// divisions by zero and should be kept within the
1510	// conditions of the surrounding loops that guard their
1511	// execution (see PR35406).
1512	return true;
1513	}
1514	return false;
1515	});
1516	};
1517	if (SafeToHoist (S)) {
1518	for (Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock());;
1519	L = L->getParentLoop()) {
1520	if (SE.isLoopInvariant(S, L)) {
1521	if (!L) break;
1522	if (BasicBlock *Preheader = L->getLoopPreheader()) {
1523	InsertPt = Preheader->getTerminator()->getIterator();
1524	} else {
1525	// LSR sets the insertion point for AddRec start/step values to the
1526	// block start to simplify value reuse, even though it's an invalid
1527	// position. SCEVExpander must correct for this in all cases.
1528	InsertPt = L->getHeader()->getFirstInsertionPt();
1529	}
1530	} else {
1531	// If the SCEV is computable at this level, insert it into the header
1532	// after the PHIs (and after any other instructions that we've inserted
1533	// there) so that it is guaranteed to dominate any user inside the loop.
1534	if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(Ptr: L))
1535	InsertPt = L->getHeader()->getFirstInsertionPt();
1536
1537	while (InsertPt != Builder.GetInsertPoint() &&
1538	(isInsertedInstruction(I: &*InsertPt))) {
1539	InsertPt = std::next(x: InsertPt);
1540	}
1541	break;
1542	}
1543	}
1544	}
1545
1546	// Check to see if we already expanded this here.
1547	auto I = InsertedExpressions.find(Val: std::make_pair(x&: S, y: &*InsertPt));
1548	if (I != InsertedExpressions.end())
1549	return I ->second;
1550
1551	SCEVInsertPointGuard Guard(Builder, this);
1552	Builder.SetInsertPoint(TheBB: InsertPt ->getParent(), IP: InsertPt);
1553
1554	// Expand the expression into instructions.
1555	SmallVector<Instruction *> DropPoisonGeneratingInsts;
1556	Value V = FindValueInExprValueMap(S, InsertPt: &InsertPt, DropPoisonGeneratingInsts);
1557	if (!V) {
1558	V = visit(S);
1559	V = fixupLCSSAFormFor(V);
1560	} else {
1561	for (Instruction *I : DropPoisonGeneratingInsts) {
1562	rememberFlags(I);
1563	I->dropPoisonGeneratingAnnotations();
1564	// See if we can re-infer from first principles any of the flags we just
1565	// dropped.
1566	if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
1567	if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1568	auto *BO = cast<BinaryOperator>(Val: I);
1569	BO->setHasNoUnsignedWrap(
1570	ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
1571	BO->setHasNoSignedWrap(
1572	ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
1573	}
1574	if (auto *NNI = dyn_cast<PossiblyNonNegInst>(Val: I)) {
1575	auto *Src = NNI->getOperand(i_nocapture: `0`);
1576	if (isImpliedByDomCondition(Pred: ICmpInst::ICMP_SGE, LHS: Src,
1577	RHS: Constant::getNullValue(Ty: Src->getType()), ContextI: I,
1578	DL).value_or(u: false))
1579	NNI->setNonNeg(true);
1580	}
1581	}
1582	}
1583	// Remember the expanded value for this SCEV at this location.
1584	//
1585	// This is independent of PostIncLoops. The mapped value simply materializes
1586	// the expression at this insertion point. If the mapped value happened to be
1587	// a postinc expansion, it could be reused by a non-postinc user, but only if
1588	// its insertion point was already at the head of the loop.
1589	InsertedExpressions [std::make_pair(x&: S, y: &*InsertPt)] = V;
1590	return V;
1591	}
1592
1593	void SCEVExpander::rememberInstruction(Value *I) {
1594	auto DoInsert = [this](Value *V) {
1595	if (!PostIncLoops.empty())
1596	InsertedPostIncValues.insert(V);
1597	else
1598	InsertedValues.insert(V);
1599	};
1600	DoInsert (I);
1601	}
1602
1603	void SCEVExpander::rememberFlags(Instruction *I) {
1604	// If we already have flags for the instruction, keep the existing ones.
1605	OrigFlags.try_emplace(Key: I, Args: PoisonFlags (I));
1606	}
1607
1608	void SCEVExpander::replaceCongruentIVInc(
1609	PHINode &Phi, PHINode &OrigPhi, Loop L, const* DominatorTree *DT,
1610	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1611	BasicBlock *LatchBlock = L->getLoopLatch();
1612	if (!LatchBlock)
1613	return;
1614
1615	Instruction *OrigInc =
1616	dyn_cast<Instruction>(Val: OrigPhi->getIncomingValueForBlock(BB: LatchBlock));
1617	Instruction *IsomorphicInc =
1618	dyn_cast<Instruction>(Val: Phi->getIncomingValueForBlock(BB: LatchBlock));
1619	if (!OrigInc \|\| !IsomorphicInc)
1620	return;
1621
1622	// If this phi has the same width but is more canonical, replace the
1623	// original with it. As part of the "more canonical" determination,
1624	// respect a prior decision to use an IV chain.
1625	if (OrigPhi->getType() == Phi->getType()) {
1626	bool Chained = ChainedPhis.contains(V: Phi);
1627	if (!(Chained \|\| isExpandedAddRecExprPHI(PN: OrigPhi, IncV: OrigInc, L)) &&
1628	(Chained \|\| isExpandedAddRecExprPHI(PN: Phi, IncV: IsomorphicInc, L))) {
1629	std::swap(a&: OrigPhi, b&: Phi);
1630	std::swap(a&: OrigInc, b&: IsomorphicInc);
1631	}
1632	}
1633
1634	// Replacing the congruent phi is sufficient because acyclic
1635	// redundancy elimination, CSE/GVN, should handle the
1636	// rest. However, once SCEV proves that a phi is congruent,
1637	// it's often the head of an IV user cycle that is isomorphic
1638	// with the original phi. It's worth eagerly cleaning up the
1639	// common case of a single IV increment so that DeleteDeadPHIs
1640	// can remove cycles that had postinc uses.
1641	// Because we may potentially introduce a new use of OrigIV that didn't
1642	// exist before at this point, its poison flags need readjustment.
1643	const SCEV *TruncExpr =
1644	SE.getTruncateOrNoop(V: SE.getSCEV(V: OrigInc), Ty: IsomorphicInc->getType());
1645	if (OrigInc == IsomorphicInc \|\| TruncExpr != SE.getSCEV(V: IsomorphicInc) \|\|
1646	!SE.LI.replacementPreservesLCSSAForm(From: IsomorphicInc, To: OrigInc))
1647	return;
1648
1649	bool BothHaveNUW = false;
1650	bool BothHaveNSW = false;
1651	auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(Val: OrigInc);
1652	auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(Val: IsomorphicInc);
1653	if (OBOIncV && OBOIsomorphic) {
1654	BothHaveNUW =
1655	OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1656	BothHaveNSW =
1657	OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1658	}
1659
1660	if (!hoistIVInc(IncV: OrigInc, InsertPos: IsomorphicInc,
1661	/RecomputePoisonFlags/ true))
1662	return;
1663
1664	// We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1665	// are NUW/NSW, then we can preserve them on the wider increment; the narrower
1666	// IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1667	// make IsomorphicInc's uses more poisonous.
1668	assert(OrigInc->getType()->getScalarSizeInBits() >=
1669	IsomorphicInc->getType()->getScalarSizeInBits() &&
1670	"Should only replace an increment with a wider one.");
1671	if (BothHaveNUW \|\| BothHaveNSW) {
1672	OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() \|\| BothHaveNUW);
1673	OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() \|\| BothHaveNSW);
1674	}
1675
1676	SCEV_DEBUG_WITH_TYPE(DebugType,
1677	dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1678	<< *IsomorphicInc << `'\n'`);
1679	Value *NewInc = OrigInc;
1680	if (OrigInc->getType() != IsomorphicInc->getType()) {
1681	BasicBlock::iterator IP;
1682	if (PHINode *PN = dyn_cast<PHINode>(Val: OrigInc))
1683	IP = PN->getParent()->getFirstInsertionPt();
1684	else
1685	IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1686
1687	IRBuilder<> Builder(IP ->getParent(), IP);
1688	Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1689	NewInc =
1690	Builder.CreateTruncOrBitCast(V: OrigInc, DestTy: IsomorphicInc->getType(), Name: IVName);
1691	}
1692	IsomorphicInc->replaceAllUsesWith(V: NewInc);
1693	DeadInsts.emplace_back(Args&: IsomorphicInc);
1694	}
1695
1696	/// replaceCongruentIVs - Check for congruent phis in this loop header and
1697	/// replace them with their most canonical representative. Return the number of
1698	/// phis eliminated.
1699	///
1700	/// This does not depend on any SCEVExpander state but should be used in
1701	/// the same context that SCEVExpander is used.
1702	unsigned
1703	SCEVExpander::replaceCongruentIVs(Loop L, const* DominatorTree *DT,
1704	SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1705	const TargetTransformInfo *TTI) {
1706	// Find integer phis in order of increasing width.
1707	SmallVector<PHINode *, `8`> Phis(
1708	llvm::make_pointer_range(Range: L->getHeader()->phis()));
1709
1710	if (TTI)
1711	// Use stable_sort to preserve order of equivalent PHIs, so the order
1712	// of the sorted Phis is the same from run to run on the same loop.
1713	llvm::stable_sort(Range&: Phis, C: [](Value LHS, Value RHS) {
1714	// Put pointers at the back and make sure pointer < pointer = false.
1715	if (!LHS->getType()->isIntegerTy() \|\| !RHS->getType()->isIntegerTy())
1716	return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1717	return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1718	LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1719	});
1720
1721	unsigned NumElim = `0`;
1722	DenseMap<const SCEV , PHINode > ExprToIVMap;
1723	// Process phis from wide to narrow. Map wide phis to their truncation
1724	// so narrow phis can reuse them.
1725	for (PHINode *Phi : Phis) {
1726	auto SimplifyPHINode = [&](PHINode PN) -> Value {
1727	if (Value *V = simplifyInstruction(I: PN, Q: {DL, &SE.TLI, &SE.DT, &SE.AC}))
1728	return V;
1729	if (!SE.isSCEVable(Ty: PN->getType()))
1730	return nullptr;
1731	auto *Const = dyn_cast<SCEVConstant>(Val: SE.getSCEV(V: PN));
1732	if (!Const)
1733	return nullptr;
1734	return Const->getValue();
1735	};
1736
1737	// Fold constant phis. They may be congruent to other constant phis and
1738	// would confuse the logic below that expects proper IVs.
1739	if (Value *V = SimplifyPHINode (Phi)) {
1740	if (V->getType() != Phi->getType())
1741	continue;
1742	SE.forgetValue(V: Phi);
1743	Phi->replaceAllUsesWith(V);
1744	DeadInsts.emplace_back(Args&: Phi);
1745	++NumElim;
1746	SCEV_DEBUG_WITH_TYPE(DebugType,
1747	dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1748	<< `'\n'`);
1749	continue;
1750	}
1751
1752	if (!SE.isSCEVable(Ty: Phi->getType()))
1753	continue;
1754
1755	PHINode *&OrigPhiRef = ExprToIVMap [SE.getSCEV(V: Phi)];
1756	if (!OrigPhiRef) {
1757	OrigPhiRef = Phi;
1758	if (Phi->getType()->isIntegerTy() && TTI &&
1759	TTI->isTruncateFree(Ty1: Phi->getType(), Ty2: Phis.back()->getType())) {
1760	// Make sure we only rewrite using simple induction variables;
1761	// otherwise, we can make the trip count of a loop unanalyzable
1762	// to SCEV.
1763	const SCEV *PhiExpr = SE.getSCEV(V: Phi);
1764	if (isa<SCEVAddRecExpr>(Val: PhiExpr)) {
1765	// This phi can be freely truncated to the narrowest phi type. Map the
1766	// truncated expression to it so it will be reused for narrow types.
1767	const SCEV *TruncExpr =
1768	SE.getTruncateExpr(Op: PhiExpr, Ty: Phis.back()->getType());
1769	ExprToIVMap [TruncExpr] = Phi;
1770	}
1771	}
1772	continue;
1773	}
1774
1775	// Replacing a pointer phi with an integer phi or vice-versa doesn't make
1776	// sense.
1777	if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1778	continue;
1779
1780	replaceCongruentIVInc(Phi, OrigPhi&: OrigPhiRef, L, DT, DeadInsts);
1781	SCEV_DEBUG_WITH_TYPE(DebugType,
1782	dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1783	<< `'\n'`);
1784	SCEV_DEBUG_WITH_TYPE(
1785	DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << `'\n'`);
1786	++NumElim;
1787	Value *NewIV = OrigPhiRef;
1788	if (OrigPhiRef->getType() != Phi->getType()) {
1789	IRBuilder<> Builder(L->getHeader(),
1790	L->getHeader()->getFirstInsertionPt());
1791	Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1792	NewIV = Builder.CreateTruncOrBitCast(V: OrigPhiRef, DestTy: Phi->getType(), Name: IVName);
1793	}
1794	Phi->replaceAllUsesWith(V: NewIV);
1795	DeadInsts.emplace_back(Args&: Phi);
1796	}
1797	return NumElim;
1798	}
1799
1800	bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1801	const Instruction *At,
1802	Loop *L) {
1803	using namespace llvm::PatternMatch;
1804
1805	SmallVector<BasicBlock *, `4`> ExitingBlocks;
1806	L->getExitingBlocks(ExitingBlocks);
1807
1808	// Look for suitable value in simple conditions at the loop exits.
1809	for (BasicBlock *BB : ExitingBlocks) {
1810	CmpPredicate Pred;
1811	Instruction LHS, RHS;
1812
1813	if (!match(V: BB->getTerminator(),
1814	P: m_Br(C: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Instruction(I&: RHS)),
1815	T: m_BasicBlock(), F: m_BasicBlock())))
1816	continue;
1817
1818	if (SE.getSCEV(V: LHS) == S && SE.DT.dominates(Def: LHS, User: At))
1819	return true;
1820
1821	if (SE.getSCEV(V: RHS) == S && SE.DT.dominates(Def: RHS, User: At))
1822	return true;
1823	}
1824
1825	// Use expand's logic which is used for reusing a previous Value in
1826	// ExprValueMap. Note that we don't currently model the cost of
1827	// needing to drop poison generating flags on the instruction if we
1828	// want to reuse it. We effectively assume that has zero cost.
1829	SmallVector<Instruction *> DropPoisonGeneratingInsts;
1830	return FindValueInExprValueMap(S, InsertPt: At, DropPoisonGeneratingInsts) != nullptr;
1831	}
1832
1833	template<typename T> static InstructionCost costAndCollectOperands(
1834	const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1835	TargetTransformInfo::TargetCostKind CostKind,
1836	SmallVectorImpl<SCEVOperand> &Worklist) {
1837
1838	const T *S = cast<T>(WorkItem.S);
1839	InstructionCost Cost = `0`;
1840	// Object to help map SCEV operands to expanded IR instructions.
1841	struct OperationIndices {
1842	OperationIndices(unsigned Opc, size_t min, size_t max) :
1843	Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1844	unsigned Opcode;
1845	size_t MinIdx;
1846	size_t MaxIdx;
1847	};
1848
1849	// Collect the operations of all the instructions that will be needed to
1850	// expand the SCEVExpr. This is so that when we come to cost the operands,
1851	// we know what the generated user(s) will be.
1852	SmallVector<OperationIndices, `2`> Operations;
1853
1854	auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1855	Operations.emplace_back(Opcode, `0`, `0`);
1856	return TTI.getCastInstrCost(Opcode, Dst: S->getType(),
1857	Src: S->getOperand(`0`)->getType(),
1858	CCH: TTI::CastContextHint::None, CostKind);
1859	};
1860
1861	auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1862	unsigned MinIdx = `0`,
1863	unsigned MaxIdx = `1`) -> InstructionCost {
1864	Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1865	return NumRequired *
1866	TTI.getArithmeticInstrCost(Opcode, Ty: S->getType(), CostKind);
1867	};
1868
1869	auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1870	unsigned MaxIdx) -> InstructionCost {
1871	Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1872	Type *OpType = S->getType();
1873	return NumRequired * TTI.getCmpSelInstrCost(
1874	Opcode, ValTy: OpType, CondTy: CmpInst::makeCmpResultType(opnd_type: OpType),
1875	VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
1876	};
1877
1878	switch (S->getSCEVType()) {
1879	case scCouldNotCompute:
1880	llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1881	case scUnknown:
1882	case scConstant:
1883	case scVScale:
1884	return `0`;
1885	case scPtrToInt:
1886	Cost = CastCost(Instruction::PtrToInt);
1887	break;
1888	case scTruncate:
1889	Cost = CastCost(Instruction::Trunc);
1890	break;
1891	case scZeroExtend:
1892	Cost = CastCost(Instruction::ZExt);
1893	break;
1894	case scSignExtend:
1895	Cost = CastCost(Instruction::SExt);
1896	break;
1897	case scUDivExpr: {
1898	unsigned Opcode = Instruction::UDiv;
1899	if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(`1`)))
1900	if (SC->getAPInt().isPowerOf2())
1901	Opcode = Instruction::LShr;
1902	Cost = ArithCost(Opcode, `1`);
1903	break;
1904	}
1905	case scAddExpr:
1906	Cost = ArithCost(Instruction::Add, S->getNumOperands() - `1`);
1907	break;
1908	case scMulExpr:
1909	// TODO: this is a very pessimistic cost modelling for Mul,
1910	// because of Bin Pow algorithm actually used by the expander,
1911	// see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1912	Cost = ArithCost(Instruction::Mul, S->getNumOperands() - `1`);
1913	break;
1914	case scSMaxExpr:
1915	case scUMaxExpr:
1916	case scSMinExpr:
1917	case scUMinExpr:
1918	case scSequentialUMinExpr: {
1919	// FIXME: should this ask the cost for Intrinsic's?
1920	// The reduction tree.
1921	Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - `1`, `0`, `1`);
1922	Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - `1`, `0`, `2`);
1923	switch (S->getSCEVType()) {
1924	case scSequentialUMinExpr: {
1925	// The safety net against poison.
1926	// FIXME: this is broken.
1927	Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - `1`, `0`, `0`);
1928	Cost += ArithCost(Instruction::Or,
1929	S->getNumOperands() > `2` ? S->getNumOperands() - `2` : `0`);
1930	Cost += CmpSelCost(Instruction::Select, `1`, `0`, `1`);
1931	break;
1932	}
1933	default:
1934	assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1935	"Unhandled SCEV expression type?");
1936	break;
1937	}
1938	break;
1939	}
1940	case scAddRecExpr: {
1941	// Addrec expands to a phi and add per recurrence.
1942	unsigned NumRecurrences = S->getNumOperands() - `1`;
1943	Cost += TTI.getCFInstrCost(Opcode: Instruction::PHI, CostKind) * NumRecurrences;
1944	Cost +=
1945	TTI.getArithmeticInstrCost(Opcode: Instruction::Add, Ty: S->getType(), CostKind) *
1946	NumRecurrences;
1947	// AR start is used in phi.
1948	Worklist.emplace_back(Instruction::PHI, `0`, S->getOperand(`0`));
1949	// Other operands are used in add.
1950	for (const SCEV *Op : S->operands().drop_front())
1951	Worklist.emplace_back(Args: Instruction::Add, Args: `1`, Args&: Op);
1952	break;
1953	}
1954	}
1955
1956	for (auto &CostOp : Operations) {
1957	for (auto SCEVOp : enumerate(S->operands())) {
1958	// Clamp the index to account for multiple IR operations being chained.
1959	size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1960	size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1961	Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1962	}
1963	}
1964	return Cost;
1965	}
1966
1967	bool SCEVExpander::isHighCostExpansionHelper(
1968	const SCEVOperand &WorkItem, Loop L, const* Instruction &At,
1969	InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1970	SmallPtrSetImpl<const SCEV *> &Processed,
1971	SmallVectorImpl<SCEVOperand> &Worklist) {
1972	if (Cost > Budget)
1973	return true; // Already run out of budget, give up.
1974
1975	const SCEV *S = WorkItem.S;
1976	// Was the cost of expansion of this expression already accounted for?
1977	if (!isa<SCEVConstant>(Val: S) && !Processed.insert(Ptr: S).second)
1978	return false; // We have already accounted for this expression.
1979
1980	// If we can find an existing value for this scev available at the point "At"
1981	// then consider the expression cheap.
1982	if (hasRelatedExistingExpansion(S, At: &At, L))
1983	return false; // Consider the expression to be free.
1984
1985	TargetTransformInfo::TargetCostKind CostKind =
1986	L->getHeader()->getParent()->hasMinSize()
1987	? TargetTransformInfo::TCK_CodeSize
1988	: TargetTransformInfo::TCK_RecipThroughput;
1989
1990	switch (S->getSCEVType()) {
1991	case scCouldNotCompute:
1992	llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1993	case scUnknown:
1994	case scVScale:
1995	// Assume to be zero-cost.
1996	return false;
1997	case scConstant: {
1998	// Only evalulate the costs of constants when optimizing for size.
1999	if (CostKind != TargetTransformInfo::TCK_CodeSize)
2000	return false;
2001	const APInt &Imm = cast<SCEVConstant>(Val: S)->getAPInt();
2002	Type *Ty = S->getType();
2003	Cost += TTI.getIntImmCostInst(
2004	Opc: WorkItem.ParentOpcode, Idx: WorkItem.OperandIdx, Imm, Ty, CostKind);
2005	return Cost > Budget;
2006	}
2007	case scTruncate:
2008	case scPtrToInt:
2009	case scZeroExtend:
2010	case scSignExtend: {
2011	Cost +=
2012	costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2013	return false; // Will answer upon next entry into this function.
2014	}
2015	case scUDivExpr: {
2016	// UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2017	// HowManyLessThans produced to compute a precise expression, rather than a
2018	// UDiv from the user's code. If we can't find a UDiv in the code with some
2019	// simple searching, we need to account for it's cost.
2020
2021	// At the beginning of this function we already tried to find existing
2022	// value for plain 'S'. Now try to lookup 'S + 1' since it is common
2023	// pattern involving division. This is just a simple search heuristic.
2024	if (hasRelatedExistingExpansion(
2025	S: SE.getAddExpr(LHS: S, RHS: SE.getConstant(Ty: S->getType(), V: `1`)), At: &At, L))
2026	return false; // Consider it to be free.
2027
2028	Cost +=
2029	costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2030	return false; // Will answer upon next entry into this function.
2031	}
2032	case scAddExpr:
2033	case scMulExpr:
2034	case scUMaxExpr:
2035	case scSMaxExpr:
2036	case scUMinExpr:
2037	case scSMinExpr:
2038	case scSequentialUMinExpr: {
2039	assert(cast<SCEVNAryExpr>(S)->getNumOperands() > `1` &&
2040	"Nary expr should have more than 1 operand.");
2041	// The simple nary expr will require one less op (or pair of ops)
2042	// than the number of it's terms.
2043	Cost +=
2044	costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2045	return Cost > Budget;
2046	}
2047	case scAddRecExpr: {
2048	assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= `2` &&
2049	"Polynomial should be at least linear");
2050	Cost += costAndCollectOperands<SCEVAddRecExpr>(
2051	WorkItem, TTI, CostKind, Worklist);
2052	return Cost > Budget;
2053	}
2054	}
2055	llvm_unreachable("Unknown SCEV kind!");
2056	}
2057
2058	Value SCEVExpander::expandCodeForPredicate(const* SCEVPredicate *Pred,
2059	Instruction *IP) {
2060	assert(IP);
2061	switch (Pred->getKind()) {
2062	case SCEVPredicate::P_Union:
2063	return expandUnionPredicate(Pred: cast<SCEVUnionPredicate>(Val: Pred), Loc: IP);
2064	case SCEVPredicate::P_Compare:
2065	return expandComparePredicate(Pred: cast<SCEVComparePredicate>(Val: Pred), Loc: IP);
2066	case SCEVPredicate::P_Wrap: {
2067	auto *AddRecPred = cast<SCEVWrapPredicate>(Val: Pred);
2068	return expandWrapPredicate(P: AddRecPred, Loc: IP);
2069	}
2070	}
2071	llvm_unreachable("Unknown SCEV predicate type");
2072	}
2073
2074	Value SCEVExpander::expandComparePredicate(const* SCEVComparePredicate *Pred,
2075	Instruction *IP) {
2076	Value *Expr0 = expand(S: Pred->getLHS(), I: IP);
2077	Value *Expr1 = expand(S: Pred->getRHS(), I: IP);
2078
2079	Builder.SetInsertPoint(IP);
2080	auto InvPred = ICmpInst::getInversePredicate(pred: Pred->getPredicate());
2081	auto *I = Builder.CreateICmp(P: InvPred, LHS: Expr0, RHS: Expr1, Name: "ident.check");
2082	return I;
2083	}
2084
2085	Value SCEVExpander::generateOverflowCheck(const* SCEVAddRecExpr *AR,
2086	Instruction Loc, bool* Signed) {
2087	assert(AR->isAffine() && "Cannot generate RT check for "
2088	"non-affine expression");
2089
2090	// FIXME: It is highly suspicious that we're ignoring the predicates here.
2091	SmallVector<const SCEVPredicate *, `4`> Pred;
2092	const SCEV *ExitCount =
2093	SE.getPredicatedSymbolicMaxBackedgeTakenCount(L: AR->getLoop(), Predicates&: Pred);
2094
2095	assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2096
2097	const SCEV *Step = AR->getStepRecurrence(SE);
2098	const SCEV *Start = AR->getStart();
2099
2100	Type *ARTy = AR->getType();
2101	unsigned SrcBits = SE.getTypeSizeInBits(Ty: ExitCount->getType());
2102	unsigned DstBits = SE.getTypeSizeInBits(Ty: ARTy);
2103
2104	// The expression {Start,+,Step} has nusw/nssw if
2105	// Step < 0, Start - \|Step\| Backedge <= Start*
2106	// Step >= 0, Start + \|Step\| Backedge > Start*
2107	// and \|Step\| Backedge doesn't unsigned overflow.*
2108
2109	Builder.SetInsertPoint(Loc);
2110	Value *TripCountVal = expand(S: ExitCount, I: Loc);
2111
2112	IntegerType *Ty =
2113	IntegerType::get(C&: Loc->getContext(), NumBits: SE.getTypeSizeInBits(Ty: ARTy));
2114
2115	Value *StepValue = expand(S: Step, I: Loc);
2116	Value *NegStepValue = expand(S: SE.getNegativeSCEV(V: Step), I: Loc);
2117	Value *StartValue = expand(S: Start, I: Loc);
2118
2119	ConstantInt *Zero =
2120	ConstantInt::get(Context&: Loc->getContext(), V: APInt::getZero(numBits: DstBits));
2121
2122	Builder.SetInsertPoint(Loc);
2123	// Compute \|Step\|
2124	Value *StepCompare = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: StepValue, RHS: Zero);
2125	Value *AbsStep = Builder.CreateSelect(C: StepCompare, True: NegStepValue, False: StepValue);
2126
2127	// Compute \|Step\| Backedge*
2128	// Compute:
2129	// 1. Start + \|Step\| Backedge < Start*
2130	// 2. Start - \|Step\| Backedge > Start*
2131	//
2132	// And select either 1. or 2. depending on whether step is positive or
2133	// negative. If Step is known to be positive or negative, only create
2134	// either 1. or 2.
2135	auto ComputeEndCheck = [&]() -> Value * {
2136	// Checking <u 0 is always false.
2137	if (!Signed && Start->isZero() && SE.isKnownPositive(S: Step))
2138	return ConstantInt::getFalse(Context&: Loc->getContext());
2139
2140	// Get the backedge taken count and truncate or extended to the AR type.
2141	Value *TruncTripCount = Builder.CreateZExtOrTrunc(V: TripCountVal, DestTy: Ty);
2142
2143	Value MulV, OfMul;
2144	if (Step->isOne()) {
2145	// Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2146	// needed, there is never an overflow, so to avoid artificially inflating
2147	// the cost of the check, directly emit the optimized IR.
2148	MulV = TruncTripCount;
2149	OfMul = ConstantInt::getFalse(Context&: MulV->getContext());
2150	} else {
2151	CallInst *Mul = Builder.CreateIntrinsic(ID: Intrinsic::umul_with_overflow, Types: Ty,
2152	Args: {AbsStep, TruncTripCount},
2153	/FMFSource=/nullptr, Name: "mul");
2154	MulV = Builder.CreateExtractValue(Agg: Mul, Idxs: `0`, Name: "mul.result");
2155	OfMul = Builder.CreateExtractValue(Agg: Mul, Idxs: `1`, Name: "mul.overflow");
2156	}
2157
2158	Value Add = nullptr, Sub = nullptr;
2159	bool NeedPosCheck = !SE.isKnownNegative(S: Step);
2160	bool NeedNegCheck = !SE.isKnownPositive(S: Step);
2161
2162	if (isa<PointerType>(Val: ARTy)) {
2163	Value *NegMulV = Builder.CreateNeg(V: MulV);
2164	if (NeedPosCheck)
2165	Add = Builder.CreatePtrAdd(Ptr: StartValue, Offset: MulV);
2166	if (NeedNegCheck)
2167	Sub = Builder.CreatePtrAdd(Ptr: StartValue, Offset: NegMulV);
2168	} else {
2169	if (NeedPosCheck)
2170	Add = Builder.CreateAdd(LHS: StartValue, RHS: MulV);
2171	if (NeedNegCheck)
2172	Sub = Builder.CreateSub(LHS: StartValue, RHS: MulV);
2173	}
2174
2175	Value EndCompareLT = nullptr*;
2176	Value EndCompareGT = nullptr*;
2177	Value EndCheck = nullptr*;
2178	if (NeedPosCheck)
2179	EndCheck = EndCompareLT = Builder.CreateICmp(
2180	P: Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, LHS: Add, RHS: StartValue);
2181	if (NeedNegCheck)
2182	EndCheck = EndCompareGT = Builder.CreateICmp(
2183	P: Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, LHS: Sub, RHS: StartValue);
2184	if (NeedPosCheck && NeedNegCheck) {
2185	// Select the answer based on the sign of Step.
2186	EndCheck = Builder.CreateSelect(C: StepCompare, True: EndCompareGT, False: EndCompareLT);
2187	}
2188	return Builder.CreateOr(LHS: EndCheck, RHS: OfMul);
2189	};
2190	Value *EndCheck = ComputeEndCheck ();
2191
2192	// If the backedge taken count type is larger than the AR type,
2193	// check that we don't drop any bits by truncating it. If we are
2194	// dropping bits, then we have overflow (unless the step is zero).
2195	if (SrcBits > DstBits) {
2196	auto MaxVal = APInt::getMaxValue(numBits: DstBits).zext(width: SrcBits);
2197	auto *BackedgeCheck =
2198	Builder.CreateICmp(P: ICmpInst::ICMP_UGT, LHS: TripCountVal,
2199	RHS: ConstantInt::get(Context&: Loc->getContext(), V: MaxVal));
2200	BackedgeCheck = Builder.CreateAnd(
2201	LHS: BackedgeCheck, RHS: Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: StepValue, RHS: Zero));
2202
2203	EndCheck = Builder.CreateOr(LHS: EndCheck, RHS: BackedgeCheck);
2204	}
2205
2206	return EndCheck;
2207	}
2208
2209	Value SCEVExpander::expandWrapPredicate(const* SCEVWrapPredicate *Pred,
2210	Instruction *IP) {
2211	const auto *A = cast<SCEVAddRecExpr>(Val: Pred->getExpr());
2212	Value NSSWCheck = nullptr, NUSWCheck = nullptr;
2213
2214	// Add a check for NUSW
2215	if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2216	NUSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: false);
2217
2218	// Add a check for NSSW
2219	if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2220	NSSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: true);
2221
2222	if (NUSWCheck && NSSWCheck)
2223	return Builder.CreateOr(LHS: NUSWCheck, RHS: NSSWCheck);
2224
2225	if (NUSWCheck)
2226	return NUSWCheck;
2227
2228	if (NSSWCheck)
2229	return NSSWCheck;
2230
2231	return ConstantInt::getFalse(Context&: IP->getContext());
2232	}
2233
2234	Value SCEVExpander::expandUnionPredicate(const* SCEVUnionPredicate *Union,
2235	Instruction *IP) {
2236	// Loop over all checks in this set.
2237	SmallVector<Value *> Checks;
2238	for (const auto *Pred : Union->getPredicates()) {
2239	Checks.push_back(Elt: expandCodeForPredicate(Pred, IP));
2240	Builder.SetInsertPoint(IP);
2241	}
2242
2243	if (Checks.empty())
2244	return ConstantInt::getFalse(Context&: IP->getContext());
2245	return Builder.CreateOr(Ops: Checks);
2246	}
2247
2248	Value SCEVExpander::fixupLCSSAFormFor(Value V) {
2249	auto *DefI = dyn_cast<Instruction>(Val: V);
2250	if (!PreserveLCSSA \|\| !DefI)
2251	return V;
2252
2253	BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2254	Loop *DefLoop = SE.LI.getLoopFor(BB: DefI->getParent());
2255	Loop *UseLoop = SE.LI.getLoopFor(BB: InsertPt ->getParent());
2256	if (!DefLoop \|\| UseLoop == DefLoop \|\| DefLoop->contains(L: UseLoop))
2257	return V;
2258
2259	// Create a temporary instruction to at the current insertion point, so we
2260	// can hand it off to the helper to create LCSSA PHIs if required for the
2261	// new use.
2262	// FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2263	// would accept a insertion point and return an LCSSA phi for that
2264	// insertion point, so there is no need to insert & remove the temporary
2265	// instruction.
2266	Type *ToTy;
2267	if (DefI->getType()->isIntegerTy())
2268	ToTy = PointerType::get(C&: DefI->getContext(), AddressSpace: `0`);
2269	else
2270	ToTy = Type::getInt32Ty(C&: DefI->getContext());
2271	Instruction *User =
2272	CastInst::CreateBitOrPointerCast(S: DefI, Ty: ToTy, Name: "tmp.lcssa.user", InsertBefore: InsertPt);
2273	auto RemoveUserOnExit =
2274	make_scope_exit(F: [User]() { User->eraseFromParent(); });
2275
2276	SmallVector<Instruction *, `1`> ToUpdate;
2277	ToUpdate.push_back(Elt: DefI);
2278	SmallVector<PHINode *, `16`> PHIsToRemove;
2279	SmallVector<PHINode *, `16`> InsertedPHIs;
2280	formLCSSAForInstructions(Worklist&: ToUpdate, DT: SE.DT, LI: SE.LI, SE: &SE, PHIsToRemove: &PHIsToRemove,
2281	InsertedPHIs: &InsertedPHIs);
2282	for (PHINode *PN : InsertedPHIs)
2283	rememberInstruction(I: PN);
2284	for (PHINode *PN : PHIsToRemove) {
2285	if (!PN->use_empty())
2286	continue;
2287	InsertedValues.erase(V: PN);
2288	InsertedPostIncValues.erase(V: PN);
2289	PN->eraseFromParent();
2290	}
2291
2292	return User->getOperand(i: `0`);
2293	}
2294
2295	namespace {
2296	// Search for a SCEV subexpression that is not safe to expand. Any expression
2297	// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2298	// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2299	// instruction, but the important thing is that we prove the denominator is
2300	// nonzero before expansion.
2301	//
2302	// IVUsers already checks that IV-derived expressions are safe. So this check is
2303	// only needed when the expression includes some subexpression that is not IV
2304	// derived.
2305	//
2306	// Currently, we only allow division by a value provably non-zero here.
2307	//
2308	// We cannot generally expand recurrences unless the step dominates the loop
2309	// header. The expander handles the special case of affine recurrences by
2310	// scaling the recurrence outside the loop, but this technique isn't generally
2311	// applicable. Expanding a nested recurrence outside a loop requires computing
2312	// binomial coefficients. This could be done, but the recurrence has to be in a
2313	// perfectly reduced form, which can't be guaranteed.
2314	struct SCEVFindUnsafe {
2315	ScalarEvolution &SE;
2316	bool CanonicalMode;
2317	bool IsUnsafe = false;
2318
2319	SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2320	: SE(SE), CanonicalMode(CanonicalMode) {}
2321
2322	bool follow(const SCEV *S) {
2323	if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
2324	if (!SE.isKnownNonZero(S: D->getRHS())) {
2325	IsUnsafe = true;
2326	return false;
2327	}
2328	}
2329	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2330	// For non-affine addrecs or in non-canonical mode we need a preheader
2331	// to insert into.
2332	if (!AR->getLoop()->getLoopPreheader() &&
2333	(!CanonicalMode \|\| !AR->isAffine())) {
2334	IsUnsafe = true;
2335	return false;
2336	}
2337	}
2338	return true;
2339	}
2340	bool isDone() const { return IsUnsafe; }
2341	};
2342	} // namespace
2343
2344	bool SCEVExpander::isSafeToExpand(const SCEV S) const* {
2345	SCEVFindUnsafe Search(SE, CanonicalMode);
2346	visitAll(Root: S, Visitor&: Search);
2347	return !Search.IsUnsafe;
2348	}
2349
2350	bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2351	const Instruction InsertionPoint) const* {
2352	if (!isSafeToExpand(S))
2353	return false;
2354	// We have to prove that the expanded site of S dominates InsertionPoint.
2355	// This is easy when not in the same block, but hard when S is an instruction
2356	// to be expanded somewhere inside the same block as our insertion point.
2357	// What we really need here is something analogous to an OrderedBasicBlock,
2358	// but for the moment, we paper over the problem by handling two common and
2359	// cheap to check cases.
2360	if (SE.properlyDominates(S, BB: InsertionPoint->getParent()))
2361	return true;
2362	if (SE.dominates(S, BB: InsertionPoint->getParent())) {
2363	if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2364	return true;
2365	if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S))
2366	if (llvm::is_contained(Range: InsertionPoint->operand_values(), Element: U->getValue()))
2367	return true;
2368	}
2369	return false;
2370	}
2371
2372	void SCEVExpanderCleaner::cleanup() {
2373	// Result is used, nothing to remove.
2374	if (ResultUsed)
2375	return;
2376
2377	// Restore original poison flags.
2378	for (auto [I, Flags] : Expander.OrigFlags)
2379	Flags.apply(I);
2380
2381	auto InsertedInstructions = Expander.getAllInsertedInstructions();
2382	#ifndef NDEBUG
2383	SmallPtrSet<Instruction *, `8`> InsertedSet(llvm::from_range,
2384	InsertedInstructions);
2385	(void)InsertedSet;
2386	#endif
2387	// Remove sets with value handles.
2388	Expander.clear();
2389
2390	// Remove all inserted instructions.
2391	for (Instruction *I : reverse(C&: InsertedInstructions)) {
2392	#ifndef NDEBUG
2393	assert(all_of(I->users(),
2394	[&InsertedSet](Value *U) {
2395	return InsertedSet.contains(cast<Instruction>(U));
2396	}) &&
2397	"removed instruction should only be used by instructions inserted "
2398	"during expansion");
2399	#endif
2400	assert(!I->getType()->isVoidTy() &&
2401	"inserted instruction should have non-void types");
2402	I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType()));
2403	I->eraseFromParent();
2404	}
2405	}
2406

// Browse the source code of llvm_projects/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp