ScalarEvolutionExpander.cpp source code [llvm_projects/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp]

1	//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains the implementation of the scalar evolution expander,
10	// which is used to generate the code corresponding to a given scalar evolution
11	// expression.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
16	#include "llvm/ADT/STLExtras.h"
17	#include "llvm/ADT/ScopeExit.h"
18	#include "llvm/ADT/SmallSet.h"
19	#include "llvm/Analysis/InstructionSimplify.h"
20	#include "llvm/Analysis/LoopInfo.h"
21	#include "llvm/Analysis/TargetTransformInfo.h"
22	#include "llvm/Analysis/ValueTracking.h"
23	#include "llvm/IR/DataLayout.h"
24	#include "llvm/IR/Dominators.h"
25	#include "llvm/IR/IntrinsicInst.h"
26	#include "llvm/IR/PatternMatch.h"
27	#include "llvm/Support/CommandLine.h"
28	#include "llvm/Support/raw_ostream.h"
29	#include "llvm/Transforms/Utils/LoopUtils.h"
30
31	#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
32	#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
33	#else
34	#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
35	#endif
36
37	using namespace llvm;
38
39	cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
40	"scev-cheap-expansion-budget", cl::Hidden, cl::init(Val: `4`),
41	cl::desc ("When performing SCEV expansion only if it is cheap to do, this "
42	"controls the budget that is considered cheap (default = 4)"));
43
44	using namespace PatternMatch;
45
46	PoisonFlags::PoisonFlags(const Instruction *I) {
47	NUW = false;
48	NSW = false;
49	Exact = false;
50	Disjoint = false;
51	NNeg = false;
52	if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I)) {
53	NUW = OBO->hasNoUnsignedWrap();
54	NSW = OBO->hasNoSignedWrap();
55	}
56	if (auto *PEO = dyn_cast<PossiblyExactOperator>(Val: I))
57	Exact = PEO->isExact();
58	if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
59	Disjoint = PDI->isDisjoint();
60	if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
61	NNeg = PNI->hasNonNeg();
62	if (auto *TI = dyn_cast<TruncInst>(Val: I)) {
63	NUW = TI->hasNoUnsignedWrap();
64	NSW = TI->hasNoSignedWrap();
65	}
66	}
67
68	void PoisonFlags::apply(Instruction *I) {
69	if (isa<OverflowingBinaryOperator>(Val: I)) {
70	I->setHasNoUnsignedWrap(NUW);
71	I->setHasNoSignedWrap(NSW);
72	}
73	if (isa<PossiblyExactOperator>(Val: I))
74	I->setIsExact(Exact);
75	if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
76	PDI->setIsDisjoint(Disjoint);
77	if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
78	PNI->setNonNeg(NNeg);
79	if (isa<TruncInst>(Val: I)) {
80	I->setHasNoUnsignedWrap(NUW);
81	I->setHasNoSignedWrap(NSW);
82	}
83	}
84
85	/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
86	/// reusing an existing cast if a suitable one (= dominating IP) exists, or
87	/// creating a new one.
88	Value SCEVExpander::ReuseOrCreateCast(Value V, Type *Ty,
89	Instruction::CastOps Op,
90	BasicBlock::iterator IP) {
91	// This function must be called with the builder having a valid insertion
92	// point. It doesn't need to be the actual IP where the uses of the returned
93	// cast will be added, but it must dominate such IP.
94	// We use this precondition to produce a cast that will dominate all its
95	// uses. In particular, this is crucial for the case where the builder's
96	// insertion point is* the point where we were asked to put the cast.*
97	// Since we don't know the builder's insertion point is actually
98	// where the uses will be added (only that it dominates it), we are
99	// not allowed to move it.
100	BasicBlock::iterator BIP = Builder.GetInsertPoint();
101
102	Value Ret = nullptr*;
103
104	// Check to see if there is already a cast!
105	for (User *U : V->users()) {
106	if (U->getType() != Ty)
107	continue;
108	CastInst *CI = dyn_cast<CastInst>(Val: U);
109	if (!CI \|\| CI->getOpcode() != Op)
110	continue;
111
112	// Found a suitable cast that is at IP or comes before IP. Use it. Note that
113	// the cast must also properly dominate the Builder's insertion point.
114	if (IP ->getParent() == CI->getParent() && &*BIP != CI &&
115	(&IP == CI \|\| CI->comesBefore(Other: &IP))) {
116	Ret = CI;
117	break;
118	}
119	}
120
121	// Create a new cast.
122	if (!Ret) {
123	SCEVInsertPointGuard Guard(Builder, this);
124	Builder.SetInsertPoint(&*IP);
125	Ret = Builder.CreateCast(Op, V, DestTy: Ty, Name: V->getName());
126	}
127
128	// We assert at the end of the function since IP might point to an
129	// instruction with different dominance properties than a cast
130	// (an invoke for example) and not dominate BIP (but the cast does).
131	assert(!isa<Instruction>(Ret) \|\|
132	SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
133
134	return Ret;
135	}
136
137	BasicBlock::iterator
138	SCEVExpander::findInsertPointAfter(Instruction *I,
139	Instruction MustDominate) const* {
140	BasicBlock::iterator IP = ++I->getIterator();
141	if (auto *II = dyn_cast<InvokeInst>(Val: I))
142	IP = II->getNormalDest()->begin();
143
144	while (isa<PHINode>(Val: IP))
145	++IP;
146
147	if (isa<FuncletPadInst>(Val: IP) \|\| isa<LandingPadInst>(Val: IP)) {
148	++IP;
149	} else if (isa<CatchSwitchInst>(Val: IP)) {
150	IP = MustDominate->getParent()->getFirstInsertionPt();
151	} else {
152	assert(!IP->isEHPad() && "unexpected eh pad!");
153	}
154
155	// Adjust insert point to be after instructions inserted by the expander, so
156	// we can re-use already inserted instructions. Avoid skipping past the
157	// original \p MustDominate, in case it is an inserted instruction.
158	while (isInsertedInstruction(I: &IP) && &IP != MustDominate)
159	++IP;
160
161	return IP;
162	}
163
164	BasicBlock::iterator
165	SCEVExpander::GetOptimalInsertionPointForCastOf(Value V) const* {
166	// Cast the argument at the beginning of the entry block, after
167	// any bitcasts of other arguments.
168	if (Argument *A = dyn_cast<Argument>(Val: V)) {
169	BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
170	while ((isa<BitCastInst>(Val: IP) &&
171	isa<Argument>(Val: cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: `0`)) &&
172	cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: `0`) != A) \|\|
173	isa<DbgInfoIntrinsic>(Val: IP))
174	++IP;
175	return IP;
176	}
177
178	// Cast the instruction immediately after the instruction.
179	if (Instruction *I = dyn_cast<Instruction>(Val: V))
180	return findInsertPointAfter(I, MustDominate: &*Builder.GetInsertPoint());
181
182	// Otherwise, this must be some kind of a constant,
183	// so let's plop this cast into the function's entry block.
184	assert(isa<Constant>(V) &&
185	"Expected the cast argument to be a global/constant");
186	return Builder.GetInsertBlock()
187	->getParent()
188	->getEntryBlock()
189	.getFirstInsertionPt();
190	}
191
192	/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
193	/// which must be possible with a noop cast, doing what we can to share
194	/// the casts.
195	Value SCEVExpander::InsertNoopCastOfTo(Value V, Type *Ty) {
196	Instruction::CastOps Op = CastInst::getCastOpcode(Val: V, SrcIsSigned: false, Ty, DstIsSigned: false);
197	assert((Op == Instruction::BitCast \|\|
198	Op == Instruction::PtrToInt \|\|
199	Op == Instruction::IntToPtr) &&
200	"InsertNoopCastOfTo cannot perform non-noop casts!");
201	assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
202	"InsertNoopCastOfTo cannot change sizes!");
203
204	// inttoptr only works for integral pointers. For non-integral pointers, we
205	// can create a GEP on null with the integral value as index. Note that
206	// it is safe to use GEP of null instead of inttoptr here, because only
207	// expressions already based on a GEP of null should be converted to pointers
208	// during expansion.
209	if (Op == Instruction::IntToPtr) {
210	auto *PtrTy = cast<PointerType>(Val: Ty);
211	if (DL.isNonIntegralPointerType(PT: PtrTy))
212	return Builder.CreatePtrAdd(Ptr: Constant::getNullValue(Ty: PtrTy), Offset: V, Name: "scevgep");
213	}
214	// Short-circuit unnecessary bitcasts.
215	if (Op == Instruction::BitCast) {
216	if (V->getType() == Ty)
217	return V;
218	if (CastInst *CI = dyn_cast<CastInst>(Val: V)) {
219	if (CI->getOperand(i_nocapture: `0`)->getType() == Ty)
220	return CI->getOperand(i_nocapture: `0`);
221	}
222	}
223	// Short-circuit unnecessary inttoptr<->ptrtoint casts.
224	if ((Op == Instruction::PtrToInt \|\| Op == Instruction::IntToPtr) &&
225	SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(Ty: V->getType())) {
226	if (CastInst *CI = dyn_cast<CastInst>(Val: V))
227	if ((CI->getOpcode() == Instruction::PtrToInt \|\|
228	CI->getOpcode() == Instruction::IntToPtr) &&
229	SE.getTypeSizeInBits(Ty: CI->getType()) ==
230	SE.getTypeSizeInBits(Ty: CI->getOperand(i_nocapture: `0`)->getType()))
231	return CI->getOperand(i_nocapture: `0`);
232	if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
233	if ((CE->getOpcode() == Instruction::PtrToInt \|\|
234	CE->getOpcode() == Instruction::IntToPtr) &&
235	SE.getTypeSizeInBits(Ty: CE->getType()) ==
236	SE.getTypeSizeInBits(Ty: CE->getOperand(i_nocapture: `0`)->getType()))
237	return CE->getOperand(i_nocapture: `0`);
238	}
239
240	// Fold a cast of a constant.
241	if (Constant *C = dyn_cast<Constant>(Val: V))
242	return ConstantExpr::getCast(ops: Op, C, Ty);
243
244	// Try to reuse existing cast, or insert one.
245	return ReuseOrCreateCast(V, Ty, Op, IP: GetOptimalInsertionPointForCastOf(V));
246	}
247
248	/// InsertBinop - Insert the specified binary operator, doing a small amount
249	/// of work to avoid inserting an obviously redundant operation, and hoisting
250	/// to an outer loop when the opportunity is there and it is safe.
251	Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
252	Value LHS, Value RHS,
253	SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
254	// Fold a binop with constant operands.
255	if (Constant *CLHS = dyn_cast<Constant>(Val: LHS))
256	if (Constant *CRHS = dyn_cast<Constant>(Val: RHS))
257	if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, LHS: CLHS, RHS: CRHS, DL))
258	return Res;
259
260	// Do a quick scan to see if we have this binop nearby. If so, reuse it.
261	unsigned ScanLimit = `6`;
262	BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
263	// Scanning starts from the last instruction before the insertion point.
264	BasicBlock::iterator IP = Builder.GetInsertPoint();
265	if (IP != BlockBegin) {
266	--IP;
267	for (; ScanLimit; --IP, --ScanLimit) {
268	// Don't count dbg.value against the ScanLimit, to avoid perturbing the
269	// generated code.
270	if (isa<DbgInfoIntrinsic>(Val: IP))
271	ScanLimit++;
272
273	auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
274	// Ensure that no-wrap flags match.
275	if (isa<OverflowingBinaryOperator>(Val: I)) {
276	if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
277	return true;
278	if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
279	return true;
280	}
281	// Conservatively, do not use any instruction which has any of exact
282	// flags installed.
283	if (isa<PossiblyExactOperator>(Val: I) && I->isExact())
284	return true;
285	return false;
286	};
287	if (IP ->getOpcode() == (unsigned)Opcode && IP ->getOperand(i: `0`) == LHS &&
288	IP ->getOperand(i: `1`) == RHS && !canGenerateIncompatiblePoison (&*IP))
289	return &*IP;
290	if (IP == BlockBegin) break;
291	}
292	}
293
294	// Save the original insertion point so we can restore it when we're done.
295	DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
296	SCEVInsertPointGuard Guard(Builder, this);
297
298	if (IsSafeToHoist) {
299	// Move the insertion point out of as many loops as we can.
300	while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
301	if (!L->isLoopInvariant(V: LHS) \|\| !L->isLoopInvariant(V: RHS)) break;
302	BasicBlock *Preheader = L->getLoopPreheader();
303	if (!Preheader) break;
304
305	// Ok, move up a level.
306	Builder.SetInsertPoint(Preheader->getTerminator());
307	}
308	}
309
310	// If we haven't found this binop, insert it.
311	// TODO: Use the Builder, which will make CreateBinOp below fold with
312	// InstSimplifyFolder.
313	Instruction *BO = Builder.Insert(I: BinaryOperator::Create(Op: Opcode, S1: LHS, S2: RHS));
314	BO->setDebugLoc(Loc);
315	if (Flags & SCEV::FlagNUW)
316	BO->setHasNoUnsignedWrap();
317	if (Flags & SCEV::FlagNSW)
318	BO->setHasNoSignedWrap();
319
320	return BO;
321	}
322
323	/// expandAddToGEP - Expand an addition expression with a pointer type into
324	/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
325	/// BasicAliasAnalysis and other passes analyze the result. See the rules
326	/// for getelementptr vs. inttoptr in
327	/// http://llvm.org/docs/LangRef.html#pointeraliasing
328	/// for details.
329	///
330	/// Design note: The correctness of using getelementptr here depends on
331	/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
332	/// they may introduce pointer arithmetic which may not be safely converted
333	/// into getelementptr.
334	///
335	/// Design note: It might seem desirable for this function to be more
336	/// loop-aware. If some of the indices are loop-invariant while others
337	/// aren't, it might seem desirable to emit multiple GEPs, keeping the
338	/// loop-invariant portions of the overall computation outside the loop.
339	/// However, there are a few reasons this is not done here. Hoisting simple
340	/// arithmetic is a low-level optimization that often isn't very
341	/// important until late in the optimization process. In fact, passes
342	/// like InstructionCombining will combine GEPs, even if it means
343	/// pushing loop-invariant computation down into loops, so even if the
344	/// GEPs were split here, the work would quickly be undone. The
345	/// LoopStrengthReduction pass, which is usually run quite late (and
346	/// after the last InstructionCombining pass), takes care of hoisting
347	/// loop-invariant portions of expressions, after considering what
348	/// can be folded using target addressing modes.
349	///
350	Value SCEVExpander::expandAddToGEP(const* SCEV Offset, Value V) {
351	assert(!isa<Instruction>(V) \|\|
352	SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
353
354	Value *Idx = expand(S: Offset);
355
356	// Fold a GEP with constant operands.
357	if (Constant *CLHS = dyn_cast<Constant>(Val: V))
358	if (Constant *CRHS = dyn_cast<Constant>(Val: Idx))
359	return Builder.CreatePtrAdd(Ptr: CLHS, Offset: CRHS);
360
361	// Do a quick scan to see if we have this GEP nearby. If so, reuse it.
362	unsigned ScanLimit = `6`;
363	BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
364	// Scanning starts from the last instruction before the insertion point.
365	BasicBlock::iterator IP = Builder.GetInsertPoint();
366	if (IP != BlockBegin) {
367	--IP;
368	for (; ScanLimit; --IP, --ScanLimit) {
369	// Don't count dbg.value against the ScanLimit, to avoid perturbing the
370	// generated code.
371	if (isa<DbgInfoIntrinsic>(Val: IP))
372	ScanLimit++;
373	if (IP ->getOpcode() == Instruction::GetElementPtr &&
374	IP ->getOperand(i: `0`) == V && IP ->getOperand(i: `1`) == Idx &&
375	cast<GEPOperator>(Val: &*IP)->getSourceElementType() ==
376	Builder.getInt8Ty())
377	return &*IP;
378	if (IP == BlockBegin) break;
379	}
380	}
381
382	// Save the original insertion point so we can restore it when we're done.
383	SCEVInsertPointGuard Guard(Builder, this);
384
385	// Move the insertion point out of as many loops as we can.
386	while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
387	if (!L->isLoopInvariant(V) \|\| !L->isLoopInvariant(V: Idx)) break;
388	BasicBlock *Preheader = L->getLoopPreheader();
389	if (!Preheader) break;
390
391	// Ok, move up a level.
392	Builder.SetInsertPoint(Preheader->getTerminator());
393	}
394
395	// Emit a GEP.
396	return Builder.CreatePtrAdd(Ptr: V, Offset: Idx, Name: "scevgep");
397	}
398
399	/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
400	/// SCEV expansion. If they are nested, this is the most nested. If they are
401	/// neighboring, pick the later.
402	static const Loop PickMostRelevantLoop(const* Loop A, const* Loop *B,
403	DominatorTree &DT) {
404	if (!A) return B;
405	if (!B) return A;
406	if (A->contains(L: B)) return B;
407	if (B->contains(L: A)) return A;
408	if (DT.dominates(A: A->getHeader(), B: B->getHeader())) return B;
409	if (DT.dominates(A: B->getHeader(), B: A->getHeader())) return A;
410	return A; // Arbitrarily break the tie.
411	}
412
413	/// getRelevantLoop - Get the most relevant loop associated with the given
414	/// expression, according to PickMostRelevantLoop.
415	const Loop SCEVExpander::getRelevantLoop(const* SCEV *S) {
416	// Test whether we've already computed the most relevant loop for this SCEV.
417	auto Pair = RelevantLoops.insert(KV: std::make_pair(x&: S, y: nullptr));
418	if (!Pair.second)
419	return Pair.first ->second;
420
421	switch (S->getSCEVType()) {
422	case scConstant:
423	case scVScale:
424	return nullptr; // A constant has no relevant loops.
425	case scTruncate:
426	case scZeroExtend:
427	case scSignExtend:
428	case scPtrToInt:
429	case scAddExpr:
430	case scMulExpr:
431	case scUDivExpr:
432	case scAddRecExpr:
433	case scUMaxExpr:
434	case scSMaxExpr:
435	case scUMinExpr:
436	case scSMinExpr:
437	case scSequentialUMinExpr: {
438	const Loop L = nullptr*;
439	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S))
440	L = AR->getLoop();
441	for (const SCEV *Op : S->operands())
442	L = PickMostRelevantLoop(A: L, B: getRelevantLoop(S: Op), DT&: SE.DT);
443	return RelevantLoops [S] = L;
444	}
445	case scUnknown: {
446	const SCEVUnknown *U = cast<SCEVUnknown>(Val: S);
447	if (const Instruction *I = dyn_cast<Instruction>(Val: U->getValue()))
448	return Pair.first ->second = SE.LI.getLoopFor(BB: I->getParent());
449	// A non-instruction has no relevant loops.
450	return nullptr;
451	}
452	case scCouldNotCompute:
453	llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
454	}
455	llvm_unreachable("Unexpected SCEV type!");
456	}
457
458	namespace {
459
460	/// LoopCompare - Compare loops by PickMostRelevantLoop.
461	class LoopCompare {
462	DominatorTree &DT;
463	public:
464	explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
465
466	bool operator()(std::pair<const Loop , const* SCEV *> LHS,
467	std::pair<const Loop , const* SCEV > RHS) const* {
468	// Keep pointer operands sorted at the end.
469	if (LHS.second->getType()->isPointerTy() !=
470	RHS.second->getType()->isPointerTy())
471	return LHS.second->getType()->isPointerTy();
472
473	// Compare loops with PickMostRelevantLoop.
474	if (LHS.first != RHS.first)
475	return PickMostRelevantLoop(A: LHS.first, B: RHS.first, DT) != LHS.first;
476
477	// If one operand is a non-constant negative and the other is not,
478	// put the non-constant negative on the right so that a sub can
479	// be used instead of a negate and add.
480	if (LHS.second->isNonConstantNegative()) {
481	if (!RHS.second->isNonConstantNegative())
482	return false;
483	} else if (RHS.second->isNonConstantNegative())
484	return true;
485
486	// Otherwise they are equivalent according to this comparison.
487	return false;
488	}
489	};
490
491	}
492
493	Value SCEVExpander::visitAddExpr(const* SCEVAddExpr *S) {
494	// Recognize the canonical representation of an unsimplifed urem.
495	const SCEV URemLHS = nullptr*;
496	const SCEV URemRHS = nullptr*;
497	if (SE.matchURem(Expr: S, LHS&: URemLHS, RHS&: URemRHS)) {
498	Value *LHS = expand(S: URemLHS);
499	Value *RHS = expand(S: URemRHS);
500	return InsertBinop(Opcode: Instruction::URem, LHS, RHS, Flags: SCEV::FlagAnyWrap,
501	/IsSafeToHoist/ false);
502	}
503
504	// Collect all the add operands in a loop, along with their associated loops.
505	// Iterate in reverse so that constants are emitted last, all else equal, and
506	// so that pointer operands are inserted first, which the code below relies on
507	// to form more involved GEPs.
508	SmallVector<std::pair<const Loop , const* SCEV *>, `8`> OpsAndLoops;
509	for (const SCEV *Op : reverse(C: S->operands()))
510	OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));
511
512	// Sort by loop. Use a stable sort so that constants follow non-constants and
513	// pointer operands precede non-pointer operands.
514	llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare (SE.DT));
515
516	// Emit instructions to add all the operands. Hoist as much as possible
517	// out of loops, and form meaningful getelementptrs where possible.
518	Value Sum = nullptr*;
519	for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
520	const Loop *CurLoop = I->first;
521	const SCEV *Op = I->second;
522	if (!Sum) {
523	// This is the first operand. Just expand it.
524	Sum = expand(S: Op);
525	++I;
526	continue;
527	}
528
529	assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
530	if (isa<PointerType>(Val: Sum->getType())) {
531	// The running sum expression is a pointer. Try to form a getelementptr
532	// at this level with that as the base.
533	SmallVector<const SCEV *, `4`> NewOps;
534	for (; I != E && I->first == CurLoop; ++I) {
535	// If the operand is SCEVUnknown and not instructions, peek through
536	// it, to enable more of it to be folded into the GEP.
537	const SCEV *X = I->second;
538	if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: X))
539	if (!isa<Instruction>(Val: U->getValue()))
540	X = SE.getSCEV(V: U->getValue());
541	NewOps.push_back(Elt: X);
542	}
543	Sum = expandAddToGEP(Offset: SE.getAddExpr(Ops&: NewOps), V: Sum);
544	} else if (Op->isNonConstantNegative()) {
545	// Instead of doing a negate and add, just do a subtract.
546	Value *W = expand(S: SE.getNegativeSCEV(V: Op));
547	Sum = InsertBinop(Opcode: Instruction::Sub, LHS: Sum, RHS: W, Flags: SCEV::FlagAnyWrap,
548	/IsSafeToHoist/ true);
549	++I;
550	} else {
551	// A simple add.
552	Value *W = expand(S: Op);
553	// Canonicalize a constant to the RHS.
554	if (isa<Constant>(Val: Sum))
555	std::swap(a&: Sum, b&: W);
556	Sum = InsertBinop(Opcode: Instruction::Add, LHS: Sum, RHS: W, Flags: S->getNoWrapFlags(),
557	/IsSafeToHoist/ true);
558	++I;
559	}
560	}
561
562	return Sum;
563	}
564
565	Value SCEVExpander::visitMulExpr(const* SCEVMulExpr *S) {
566	Type *Ty = S->getType();
567
568	// Collect all the mul operands in a loop, along with their associated loops.
569	// Iterate in reverse so that constants are emitted last, all else equal.
570	SmallVector<std::pair<const Loop , const* SCEV *>, `8`> OpsAndLoops;
571	for (const SCEV *Op : reverse(C: S->operands()))
572	OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));
573
574	// Sort by loop. Use a stable sort so that constants follow non-constants.
575	llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare (SE.DT));
576
577	// Emit instructions to mul all the operands. Hoist as much as possible
578	// out of loops.
579	Value Prod = nullptr*;
580	auto I = OpsAndLoops.begin();
581
582	// Expand the calculation of X pow N in the following manner:
583	// Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
584	// X pow N = (X pow P1) (X pow P2) * ... * (X pow PK).*
585	const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
586	auto E = I;
587	// Calculate how many times the same operand from the same loop is included
588	// into this power.
589	uint64_t Exponent = `0`;
590	const uint64_t MaxExponent = UINT64_MAX >> `1`;
591	// No one sane will ever try to calculate such huge exponents, but if we
592	// need this, we stop on UINT64_MAX / 2 because we need to exit the loop
593	// below when the power of 2 exceeds our Exponent, and we want it to be
594	// 1u << 31 at most to not deal with unsigned overflow.
595	while (E != OpsAndLoops.end() && I == E && Exponent != MaxExponent) {
596	++Exponent;
597	++E;
598	}
599	assert(Exponent > `0` && "Trying to calculate a zeroth exponent of operand?");
600
601	// Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
602	// that are needed into the result.
603	Value *P = expand(S: I->second);
604	Value Result = nullptr*;
605	if (Exponent & `1`)
606	Result = P;
607	for (uint64_t BinExp = `2`; BinExp <= Exponent; BinExp <<= `1`) {
608	P = InsertBinop(Opcode: Instruction::Mul, LHS: P, RHS: P, Flags: SCEV::FlagAnyWrap,
609	/IsSafeToHoist/ true);
610	if (Exponent & BinExp)
611	Result = Result ? InsertBinop(Opcode: Instruction::Mul, LHS: Result, RHS: P,
612	Flags: SCEV::FlagAnyWrap,
613	/IsSafeToHoist/ true)
614	: P;
615	}
616
617	I = E;
618	assert(Result && "Nothing was expanded?");
619	return Result;
620	};
621
622	while (I != OpsAndLoops.end()) {
623	if (!Prod) {
624	// This is the first operand. Just expand it.
625	Prod = ExpandOpBinPowN ();
626	} else if (I->second->isAllOnesValue()) {
627	// Instead of doing a multiply by negative one, just do a negate.
628	Prod = InsertBinop(Opcode: Instruction::Sub, LHS: Constant::getNullValue(Ty), RHS: Prod,
629	Flags: SCEV::FlagAnyWrap, /IsSafeToHoist/ true);
630	++I;
631	} else {
632	// A simple mul.
633	Value *W = ExpandOpBinPowN ();
634	// Canonicalize a constant to the RHS.
635	if (isa<Constant>(Val: Prod)) std::swap(a&: Prod, b&: W);
636	const APInt *RHS;
637	if (match(V: W, P: m_Power2(V&: RHS))) {
638	// Canonicalize Prod(1<<C) to Prod<<C.*
639	assert(!Ty->isVectorTy() && "vector types are not SCEVable");
640	auto NWFlags = S->getNoWrapFlags();
641	// clear nsw flag if shl will produce poison value.
642	if (RHS->logBase2() == RHS->getBitWidth() - `1`)
643	NWFlags = ScalarEvolution::clearFlags(Flags: NWFlags, OffFlags: SCEV::FlagNSW);
644	Prod = InsertBinop(Opcode: Instruction::Shl, LHS: Prod,
645	RHS: ConstantInt::get(Ty, V: RHS->logBase2()), Flags: NWFlags,
646	/IsSafeToHoist/ true);
647	} else {
648	Prod = InsertBinop(Opcode: Instruction::Mul, LHS: Prod, RHS: W, Flags: S->getNoWrapFlags(),
649	/IsSafeToHoist/ true);
650	}
651	}
652	}
653
654	return Prod;
655	}
656
657	Value SCEVExpander::visitUDivExpr(const* SCEVUDivExpr *S) {
658	Value *LHS = expand(S: S->getLHS());
659	if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Val: S->getRHS())) {
660	const APInt &RHS = SC->getAPInt();
661	if (RHS.isPowerOf2())
662	return InsertBinop(Opcode: Instruction::LShr, LHS,
663	RHS: ConstantInt::get(Ty: SC->getType(), V: RHS.logBase2()),
664	Flags: SCEV::FlagAnyWrap, /IsSafeToHoist/ true);
665	}
666
667	Value *RHS = expand(S: S->getRHS());
668	return InsertBinop(Opcode: Instruction::UDiv, LHS, RHS, Flags: SCEV::FlagAnyWrap,
669	/IsSafeToHoist/ SE.isKnownNonZero(S: S->getRHS()));
670	}
671
672	/// Determine if this is a well-behaved chain of instructions leading back to
673	/// the PHI. If so, it may be reused by expanded expressions.
674	bool SCEVExpander::isNormalAddRecExprPHI(PHINode PN, Instruction IncV,
675	const Loop *L) {
676	if (IncV->getNumOperands() == `0` \|\| isa<PHINode>(Val: IncV) \|\|
677	(isa<CastInst>(Val: IncV) && !isa<BitCastInst>(Val: IncV)))
678	return false;
679	// If any of the operands don't dominate the insert position, bail.
680	// Addrec operands are always loop-invariant, so this can only happen
681	// if there are instructions which haven't been hoisted.
682	if (L == IVIncInsertLoop) {
683	for (Use &Op : llvm::drop_begin(RangeOrContainer: IncV->operands()))
684	if (Instruction *OInst = dyn_cast<Instruction>(Val&: Op))
685	if (!SE.DT.dominates(Def: OInst, User: IVIncInsertPos))
686	return false;
687	}
688	// Advance to the next instruction.
689	IncV = dyn_cast<Instruction>(Val: IncV->getOperand(i: `0`));
690	if (!IncV)
691	return false;
692
693	if (IncV->mayHaveSideEffects())
694	return false;
695
696	if (IncV == PN)
697	return true;
698
699	return isNormalAddRecExprPHI(PN, IncV, L);
700	}
701
702	/// getIVIncOperand returns an induction variable increment's induction
703	/// variable operand.
704	///
705	/// If allowScale is set, any type of GEP is allowed as long as the nonIV
706	/// operands dominate InsertPos.
707	///
708	/// If allowScale is not set, ensure that a GEP increment conforms to one of the
709	/// simple patterns generated by getAddRecExprPHILiterally and
710	/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
711	Instruction SCEVExpander::getIVIncOperand(Instruction IncV,
712	Instruction *InsertPos,
713	bool allowScale) {
714	if (IncV == InsertPos)
715	return nullptr;
716
717	switch (IncV->getOpcode()) {
718	default:
719	return nullptr;
720	// Check for a simple Add/Sub or GEP of a loop invariant step.
721	case Instruction::Add:
722	case Instruction::Sub: {
723	Instruction *OInst = dyn_cast<Instruction>(Val: IncV->getOperand(i: `1`));
724	if (!OInst \|\| SE.DT.dominates(Def: OInst, User: InsertPos))
725	return dyn_cast<Instruction>(Val: IncV->getOperand(i: `0`));
726	return nullptr;
727	}
728	case Instruction::BitCast:
729	return dyn_cast<Instruction>(Val: IncV->getOperand(i: `0`));
730	case Instruction::GetElementPtr:
731	for (Use &U : llvm::drop_begin(RangeOrContainer: IncV->operands())) {
732	if (isa<Constant>(Val: U))
733	continue;
734	if (Instruction *OInst = dyn_cast<Instruction>(Val&: U)) {
735	if (!SE.DT.dominates(Def: OInst, User: InsertPos))
736	return nullptr;
737	}
738	if (allowScale) {
739	// allow any kind of GEP as long as it can be hoisted.
740	continue;
741	}
742	// GEPs produced by SCEVExpander use i8 element type.
743	if (!cast<GEPOperator>(Val: IncV)->getSourceElementType()->isIntegerTy(Bitwidth: `8`))
744	return nullptr;
745	break;
746	}
747	return dyn_cast<Instruction>(Val: IncV->getOperand(i: `0`));
748	}
749	}
750
751	/// If the insert point of the current builder or any of the builders on the
752	/// stack of saved builders has 'I' as its insert point, update it to point to
753	/// the instruction after 'I'. This is intended to be used when the instruction
754	/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
755	/// different block, the inconsistent insert point (with a mismatched
756	/// Instruction and Block) can lead to an instruction being inserted in a block
757	/// other than its parent.
758	void SCEVExpander::fixupInsertPoints(Instruction *I) {
759	BasicBlock::iterator It(*I);
760	BasicBlock::iterator NewInsertPt = std::next(x: It);
761	if (Builder.GetInsertPoint() == It)
762	Builder.SetInsertPoint(&*NewInsertPt);
763	for (auto *InsertPtGuard : InsertPointGuards)
764	if (InsertPtGuard->GetInsertPoint() == It)
765	InsertPtGuard->SetInsertPoint(NewInsertPt);
766	}
767
768	/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
769	/// it available to other uses in this loop. Recursively hoist any operands,
770	/// until we reach a value that dominates InsertPos.
771	bool SCEVExpander::hoistIVInc(Instruction IncV, Instruction InsertPos,
772	bool RecomputePoisonFlags) {
773	auto FixupPoisonFlags = [this](Instruction *I) {
774	// Drop flags that are potentially inferred from old context and infer flags
775	// in new context.
776	rememberFlags(I);
777	I->dropPoisonGeneratingFlags();
778	if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
779	if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
780	auto *BO = cast<BinaryOperator>(Val: I);
781	BO->setHasNoUnsignedWrap(
782	ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
783	BO->setHasNoSignedWrap(
784	ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
785	}
786	};
787
788	if (SE.DT.dominates(Def: IncV, User: InsertPos)) {
789	if (RecomputePoisonFlags)
790	FixupPoisonFlags (IncV);
791	return true;
792	}
793
794	// InsertPos must itself dominate IncV so that IncV's new position satisfies
795	// its existing users.
796	if (isa<PHINode>(Val: InsertPos) \|\|
797	!SE.DT.dominates(A: InsertPos->getParent(), B: IncV->getParent()))
798	return false;
799
800	if (!SE.LI.movementPreservesLCSSAForm(Inst: IncV, NewLoc: InsertPos))
801	return false;
802
803	// Check that the chain of IV operands leading back to Phi can be hoisted.
804	SmallVector<Instruction*, `4`> IVIncs;
805	for(;;) {
806	Instruction Oper = getIVIncOperand(IncV, InsertPos, /allowScale/*true);
807	if (!Oper)
808	return false;
809	// IncV is safe to hoist.
810	IVIncs.push_back(Elt: IncV);
811	IncV = Oper;
812	if (SE.DT.dominates(Def: IncV, User: InsertPos))
813	break;
814	}
815	for (Instruction *I : llvm::reverse(C&: IVIncs)) {
816	fixupInsertPoints(I);
817	I->moveBefore(MovePos: InsertPos);
818	if (RecomputePoisonFlags)
819	FixupPoisonFlags (I);
820	}
821	return true;
822	}
823
824	bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
825	PHINode *WidePhi,
826	Instruction *OrigInc,
827	Instruction *WideInc) {
828	return match(V: OrigInc, P: m_c_BinOp(L: m_Specific(V: OrigPhi), R: m_Value())) &&
829	match(V: WideInc, P: m_c_BinOp(L: m_Specific(V: WidePhi), R: m_Value())) &&
830	OrigInc->getOpcode() == WideInc->getOpcode();
831	}
832
833	/// Determine if this cyclic phi is in a form that would have been generated by
834	/// LSR. We don't care if the phi was actually expanded in this pass, as long
835	/// as it is in a low-cost form, for example, no implied multiplication. This
836	/// should match any patterns generated by getAddRecExprPHILiterally and
837	/// expandAddtoGEP.
838	bool SCEVExpander::isExpandedAddRecExprPHI(PHINode PN, Instruction IncV,
839	const Loop *L) {
840	for(Instruction *IVOper = IncV;
841	(IVOper = getIVIncOperand(IncV: IVOper, InsertPos: L->getLoopPreheader()->getTerminator(),
842	/allowScale=/false));) {
843	if (IVOper == PN)
844	return true;
845	}
846	return false;
847	}
848
849	/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
850	/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
851	/// need to materialize IV increments elsewhere to handle difficult situations.
852	Value SCEVExpander::expandIVInc(PHINode PN, Value StepV, const* Loop *L,
853	bool useSubtract) {
854	Value *IncV;
855	// If the PHI is a pointer, use a GEP, otherwise use an add or sub.
856	if (PN->getType()->isPointerTy()) {
857	// TODO: Change name to IVName.iv.next.
858	IncV = Builder.CreatePtrAdd(Ptr: PN, Offset: StepV, Name: "scevgep");
859	} else {
860	IncV = useSubtract ?
861	Builder.CreateSub(LHS: PN, RHS: StepV, Name: Twine (IVName) + ".iv.next") :
862	Builder.CreateAdd(LHS: PN, RHS: StepV, Name: Twine (IVName) + ".iv.next");
863	}
864	return IncV;
865	}
866
867	/// Check whether we can cheaply express the requested SCEV in terms of
868	/// the available PHI SCEV by truncation and/or inversion of the step.
869	static bool canBeCheaplyTransformed(ScalarEvolution &SE,
870	const SCEVAddRecExpr *Phi,
871	const SCEVAddRecExpr *Requested,
872	bool &InvertStep) {
873	// We can't transform to match a pointer PHI.
874	Type *PhiTy = Phi->getType();
875	Type *RequestedTy = Requested->getType();
876	if (PhiTy->isPointerTy() \|\| RequestedTy->isPointerTy())
877	return false;
878
879	if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
880	return false;
881
882	// Try truncate it if necessary.
883	Phi = dyn_cast<SCEVAddRecExpr>(Val: SE.getTruncateOrNoop(V: Phi, Ty: RequestedTy));
884	if (!Phi)
885	return false;
886
887	// Check whether truncation will help.
888	if (Phi == Requested) {
889	InvertStep = false;
890	return true;
891	}
892
893	// Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
894	if (SE.getMinusSCEV(LHS: Requested->getStart(), RHS: Requested) == Phi) {
895	InvertStep = true;
896	return true;
897	}
898
899	return false;
900	}
901
902	static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
903	if (!isa<IntegerType>(Val: AR->getType()))
904	return false;
905
906	unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
907	Type WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth `2`);
908	const SCEV *Step = AR->getStepRecurrence(SE);
909	const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getSignExtendExpr(Op: Step, Ty: WideTy),
910	RHS: SE.getSignExtendExpr(Op: AR, Ty: WideTy));
911	const SCEV *ExtendAfterOp =
912	SE.getSignExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
913	return ExtendAfterOp == OpAfterExtend;
914	}
915
916	static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
917	if (!isa<IntegerType>(Val: AR->getType()))
918	return false;
919
920	unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
921	Type WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth `2`);
922	const SCEV *Step = AR->getStepRecurrence(SE);
923	const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getZeroExtendExpr(Op: Step, Ty: WideTy),
924	RHS: SE.getZeroExtendExpr(Op: AR, Ty: WideTy));
925	const SCEV *ExtendAfterOp =
926	SE.getZeroExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
927	return ExtendAfterOp == OpAfterExtend;
928	}
929
930	/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
931	/// the base addrec, which is the addrec without any non-loop-dominating
932	/// values, and return the PHI.
933	PHINode *
934	SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
935	const Loop L, Type &TruncTy,
936	bool &InvertStep) {
937	assert((!IVIncInsertLoop \|\| IVIncInsertPos) &&
938	"Uninitialized insert position");
939
940	// Reuse a previously-inserted PHI, if present.
941	BasicBlock *LatchBlock = L->getLoopLatch();
942	if (LatchBlock) {
943	PHINode AddRecPhiMatch = nullptr*;
944	Instruction IncV = nullptr*;
945	TruncTy = nullptr;
946	InvertStep = false;
947
948	// Only try partially matching scevs that need truncation and/or
949	// step-inversion if we know this loop is outside the current loop.
950	bool TryNonMatchingSCEV =
951	IVIncInsertLoop &&
952	SE.DT.properlyDominates(A: LatchBlock, B: IVIncInsertLoop->getHeader());
953
954	for (PHINode &PN : L->getHeader()->phis()) {
955	if (!SE.isSCEVable(Ty: PN.getType()))
956	continue;
957
958	// We should not look for a incomplete PHI. Getting SCEV for a incomplete
959	// PHI has no meaning at all.
960	if (!PN.isComplete()) {
961	SCEV_DEBUG_WITH_TYPE(
962	DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
963	continue;
964	}
965
966	const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: &PN));
967	if (!PhiSCEV)
968	continue;
969
970	bool IsMatchingSCEV = PhiSCEV == Normalized;
971	// We only handle truncation and inversion of phi recurrences for the
972	// expanded expression if the expanded expression's loop dominates the
973	// loop we insert to. Check now, so we can bail out early.
974	if (!IsMatchingSCEV && !TryNonMatchingSCEV)
975	continue;
976
977	// TODO: this possibly can be reworked to avoid this cast at all.
978	Instruction *TempIncV =
979	dyn_cast<Instruction>(Val: PN.getIncomingValueForBlock(BB: LatchBlock));
980	if (!TempIncV)
981	continue;
982
983	// Check whether we can reuse this PHI node.
984	if (LSRMode) {
985	if (!isExpandedAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
986	continue;
987	} else {
988	if (!isNormalAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
989	continue;
990	}
991
992	// Stop if we have found an exact match SCEV.
993	if (IsMatchingSCEV) {
994	IncV = TempIncV;
995	TruncTy = nullptr;
996	InvertStep = false;
997	AddRecPhiMatch = &PN;
998	break;
999	}
1000
1001	// Try whether the phi can be translated into the requested form
1002	// (truncated and/or offset by a constant).
1003	if ((!TruncTy \|\| InvertStep) &&
1004	canBeCheaplyTransformed(SE, Phi: PhiSCEV, Requested: Normalized, InvertStep)) {
1005	// Record the phi node. But don't stop we might find an exact match
1006	// later.
1007	AddRecPhiMatch = &PN;
1008	IncV = TempIncV;
1009	TruncTy = Normalized->getType();
1010	}
1011	}
1012
1013	if (AddRecPhiMatch) {
1014	// Ok, the add recurrence looks usable.
1015	// Remember this PHI, even in post-inc mode.
1016	InsertedValues.insert(V: AddRecPhiMatch);
1017	// Remember the increment.
1018	rememberInstruction(I: IncV);
1019	// Those values were not actually inserted but re-used.
1020	ReusedValues.insert(Ptr: AddRecPhiMatch);
1021	ReusedValues.insert(Ptr: IncV);
1022	return AddRecPhiMatch;
1023	}
1024	}
1025
1026	// Save the original insertion point so we can restore it when we're done.
1027	SCEVInsertPointGuard Guard(Builder, this);
1028
1029	// Another AddRec may need to be recursively expanded below. For example, if
1030	// this AddRec is quadratic, the StepV may itself be an AddRec in this
1031	// loop. Remove this loop from the PostIncLoops set before expanding such
1032	// AddRecs. Otherwise, we cannot find a valid position for the step
1033	// (i.e. StepV can never dominate its loop header). Ideally, we could do
1034	// SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1035	// so it's not worth implementing SmallPtrSet::swap.
1036	PostIncLoopSet SavedPostIncLoops = PostIncLoops;
1037	PostIncLoops.clear();
1038
1039	// Expand code for the start value into the loop preheader.
1040	assert(L->getLoopPreheader() &&
1041	"Can't expand add recurrences without a loop preheader!");
1042	Value *StartV =
1043	expand(S: Normalized->getStart(), I: L->getLoopPreheader()->getTerminator());
1044
1045	// StartV must have been be inserted into L's preheader to dominate the new
1046	// phi.
1047	assert(!isa<Instruction>(StartV) \|\|
1048	SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
1049	L->getHeader()));
1050
1051	// Expand code for the step value. Do this before creating the PHI so that PHI
1052	// reuse code doesn't see an incomplete PHI.
1053	const SCEV *Step = Normalized->getStepRecurrence(SE);
1054	Type *ExpandTy = Normalized->getType();
1055	// If the stride is negative, insert a sub instead of an add for the increment
1056	// (unless it's a constant, because subtracts of constants are canonicalized
1057	// to adds).
1058	bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1059	if (useSubtract)
1060	Step = SE.getNegativeSCEV(V: Step);
1061	// Expand the step somewhere that dominates the loop header.
1062	Value *StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());
1063
1064	// The no-wrap behavior proved by IsIncrement(NUW\|NSW) is only applicable if
1065	// we actually do emit an addition. It does not apply if we emit a
1066	// subtraction.
1067	bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, AR: Normalized);
1068	bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, AR: Normalized);
1069
1070	// Create the PHI.
1071	BasicBlock *Header = L->getHeader();
1072	Builder.SetInsertPoint(TheBB: Header, IP: Header->begin());
1073	PHINode *PN =
1074	Builder.CreatePHI(Ty: ExpandTy, NumReservedValues: pred_size(BB: Header), Name: Twine (IVName) + ".iv");
1075
1076	// Create the step instructions and populate the PHI.
1077	for (BasicBlock *Pred : predecessors(BB: Header)) {
1078	// Add a start value.
1079	if (!L->contains(BB: Pred)) {
1080	PN->addIncoming(V: StartV, BB: Pred);
1081	continue;
1082	}
1083
1084	// Create a step value and add it to the PHI.
1085	// If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1086	// instructions at IVIncInsertPos.
1087	Instruction *InsertPos = L == IVIncInsertLoop ?
1088	IVIncInsertPos : Pred->getTerminator();
1089	Builder.SetInsertPoint(InsertPos);
1090	Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1091
1092	if (isa<OverflowingBinaryOperator>(Val: IncV)) {
1093	if (IncrementIsNUW)
1094	cast<BinaryOperator>(Val: IncV)->setHasNoUnsignedWrap();
1095	if (IncrementIsNSW)
1096	cast<BinaryOperator>(Val: IncV)->setHasNoSignedWrap();
1097	}
1098	PN->addIncoming(V: IncV, BB: Pred);
1099	}
1100
1101	// After expanding subexpressions, restore the PostIncLoops set so the caller
1102	// can ensure that IVIncrement dominates the current uses.
1103	PostIncLoops = SavedPostIncLoops;
1104
1105	// Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1106	// effective when we are able to use an IV inserted here, so record it.
1107	InsertedValues.insert(V: PN);
1108	InsertedIVs.push_back(Elt: PN);
1109	return PN;
1110	}
1111
1112	Value SCEVExpander::expandAddRecExprLiterally(const* SCEVAddRecExpr *S) {
1113	const Loop *L = S->getLoop();
1114
1115	// Determine a normalized form of this expression, which is the expression
1116	// before any post-inc adjustment is made.
1117	const SCEVAddRecExpr *Normalized = S;
1118	if (PostIncLoops.count(Ptr: L)) {
1119	PostIncLoopSet Loops;
1120	Loops.insert(Ptr: L);
1121	Normalized = cast<SCEVAddRecExpr>(
1122	Val: normalizeForPostIncUse(S, Loops, SE, /CheckInvertible=/false));
1123	}
1124
1125	[[maybe_unused]] const SCEV *Start = Normalized->getStart();
1126	const SCEV *Step = Normalized->getStepRecurrence(SE);
1127	assert(SE.properlyDominates(Start, L->getHeader()) &&
1128	"Start does not properly dominate loop header");
1129	assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");
1130
1131	// In some cases, we decide to reuse an existing phi node but need to truncate
1132	// it and/or invert the step.
1133	Type TruncTy = nullptr*;
1134	bool InvertStep = false;
1135	PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1136
1137	// Accommodate post-inc mode, if necessary.
1138	Value *Result;
1139	if (!PostIncLoops.count(Ptr: L))
1140	Result = PN;
1141	else {
1142	// In PostInc mode, use the post-incremented value.
1143	BasicBlock *LatchBlock = L->getLoopLatch();
1144	assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1145	Result = PN->getIncomingValueForBlock(BB: LatchBlock);
1146
1147	// We might be introducing a new use of the post-inc IV that is not poison
1148	// safe, in which case we should drop poison generating flags. Only keep
1149	// those flags for which SCEV has proven that they always hold.
1150	if (isa<OverflowingBinaryOperator>(Val: Result)) {
1151	auto *I = cast<Instruction>(Val: Result);
1152	if (!S->hasNoUnsignedWrap())
1153	I->setHasNoUnsignedWrap(false);
1154	if (!S->hasNoSignedWrap())
1155	I->setHasNoSignedWrap(false);
1156	}
1157
1158	// For an expansion to use the postinc form, the client must call
1159	// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1160	// or dominated by IVIncInsertPos.
1161	if (isa<Instruction>(Val: Result) &&
1162	!SE.DT.dominates(Def: cast<Instruction>(Val: Result),
1163	User: &*Builder.GetInsertPoint())) {
1164	// The induction variable's postinc expansion does not dominate this use.
1165	// IVUsers tries to prevent this case, so it is rare. However, it can
1166	// happen when an IVUser outside the loop is not dominated by the latch
1167	// block. Adjusting IVIncInsertPos before expansion begins cannot handle
1168	// all cases. Consider a phi outside whose operand is replaced during
1169	// expansion with the value of the postinc user. Without fundamentally
1170	// changing the way postinc users are tracked, the only remedy is
1171	// inserting an extra IV increment. StepV might fold into PostLoopOffset,
1172	// but hopefully expandCodeFor handles that.
1173	bool useSubtract =
1174	!S->getType()->isPointerTy() && Step->isNonConstantNegative();
1175	if (useSubtract)
1176	Step = SE.getNegativeSCEV(V: Step);
1177	Value *StepV;
1178	{
1179	// Expand the step somewhere that dominates the loop header.
1180	SCEVInsertPointGuard Guard(Builder, this);
1181	StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());
1182	}
1183	Result = expandIVInc(PN, StepV, L, useSubtract);
1184	}
1185	}
1186
1187	// We have decided to reuse an induction variable of a dominating loop. Apply
1188	// truncation and/or inversion of the step.
1189	if (TruncTy) {
1190	// Truncate the result.
1191	if (TruncTy != Result->getType())
1192	Result = Builder.CreateTrunc(V: Result, DestTy: TruncTy);
1193
1194	// Invert the result.
1195	if (InvertStep)
1196	Result = Builder.CreateSub(LHS: expand(S: Normalized->getStart()), RHS: Result);
1197	}
1198
1199	return Result;
1200	}
1201
1202	Value SCEVExpander::visitAddRecExpr(const* SCEVAddRecExpr *S) {
1203	// In canonical mode we compute the addrec as an expression of a canonical IV
1204	// using evaluateAtIteration and expand the resulting SCEV expression. This
1205	// way we avoid introducing new IVs to carry on the computation of the addrec
1206	// throughout the loop.
1207	//
1208	// For nested addrecs evaluateAtIteration might need a canonical IV of a
1209	// type wider than the addrec itself. Emitting a canonical IV of the
1210	// proper type might produce non-legal types, for example expanding an i64
1211	// {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1212	// back to non-canonical mode for nested addrecs.
1213	if (!CanonicalMode \|\| (S->getNumOperands() > `2`))
1214	return expandAddRecExprLiterally(S);
1215
1216	Type *Ty = SE.getEffectiveSCEVType(Ty: S->getType());
1217	const Loop *L = S->getLoop();
1218
1219	// First check for an existing canonical IV in a suitable type.
1220	PHINode CanonicalIV = nullptr*;
1221	if (PHINode *PN = L->getCanonicalInductionVariable())
1222	if (SE.getTypeSizeInBits(Ty: PN->getType()) >= SE.getTypeSizeInBits(Ty))
1223	CanonicalIV = PN;
1224
1225	// Rewrite an AddRec in terms of the canonical induction variable, if
1226	// its type is more narrow.
1227	if (CanonicalIV &&
1228	SE.getTypeSizeInBits(Ty: CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1229	!S->getType()->isPointerTy()) {
1230	SmallVector<const SCEV *, `4`> NewOps(S->getNumOperands());
1231	for (unsigned i = `0`, e = S->getNumOperands(); i != e; ++i)
1232	NewOps [i] = SE.getAnyExtendExpr(Op: S->getOperand(i), Ty: CanonicalIV->getType());
1233	Value *V = expand(S: SE.getAddRecExpr(Operands&: NewOps, L: S->getLoop(),
1234	Flags: S->getNoWrapFlags(Mask: SCEV::FlagNW)));
1235	BasicBlock::iterator NewInsertPt =
1236	findInsertPointAfter(I: cast<Instruction>(Val: V), MustDominate: &*Builder.GetInsertPoint());
1237	V = expand(S: SE.getTruncateExpr(Op: SE.getUnknown(V), Ty), I: NewInsertPt);
1238	return V;
1239	}
1240
1241	// {X,+,F} --> X + {0,+,F}
1242	if (!S->getStart()->isZero()) {
1243	if (isa<PointerType>(Val: S->getType())) {
1244	Value *StartV = expand(S: SE.getPointerBase(V: S));
1245	return expandAddToGEP(Offset: SE.removePointerBase(S), V: StartV);
1246	}
1247
1248	SmallVector<const SCEV *, `4`> NewOps(S->operands());
1249	NewOps [`0`] = SE.getConstant(Ty, V: `0`);
1250	const SCEV *Rest = SE.getAddRecExpr(Operands&: NewOps, L,
1251	Flags: S->getNoWrapFlags(Mask: SCEV::FlagNW));
1252
1253	// Just do a normal add. Pre-expand the operands to suppress folding.
1254	//
1255	// The LHS and RHS values are factored out of the expand call to make the
1256	// output independent of the argument evaluation order.
1257	const SCEV *AddExprLHS = SE.getUnknown(V: expand(S: S->getStart()));
1258	const SCEV *AddExprRHS = SE.getUnknown(V: expand(S: Rest));
1259	return expand(S: SE.getAddExpr(LHS: AddExprLHS, RHS: AddExprRHS));
1260	}
1261
1262	// If we don't yet have a canonical IV, create one.
1263	if (!CanonicalIV) {
1264	// Create and insert the PHI node for the induction variable in the
1265	// specified loop.
1266	BasicBlock *Header = L->getHeader();
1267	pred_iterator HPB = pred_begin(BB: Header), HPE = pred_end(BB: Header);
1268	CanonicalIV = PHINode::Create(Ty, NumReservedValues: std::distance(first: HPB, last: HPE), NameStr: "indvar");
1269	CanonicalIV->insertBefore(InsertPos: Header->begin());
1270	rememberInstruction(I: CanonicalIV);
1271
1272	SmallSet<BasicBlock *, `4`> PredSeen;
1273	Constant *One = ConstantInt::get(Ty, V: `1`);
1274	for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1275	BasicBlock HP = HPI;
1276	if (!PredSeen.insert(Ptr: HP).second) {
1277	// There must be an incoming value for each predecessor, even the
1278	// duplicates!
1279	CanonicalIV->addIncoming(V: CanonicalIV->getIncomingValueForBlock(BB: HP), BB: HP);
1280	continue;
1281	}
1282
1283	if (L->contains(BB: HP)) {
1284	// Insert a unit add instruction right before the terminator
1285	// corresponding to the back-edge.
1286	Instruction *Add = BinaryOperator::CreateAdd(V1: CanonicalIV, V2: One,
1287	Name: "indvar.next",
1288	It: HP->getTerminator()->getIterator());
1289	Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1290	rememberInstruction(I: Add);
1291	CanonicalIV->addIncoming(V: Add, BB: HP);
1292	} else {
1293	CanonicalIV->addIncoming(V: Constant::getNullValue(Ty), BB: HP);
1294	}
1295	}
1296	}
1297
1298	// {0,+,1} --> Insert a canonical induction variable into the loop!
1299	if (S->isAffine() && S->getOperand(i: `1`)->isOne()) {
1300	assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1301	"IVs with types different from the canonical IV should "
1302	"already have been handled!");
1303	return CanonicalIV;
1304	}
1305
1306	// {0,+,F} --> {0,+,1} F*
1307
1308	// If this is a simple linear addrec, emit it now as a special case.
1309	if (S->isAffine()) // {0,+,F} --> iF*
1310	return
1311	expand(S: SE.getTruncateOrNoop(
1312	V: SE.getMulExpr(LHS: SE.getUnknown(V: CanonicalIV),
1313	RHS: SE.getNoopOrAnyExtend(V: S->getOperand(i: `1`),
1314	Ty: CanonicalIV->getType())),
1315	Ty));
1316
1317	// If this is a chain of recurrences, turn it into a closed form, using the
1318	// folders, then expandCodeFor the closed form. This allows the folders to
1319	// simplify the expression without having to build a bunch of special code
1320	// into this folder.
1321	const SCEV IH = SE.getUnknown(V: CanonicalIV); // Get I as a "symbolic" SCEV.*
1322
1323	// Promote S up to the canonical IV type, if the cast is foldable.
1324	const SCEV *NewS = S;
1325	const SCEV *Ext = SE.getNoopOrAnyExtend(V: S, Ty: CanonicalIV->getType());
1326	if (isa<SCEVAddRecExpr>(Val: Ext))
1327	NewS = Ext;
1328
1329	const SCEV *V = cast<SCEVAddRecExpr>(Val: NewS)->evaluateAtIteration(It: IH, SE);
1330
1331	// Truncate the result down to the original type, if needed.
1332	const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1333	return expand(S: T);
1334	}
1335
1336	Value SCEVExpander::visitPtrToIntExpr(const* SCEVPtrToIntExpr *S) {
1337	Value *V = expand(S: S->getOperand());
1338	return ReuseOrCreateCast(V, Ty: S->getType(), Op: CastInst::PtrToInt,
1339	IP: GetOptimalInsertionPointForCastOf(V));
1340	}
1341
1342	Value SCEVExpander::visitTruncateExpr(const* SCEVTruncateExpr *S) {
1343	Value *V = expand(S: S->getOperand());
1344	return Builder.CreateTrunc(V, DestTy: S->getType());
1345	}
1346
1347	Value SCEVExpander::visitZeroExtendExpr(const* SCEVZeroExtendExpr *S) {
1348	Value *V = expand(S: S->getOperand());
1349	return Builder.CreateZExt(V, DestTy: S->getType(), Name: "",
1350	IsNonNeg: SE.isKnownNonNegative(S: S->getOperand()));
1351	}
1352
1353	Value SCEVExpander::visitSignExtendExpr(const* SCEVSignExtendExpr *S) {
1354	Value *V = expand(S: S->getOperand());
1355	return Builder.CreateSExt(V, DestTy: S->getType());
1356	}
1357
1358	Value SCEVExpander::expandMinMaxExpr(const* SCEVNAryExpr *S,
1359	Intrinsic::ID IntrinID, Twine Name,
1360	bool IsSequential) {
1361	Value *LHS = expand(S: S->getOperand(i: S->getNumOperands() - `1`));
1362	Type *Ty = LHS->getType();
1363	if (IsSequential)
1364	LHS = Builder.CreateFreeze(V: LHS);
1365	for (int i = S->getNumOperands() - `2`; i >= `0`; --i) {
1366	Value *RHS = expand(S: S->getOperand(i));
1367	if (IsSequential && i != `0`)
1368	RHS = Builder.CreateFreeze(V: RHS);
1369	Value *Sel;
1370	if (Ty->isIntegerTy())
1371	Sel = Builder.CreateIntrinsic(ID: IntrinID, Types: {Ty}, Args: {LHS, RHS},
1372	/FMFSource=/nullptr, Name);
1373	else {
1374	Value *ICmp =
1375	Builder.CreateICmp(P: MinMaxIntrinsic::getPredicate(ID: IntrinID), LHS, RHS);
1376	Sel = Builder.CreateSelect(C: ICmp, True: LHS, False: RHS, Name);
1377	}
1378	LHS = Sel;
1379	}
1380	return LHS;
1381	}
1382
1383	Value SCEVExpander::visitSMaxExpr(const* SCEVSMaxExpr *S) {
1384	return expandMinMaxExpr(S, IntrinID: Intrinsic::smax, Name: "smax");
1385	}
1386
1387	Value SCEVExpander::visitUMaxExpr(const* SCEVUMaxExpr *S) {
1388	return expandMinMaxExpr(S, IntrinID: Intrinsic::umax, Name: "umax");
1389	}
1390
1391	Value SCEVExpander::visitSMinExpr(const* SCEVSMinExpr *S) {
1392	return expandMinMaxExpr(S, IntrinID: Intrinsic::smin, Name: "smin");
1393	}
1394
1395	Value SCEVExpander::visitUMinExpr(const* SCEVUMinExpr *S) {
1396	return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin");
1397	}
1398
1399	Value SCEVExpander::visitSequentialUMinExpr(const* SCEVSequentialUMinExpr *S) {
1400	return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin", /IsSequential/true);
1401	}
1402
1403	Value SCEVExpander::visitVScale(const* SCEVVScale *S) {
1404	return Builder.CreateVScale(Scaling: ConstantInt::get(Ty: S->getType(), V: `1`));
1405	}
1406
1407	Value SCEVExpander::expandCodeFor(const* SCEV SH, Type Ty,
1408	BasicBlock::iterator IP) {
1409	setInsertPoint(IP);
1410	Value *V = expandCodeFor(SH, Ty);
1411	return V;
1412	}
1413
1414	Value SCEVExpander::expandCodeFor(const* SCEV SH, Type Ty) {
1415	// Expand the code for this SCEV.
1416	Value *V = expand(S: SH);
1417
1418	if (Ty) {
1419	assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1420	"non-trivial casts should be done with the SCEVs directly!");
1421	V = InsertNoopCastOfTo(V, Ty);
1422	}
1423	return V;
1424	}
1425
1426	Value *SCEVExpander::FindValueInExprValueMap(
1427	const SCEV S, const* Instruction *InsertPt,
1428	SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1429	// If the expansion is not in CanonicalMode, and the SCEV contains any
1430	// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1431	if (!CanonicalMode && SE.containsAddRecurrence(S))
1432	return nullptr;
1433
1434	// If S is a constant, it may be worse to reuse an existing Value.
1435	if (isa<SCEVConstant>(Val: S))
1436	return nullptr;
1437
1438	for (Value *V : SE.getSCEVValues(S)) {
1439	Instruction *EntInst = dyn_cast<Instruction>(Val: V);
1440	if (!EntInst)
1441	continue;
1442
1443	// Choose a Value from the set which dominates the InsertPt.
1444	// InsertPt should be inside the Value's parent loop so as not to break
1445	// the LCSSA form.
1446	assert(EntInst->getFunction() == InsertPt->getFunction());
1447	if (S->getType() != V->getType() \|\| !SE.DT.dominates(Def: EntInst, User: InsertPt) \|\|
1448	!(SE.LI.getLoopFor(BB: EntInst->getParent()) == nullptr \|\|
1449	SE.LI.getLoopFor(BB: EntInst->getParent())->contains(Inst: InsertPt)))
1450	continue;
1451
1452	// Make sure reusing the instruction is poison-safe.
1453	if (SE.canReuseInstruction(S, I: EntInst, DropPoisonGeneratingInsts))
1454	return V;
1455	DropPoisonGeneratingInsts.clear();
1456	}
1457	return nullptr;
1458	}
1459
1460	// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1461	// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1462	// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1463	// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1464	// the expansion will try to reuse Value from ExprValueMap, and only when it
1465	// fails, expand the SCEV literally.
1466	Value SCEVExpander::expand(const* SCEV *S) {
1467	// Compute an insertion point for this SCEV object. Hoist the instructions
1468	// as far out in the loop nest as possible.
1469	BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1470
1471	// We can move insertion point only if there is no div or rem operations
1472	// otherwise we are risky to move it over the check for zero denominator.
1473	auto SafeToHoist = [](const SCEV *S) {
1474	return !SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
1475	if (const auto *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
1476	if (const auto *SC = dyn_cast<SCEVConstant>(Val: D->getRHS()))
1477	// Division by non-zero constants can be hoisted.
1478	return SC->getValue()->isZero();
1479	// All other divisions should not be moved as they may be
1480	// divisions by zero and should be kept within the
1481	// conditions of the surrounding loops that guard their
1482	// execution (see PR35406).
1483	return true;
1484	}
1485	return false;
1486	});
1487	};
1488	if (SafeToHoist (S)) {
1489	for (Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock());;
1490	L = L->getParentLoop()) {
1491	if (SE.isLoopInvariant(S, L)) {
1492	if (!L) break;
1493	if (BasicBlock *Preheader = L->getLoopPreheader()) {
1494	InsertPt = Preheader->getTerminator()->getIterator();
1495	} else {
1496	// LSR sets the insertion point for AddRec start/step values to the
1497	// block start to simplify value reuse, even though it's an invalid
1498	// position. SCEVExpander must correct for this in all cases.
1499	InsertPt = L->getHeader()->getFirstInsertionPt();
1500	}
1501	} else {
1502	// If the SCEV is computable at this level, insert it into the header
1503	// after the PHIs (and after any other instructions that we've inserted
1504	// there) so that it is guaranteed to dominate any user inside the loop.
1505	if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(Ptr: L))
1506	InsertPt = L->getHeader()->getFirstInsertionPt();
1507
1508	while (InsertPt != Builder.GetInsertPoint() &&
1509	(isInsertedInstruction(I: &*InsertPt) \|\|
1510	isa<DbgInfoIntrinsic>(Val: &*InsertPt))) {
1511	InsertPt = std::next(x: InsertPt);
1512	}
1513	break;
1514	}
1515	}
1516	}
1517
1518	// Check to see if we already expanded this here.
1519	auto I = InsertedExpressions.find(Val: std::make_pair(x&: S, y: &*InsertPt));
1520	if (I != InsertedExpressions.end())
1521	return I ->second;
1522
1523	SCEVInsertPointGuard Guard(Builder, this);
1524	Builder.SetInsertPoint(TheBB: InsertPt ->getParent(), IP: InsertPt);
1525
1526	// Expand the expression into instructions.
1527	SmallVector<Instruction *> DropPoisonGeneratingInsts;
1528	Value V = FindValueInExprValueMap(S, InsertPt: &InsertPt, DropPoisonGeneratingInsts);
1529	if (!V) {
1530	V = visit(S);
1531	V = fixupLCSSAFormFor(V);
1532	} else {
1533	for (Instruction *I : DropPoisonGeneratingInsts) {
1534	rememberFlags(I);
1535	I->dropPoisonGeneratingAnnotations();
1536	// See if we can re-infer from first principles any of the flags we just
1537	// dropped.
1538	if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
1539	if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1540	auto *BO = cast<BinaryOperator>(Val: I);
1541	BO->setHasNoUnsignedWrap(
1542	ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
1543	BO->setHasNoSignedWrap(
1544	ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
1545	}
1546	if (auto *NNI = dyn_cast<PossiblyNonNegInst>(Val: I)) {
1547	auto *Src = NNI->getOperand(i_nocapture: `0`);
1548	if (isImpliedByDomCondition(Pred: ICmpInst::ICMP_SGE, LHS: Src,
1549	RHS: Constant::getNullValue(Ty: Src->getType()), ContextI: I,
1550	DL).value_or(u: false))
1551	NNI->setNonNeg(true);
1552	}
1553	}
1554	}
1555	// Remember the expanded value for this SCEV at this location.
1556	//
1557	// This is independent of PostIncLoops. The mapped value simply materializes
1558	// the expression at this insertion point. If the mapped value happened to be
1559	// a postinc expansion, it could be reused by a non-postinc user, but only if
1560	// its insertion point was already at the head of the loop.
1561	InsertedExpressions [std::make_pair(x&: S, y: &*InsertPt)] = V;
1562	return V;
1563	}
1564
1565	void SCEVExpander::rememberInstruction(Value *I) {
1566	auto DoInsert = [this](Value *V) {
1567	if (!PostIncLoops.empty())
1568	InsertedPostIncValues.insert(V);
1569	else
1570	InsertedValues.insert(V);
1571	};
1572	DoInsert (I);
1573	}
1574
1575	void SCEVExpander::rememberFlags(Instruction *I) {
1576	// If we already have flags for the instruction, keep the existing ones.
1577	OrigFlags.try_emplace(Key: I, Args: PoisonFlags (I));
1578	}
1579
1580	void SCEVExpander::replaceCongruentIVInc(
1581	PHINode &Phi, PHINode &OrigPhi, Loop L, const* DominatorTree *DT,
1582	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1583	BasicBlock *LatchBlock = L->getLoopLatch();
1584	if (!LatchBlock)
1585	return;
1586
1587	Instruction *OrigInc =
1588	dyn_cast<Instruction>(Val: OrigPhi->getIncomingValueForBlock(BB: LatchBlock));
1589	Instruction *IsomorphicInc =
1590	dyn_cast<Instruction>(Val: Phi->getIncomingValueForBlock(BB: LatchBlock));
1591	if (!OrigInc \|\| !IsomorphicInc)
1592	return;
1593
1594	// If this phi has the same width but is more canonical, replace the
1595	// original with it. As part of the "more canonical" determination,
1596	// respect a prior decision to use an IV chain.
1597	if (OrigPhi->getType() == Phi->getType() &&
1598	!(ChainedPhis.count(V: Phi) \|\|
1599	isExpandedAddRecExprPHI(PN: OrigPhi, IncV: OrigInc, L)) &&
1600	(ChainedPhis.count(V: Phi) \|\|
1601	isExpandedAddRecExprPHI(PN: Phi, IncV: IsomorphicInc, L))) {
1602	std::swap(a&: OrigPhi, b&: Phi);
1603	std::swap(a&: OrigInc, b&: IsomorphicInc);
1604	}
1605
1606	// Replacing the congruent phi is sufficient because acyclic
1607	// redundancy elimination, CSE/GVN, should handle the
1608	// rest. However, once SCEV proves that a phi is congruent,
1609	// it's often the head of an IV user cycle that is isomorphic
1610	// with the original phi. It's worth eagerly cleaning up the
1611	// common case of a single IV increment so that DeleteDeadPHIs
1612	// can remove cycles that had postinc uses.
1613	// Because we may potentially introduce a new use of OrigIV that didn't
1614	// exist before at this point, its poison flags need readjustment.
1615	const SCEV *TruncExpr =
1616	SE.getTruncateOrNoop(V: SE.getSCEV(V: OrigInc), Ty: IsomorphicInc->getType());
1617	if (OrigInc == IsomorphicInc \|\| TruncExpr != SE.getSCEV(V: IsomorphicInc) \|\|
1618	!SE.LI.replacementPreservesLCSSAForm(From: IsomorphicInc, To: OrigInc))
1619	return;
1620
1621	bool BothHaveNUW = false;
1622	bool BothHaveNSW = false;
1623	auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(Val: OrigInc);
1624	auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(Val: IsomorphicInc);
1625	if (OBOIncV && OBOIsomorphic) {
1626	BothHaveNUW =
1627	OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1628	BothHaveNSW =
1629	OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1630	}
1631
1632	if (!hoistIVInc(IncV: OrigInc, InsertPos: IsomorphicInc,
1633	/RecomputePoisonFlags/ true))
1634	return;
1635
1636	// We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1637	// are NUW/NSW, then we can preserve them on the wider increment; the narrower
1638	// IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1639	// make IsomorphicInc's uses more poisonous.
1640	assert(OrigInc->getType()->getScalarSizeInBits() >=
1641	IsomorphicInc->getType()->getScalarSizeInBits() &&
1642	"Should only replace an increment with a wider one.");
1643	if (BothHaveNUW \|\| BothHaveNSW) {
1644	OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() \|\| BothHaveNUW);
1645	OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() \|\| BothHaveNSW);
1646	}
1647
1648	SCEV_DEBUG_WITH_TYPE(DebugType,
1649	dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1650	<< *IsomorphicInc << `'\n'`);
1651	Value *NewInc = OrigInc;
1652	if (OrigInc->getType() != IsomorphicInc->getType()) {
1653	BasicBlock::iterator IP;
1654	if (PHINode *PN = dyn_cast<PHINode>(Val: OrigInc))
1655	IP = PN->getParent()->getFirstInsertionPt();
1656	else
1657	IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1658
1659	IRBuilder<> Builder(IP ->getParent(), IP);
1660	Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1661	NewInc =
1662	Builder.CreateTruncOrBitCast(V: OrigInc, DestTy: IsomorphicInc->getType(), Name: IVName);
1663	}
1664	IsomorphicInc->replaceAllUsesWith(V: NewInc);
1665	DeadInsts.emplace_back(Args&: IsomorphicInc);
1666	}
1667
1668	/// replaceCongruentIVs - Check for congruent phis in this loop header and
1669	/// replace them with their most canonical representative. Return the number of
1670	/// phis eliminated.
1671	///
1672	/// This does not depend on any SCEVExpander state but should be used in
1673	/// the same context that SCEVExpander is used.
1674	unsigned
1675	SCEVExpander::replaceCongruentIVs(Loop L, const* DominatorTree *DT,
1676	SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1677	const TargetTransformInfo *TTI) {
1678	// Find integer phis in order of increasing width.
1679	SmallVector<PHINode*, `8`> Phis;
1680	for (PHINode &PN : L->getHeader()->phis())
1681	Phis.push_back(Elt: &PN);
1682
1683	if (TTI)
1684	// Use stable_sort to preserve order of equivalent PHIs, so the order
1685	// of the sorted Phis is the same from run to run on the same loop.
1686	llvm::stable_sort(Range&: Phis, C: [](Value LHS, Value RHS) {
1687	// Put pointers at the back and make sure pointer < pointer = false.
1688	if (!LHS->getType()->isIntegerTy() \|\| !RHS->getType()->isIntegerTy())
1689	return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1690	return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1691	LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1692	});
1693
1694	unsigned NumElim = `0`;
1695	DenseMap<const SCEV , PHINode > ExprToIVMap;
1696	// Process phis from wide to narrow. Map wide phis to their truncation
1697	// so narrow phis can reuse them.
1698	for (PHINode *Phi : Phis) {
1699	auto SimplifyPHINode = [&](PHINode PN) -> Value {
1700	if (Value *V = simplifyInstruction(I: PN, Q: {DL, &SE.TLI, &SE.DT, &SE.AC}))
1701	return V;
1702	if (!SE.isSCEVable(Ty: PN->getType()))
1703	return nullptr;
1704	auto *Const = dyn_cast<SCEVConstant>(Val: SE.getSCEV(V: PN));
1705	if (!Const)
1706	return nullptr;
1707	return Const->getValue();
1708	};
1709
1710	// Fold constant phis. They may be congruent to other constant phis and
1711	// would confuse the logic below that expects proper IVs.
1712	if (Value *V = SimplifyPHINode (Phi)) {
1713	if (V->getType() != Phi->getType())
1714	continue;
1715	SE.forgetValue(V: Phi);
1716	Phi->replaceAllUsesWith(V);
1717	DeadInsts.emplace_back(Args&: Phi);
1718	++NumElim;
1719	SCEV_DEBUG_WITH_TYPE(DebugType,
1720	dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1721	<< `'\n'`);
1722	continue;
1723	}
1724
1725	if (!SE.isSCEVable(Ty: Phi->getType()))
1726	continue;
1727
1728	PHINode *&OrigPhiRef = ExprToIVMap [SE.getSCEV(V: Phi)];
1729	if (!OrigPhiRef) {
1730	OrigPhiRef = Phi;
1731	if (Phi->getType()->isIntegerTy() && TTI &&
1732	TTI->isTruncateFree(Ty1: Phi->getType(), Ty2: Phis.back()->getType())) {
1733	// Make sure we only rewrite using simple induction variables;
1734	// otherwise, we can make the trip count of a loop unanalyzable
1735	// to SCEV.
1736	const SCEV *PhiExpr = SE.getSCEV(V: Phi);
1737	if (isa<SCEVAddRecExpr>(Val: PhiExpr)) {
1738	// This phi can be freely truncated to the narrowest phi type. Map the
1739	// truncated expression to it so it will be reused for narrow types.
1740	const SCEV *TruncExpr =
1741	SE.getTruncateExpr(Op: PhiExpr, Ty: Phis.back()->getType());
1742	ExprToIVMap [TruncExpr] = Phi;
1743	}
1744	}
1745	continue;
1746	}
1747
1748	// Replacing a pointer phi with an integer phi or vice-versa doesn't make
1749	// sense.
1750	if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1751	continue;
1752
1753	replaceCongruentIVInc(Phi, OrigPhi&: OrigPhiRef, L, DT, DeadInsts);
1754	SCEV_DEBUG_WITH_TYPE(DebugType,
1755	dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1756	<< `'\n'`);
1757	SCEV_DEBUG_WITH_TYPE(
1758	DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << `'\n'`);
1759	++NumElim;
1760	Value *NewIV = OrigPhiRef;
1761	if (OrigPhiRef->getType() != Phi->getType()) {
1762	IRBuilder<> Builder(L->getHeader(),
1763	L->getHeader()->getFirstInsertionPt());
1764	Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1765	NewIV = Builder.CreateTruncOrBitCast(V: OrigPhiRef, DestTy: Phi->getType(), Name: IVName);
1766	}
1767	Phi->replaceAllUsesWith(V: NewIV);
1768	DeadInsts.emplace_back(Args&: Phi);
1769	}
1770	return NumElim;
1771	}
1772
1773	bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1774	const Instruction *At,
1775	Loop *L) {
1776	using namespace llvm::PatternMatch;
1777
1778	SmallVector<BasicBlock *, `4`> ExitingBlocks;
1779	L->getExitingBlocks(ExitingBlocks);
1780
1781	// Look for suitable value in simple conditions at the loop exits.
1782	for (BasicBlock *BB : ExitingBlocks) {
1783	ICmpInst::Predicate Pred;
1784	Instruction LHS, RHS;
1785
1786	if (!match(V: BB->getTerminator(),
1787	P: m_Br(C: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Instruction(I&: RHS)),
1788	T: m_BasicBlock(), F: m_BasicBlock())))
1789	continue;
1790
1791	if (SE.getSCEV(V: LHS) == S && SE.DT.dominates(Def: LHS, User: At))
1792	return true;
1793
1794	if (SE.getSCEV(V: RHS) == S && SE.DT.dominates(Def: RHS, User: At))
1795	return true;
1796	}
1797
1798	// Use expand's logic which is used for reusing a previous Value in
1799	// ExprValueMap. Note that we don't currently model the cost of
1800	// needing to drop poison generating flags on the instruction if we
1801	// want to reuse it. We effectively assume that has zero cost.
1802	SmallVector<Instruction *> DropPoisonGeneratingInsts;
1803	return FindValueInExprValueMap(S, InsertPt: At, DropPoisonGeneratingInsts) != nullptr;
1804	}
1805
1806	template<typename T> static InstructionCost costAndCollectOperands(
1807	const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1808	TargetTransformInfo::TargetCostKind CostKind,
1809	SmallVectorImpl<SCEVOperand> &Worklist) {
1810
1811	const T *S = cast<T>(WorkItem.S);
1812	InstructionCost Cost = `0`;
1813	// Object to help map SCEV operands to expanded IR instructions.
1814	struct OperationIndices {
1815	OperationIndices(unsigned Opc, size_t min, size_t max) :
1816	Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1817	unsigned Opcode;
1818	size_t MinIdx;
1819	size_t MaxIdx;
1820	};
1821
1822	// Collect the operations of all the instructions that will be needed to
1823	// expand the SCEVExpr. This is so that when we come to cost the operands,
1824	// we know what the generated user(s) will be.
1825	SmallVector<OperationIndices, `2`> Operations;
1826
1827	auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1828	Operations.emplace_back(Opcode, `0`, `0`);
1829	return TTI.getCastInstrCost(Opcode, Dst: S->getType(),
1830	Src: S->getOperand(`0`)->getType(),
1831	CCH: TTI::CastContextHint::None, CostKind);
1832	};
1833
1834	auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1835	unsigned MinIdx = `0`,
1836	unsigned MaxIdx = `1`) -> InstructionCost {
1837	Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1838	return NumRequired *
1839	TTI.getArithmeticInstrCost(Opcode, Ty: S->getType(), CostKind);
1840	};
1841
1842	auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1843	unsigned MaxIdx) -> InstructionCost {
1844	Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1845	Type *OpType = S->getType();
1846	return NumRequired * TTI.getCmpSelInstrCost(
1847	Opcode, ValTy: OpType, CondTy: CmpInst::makeCmpResultType(opnd_type: OpType),
1848	VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
1849	};
1850
1851	switch (S->getSCEVType()) {
1852	case scCouldNotCompute:
1853	llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1854	case scUnknown:
1855	case scConstant:
1856	case scVScale:
1857	return `0`;
1858	case scPtrToInt:
1859	Cost = CastCost(Instruction::PtrToInt);
1860	break;
1861	case scTruncate:
1862	Cost = CastCost(Instruction::Trunc);
1863	break;
1864	case scZeroExtend:
1865	Cost = CastCost(Instruction::ZExt);
1866	break;
1867	case scSignExtend:
1868	Cost = CastCost(Instruction::SExt);
1869	break;
1870	case scUDivExpr: {
1871	unsigned Opcode = Instruction::UDiv;
1872	if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(`1`)))
1873	if (SC->getAPInt().isPowerOf2())
1874	Opcode = Instruction::LShr;
1875	Cost = ArithCost(Opcode, `1`);
1876	break;
1877	}
1878	case scAddExpr:
1879	Cost = ArithCost(Instruction::Add, S->getNumOperands() - `1`);
1880	break;
1881	case scMulExpr:
1882	// TODO: this is a very pessimistic cost modelling for Mul,
1883	// because of Bin Pow algorithm actually used by the expander,
1884	// see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1885	Cost = ArithCost(Instruction::Mul, S->getNumOperands() - `1`);
1886	break;
1887	case scSMaxExpr:
1888	case scUMaxExpr:
1889	case scSMinExpr:
1890	case scUMinExpr:
1891	case scSequentialUMinExpr: {
1892	// FIXME: should this ask the cost for Intrinsic's?
1893	// The reduction tree.
1894	Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - `1`, `0`, `1`);
1895	Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - `1`, `0`, `2`);
1896	switch (S->getSCEVType()) {
1897	case scSequentialUMinExpr: {
1898	// The safety net against poison.
1899	// FIXME: this is broken.
1900	Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - `1`, `0`, `0`);
1901	Cost += ArithCost(Instruction::Or,
1902	S->getNumOperands() > `2` ? S->getNumOperands() - `2` : `0`);
1903	Cost += CmpSelCost(Instruction::Select, `1`, `0`, `1`);
1904	break;
1905	}
1906	default:
1907	assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1908	"Unhandled SCEV expression type?");
1909	break;
1910	}
1911	break;
1912	}
1913	case scAddRecExpr: {
1914	// In this polynominal, we may have some zero operands, and we shouldn't
1915	// really charge for those. So how many non-zero coefficients are there?
1916	int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
1917	return !Op->isZero();
1918	});
1919
1920	assert(NumTerms >= `1` && "Polynominal should have at least one term.");
1921	assert(!(*std::prev(S->operands().end()))->isZero() &&
1922	"Last operand should not be zero");
1923
1924	// Ignoring constant term (operand 0), how many of the coefficients are u> 1?
1925	int NumNonZeroDegreeNonOneTerms =
1926	llvm::count_if(S->operands(), [](const SCEV *Op) {
1927	auto *SConst = dyn_cast<SCEVConstant>(Val: Op);
1928	return !SConst \|\| SConst->getAPInt().ugt(RHS: `1`);
1929	});
1930
1931	// Much like with normal add expr, the polynominal will require
1932	// one less addition than the number of it's terms.
1933	InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - `1`,
1934	/MinIdx/ `1`, /MaxIdx/ `1`);
1935	// Here, each* one of those will require a multiplication.*
1936	InstructionCost MulCost =
1937	ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
1938	Cost = AddCost + MulCost;
1939
1940	// What is the degree of this polynominal?
1941	int PolyDegree = S->getNumOperands() - `1`;
1942	assert(PolyDegree >= `1` && "Should be at least affine.");
1943
1944	// The final term will be:
1945	// Op_{PolyDegree} x ^ {PolyDegree}*
1946	// Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
1947	// Note that x ^ {PolyDegree} = x x ^ {PolyDegree-1} so charging for*
1948	// x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
1949	// FIXME: this is conservatively correct, but might be overly pessimistic.
1950	Cost += MulCost * (PolyDegree - `1`);
1951	break;
1952	}
1953	}
1954
1955	for (auto &CostOp : Operations) {
1956	for (auto SCEVOp : enumerate(S->operands())) {
1957	// Clamp the index to account for multiple IR operations being chained.
1958	size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1959	size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1960	Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1961	}
1962	}
1963	return Cost;
1964	}
1965
1966	bool SCEVExpander::isHighCostExpansionHelper(
1967	const SCEVOperand &WorkItem, Loop L, const* Instruction &At,
1968	InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1969	SmallPtrSetImpl<const SCEV *> &Processed,
1970	SmallVectorImpl<SCEVOperand> &Worklist) {
1971	if (Cost > Budget)
1972	return true; // Already run out of budget, give up.
1973
1974	const SCEV *S = WorkItem.S;
1975	// Was the cost of expansion of this expression already accounted for?
1976	if (!isa<SCEVConstant>(Val: S) && !Processed.insert(Ptr: S).second)
1977	return false; // We have already accounted for this expression.
1978
1979	// If we can find an existing value for this scev available at the point "At"
1980	// then consider the expression cheap.
1981	if (hasRelatedExistingExpansion(S, At: &At, L))
1982	return false; // Consider the expression to be free.
1983
1984	TargetTransformInfo::TargetCostKind CostKind =
1985	L->getHeader()->getParent()->hasMinSize()
1986	? TargetTransformInfo::TCK_CodeSize
1987	: TargetTransformInfo::TCK_RecipThroughput;
1988
1989	switch (S->getSCEVType()) {
1990	case scCouldNotCompute:
1991	llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1992	case scUnknown:
1993	case scVScale:
1994	// Assume to be zero-cost.
1995	return false;
1996	case scConstant: {
1997	// Only evalulate the costs of constants when optimizing for size.
1998	if (CostKind != TargetTransformInfo::TCK_CodeSize)
1999	return false;
2000	const APInt &Imm = cast<SCEVConstant>(Val: S)->getAPInt();
2001	Type *Ty = S->getType();
2002	Cost += TTI.getIntImmCostInst(
2003	Opc: WorkItem.ParentOpcode, Idx: WorkItem.OperandIdx, Imm, Ty, CostKind);
2004	return Cost > Budget;
2005	}
2006	case scTruncate:
2007	case scPtrToInt:
2008	case scZeroExtend:
2009	case scSignExtend: {
2010	Cost +=
2011	costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2012	return false; // Will answer upon next entry into this function.
2013	}
2014	case scUDivExpr: {
2015	// UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2016	// HowManyLessThans produced to compute a precise expression, rather than a
2017	// UDiv from the user's code. If we can't find a UDiv in the code with some
2018	// simple searching, we need to account for it's cost.
2019
2020	// At the beginning of this function we already tried to find existing
2021	// value for plain 'S'. Now try to lookup 'S + 1' since it is common
2022	// pattern involving division. This is just a simple search heuristic.
2023	if (hasRelatedExistingExpansion(
2024	S: SE.getAddExpr(LHS: S, RHS: SE.getConstant(Ty: S->getType(), V: `1`)), At: &At, L))
2025	return false; // Consider it to be free.
2026
2027	Cost +=
2028	costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2029	return false; // Will answer upon next entry into this function.
2030	}
2031	case scAddExpr:
2032	case scMulExpr:
2033	case scUMaxExpr:
2034	case scSMaxExpr:
2035	case scUMinExpr:
2036	case scSMinExpr:
2037	case scSequentialUMinExpr: {
2038	assert(cast<SCEVNAryExpr>(S)->getNumOperands() > `1` &&
2039	"Nary expr should have more than 1 operand.");
2040	// The simple nary expr will require one less op (or pair of ops)
2041	// than the number of it's terms.
2042	Cost +=
2043	costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2044	return Cost > Budget;
2045	}
2046	case scAddRecExpr: {
2047	assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= `2` &&
2048	"Polynomial should be at least linear");
2049	Cost += costAndCollectOperands<SCEVAddRecExpr>(
2050	WorkItem, TTI, CostKind, Worklist);
2051	return Cost > Budget;
2052	}
2053	}
2054	llvm_unreachable("Unknown SCEV kind!");
2055	}
2056
2057	Value SCEVExpander::expandCodeForPredicate(const* SCEVPredicate *Pred,
2058	Instruction *IP) {
2059	assert(IP);
2060	switch (Pred->getKind()) {
2061	case SCEVPredicate::P_Union:
2062	return expandUnionPredicate(Pred: cast<SCEVUnionPredicate>(Val: Pred), Loc: IP);
2063	case SCEVPredicate::P_Compare:
2064	return expandComparePredicate(Pred: cast<SCEVComparePredicate>(Val: Pred), Loc: IP);
2065	case SCEVPredicate::P_Wrap: {
2066	auto *AddRecPred = cast<SCEVWrapPredicate>(Val: Pred);
2067	return expandWrapPredicate(P: AddRecPred, Loc: IP);
2068	}
2069	}
2070	llvm_unreachable("Unknown SCEV predicate type");
2071	}
2072
2073	Value SCEVExpander::expandComparePredicate(const* SCEVComparePredicate *Pred,
2074	Instruction *IP) {
2075	Value *Expr0 = expand(S: Pred->getLHS(), I: IP);
2076	Value *Expr1 = expand(S: Pred->getRHS(), I: IP);
2077
2078	Builder.SetInsertPoint(IP);
2079	auto InvPred = ICmpInst::getInversePredicate(pred: Pred->getPredicate());
2080	auto *I = Builder.CreateICmp(P: InvPred, LHS: Expr0, RHS: Expr1, Name: "ident.check");
2081	return I;
2082	}
2083
2084	Value SCEVExpander::generateOverflowCheck(const* SCEVAddRecExpr *AR,
2085	Instruction Loc, bool* Signed) {
2086	assert(AR->isAffine() && "Cannot generate RT check for "
2087	"non-affine expression");
2088
2089	// FIXME: It is highly suspicious that we're ignoring the predicates here.
2090	SmallVector<const SCEVPredicate *, `4`> Pred;
2091	const SCEV *ExitCount =
2092	SE.getPredicatedSymbolicMaxBackedgeTakenCount(L: AR->getLoop(), Predicates&: Pred);
2093
2094	assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2095
2096	const SCEV *Step = AR->getStepRecurrence(SE);
2097	const SCEV *Start = AR->getStart();
2098
2099	Type *ARTy = AR->getType();
2100	unsigned SrcBits = SE.getTypeSizeInBits(Ty: ExitCount->getType());
2101	unsigned DstBits = SE.getTypeSizeInBits(Ty: ARTy);
2102
2103	// The expression {Start,+,Step} has nusw/nssw if
2104	// Step < 0, Start - \|Step\| Backedge <= Start*
2105	// Step >= 0, Start + \|Step\| Backedge > Start*
2106	// and \|Step\| Backedge doesn't unsigned overflow.*
2107
2108	Builder.SetInsertPoint(Loc);
2109	Value *TripCountVal = expand(S: ExitCount, I: Loc);
2110
2111	IntegerType *Ty =
2112	IntegerType::get(C&: Loc->getContext(), NumBits: SE.getTypeSizeInBits(Ty: ARTy));
2113
2114	Value *StepValue = expand(S: Step, I: Loc);
2115	Value *NegStepValue = expand(S: SE.getNegativeSCEV(V: Step), I: Loc);
2116	Value *StartValue = expand(S: Start, I: Loc);
2117
2118	ConstantInt *Zero =
2119	ConstantInt::get(Context&: Loc->getContext(), V: APInt::getZero(numBits: DstBits));
2120
2121	Builder.SetInsertPoint(Loc);
2122	// Compute \|Step\|
2123	Value *StepCompare = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: StepValue, RHS: Zero);
2124	Value *AbsStep = Builder.CreateSelect(C: StepCompare, True: NegStepValue, False: StepValue);
2125
2126	// Compute \|Step\| Backedge*
2127	// Compute:
2128	// 1. Start + \|Step\| Backedge < Start*
2129	// 2. Start - \|Step\| Backedge > Start*
2130	//
2131	// And select either 1. or 2. depending on whether step is positive or
2132	// negative. If Step is known to be positive or negative, only create
2133	// either 1. or 2.
2134	auto ComputeEndCheck = [&]() -> Value * {
2135	// Checking <u 0 is always false.
2136	if (!Signed && Start->isZero() && SE.isKnownPositive(S: Step))
2137	return ConstantInt::getFalse(Context&: Loc->getContext());
2138
2139	// Get the backedge taken count and truncate or extended to the AR type.
2140	Value *TruncTripCount = Builder.CreateZExtOrTrunc(V: TripCountVal, DestTy: Ty);
2141
2142	Value MulV, OfMul;
2143	if (Step->isOne()) {
2144	// Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2145	// needed, there is never an overflow, so to avoid artificially inflating
2146	// the cost of the check, directly emit the optimized IR.
2147	MulV = TruncTripCount;
2148	OfMul = ConstantInt::getFalse(Context&: MulV->getContext());
2149	} else {
2150	auto *MulF = Intrinsic::getDeclaration(M: Loc->getModule(),
2151	id: Intrinsic::umul_with_overflow, Tys: Ty);
2152	CallInst *Mul =
2153	Builder.CreateCall(Callee: MulF, Args: {AbsStep, TruncTripCount}, Name: "mul");
2154	MulV = Builder.CreateExtractValue(Agg: Mul, Idxs: `0`, Name: "mul.result");
2155	OfMul = Builder.CreateExtractValue(Agg: Mul, Idxs: `1`, Name: "mul.overflow");
2156	}
2157
2158	Value Add = nullptr, Sub = nullptr;
2159	bool NeedPosCheck = !SE.isKnownNegative(S: Step);
2160	bool NeedNegCheck = !SE.isKnownPositive(S: Step);
2161
2162	if (isa<PointerType>(Val: ARTy)) {
2163	Value *NegMulV = Builder.CreateNeg(V: MulV);
2164	if (NeedPosCheck)
2165	Add = Builder.CreatePtrAdd(Ptr: StartValue, Offset: MulV);
2166	if (NeedNegCheck)
2167	Sub = Builder.CreatePtrAdd(Ptr: StartValue, Offset: NegMulV);
2168	} else {
2169	if (NeedPosCheck)
2170	Add = Builder.CreateAdd(LHS: StartValue, RHS: MulV);
2171	if (NeedNegCheck)
2172	Sub = Builder.CreateSub(LHS: StartValue, RHS: MulV);
2173	}
2174
2175	Value EndCompareLT = nullptr*;
2176	Value EndCompareGT = nullptr*;
2177	Value EndCheck = nullptr*;
2178	if (NeedPosCheck)
2179	EndCheck = EndCompareLT = Builder.CreateICmp(
2180	P: Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, LHS: Add, RHS: StartValue);
2181	if (NeedNegCheck)
2182	EndCheck = EndCompareGT = Builder.CreateICmp(
2183	P: Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, LHS: Sub, RHS: StartValue);
2184	if (NeedPosCheck && NeedNegCheck) {
2185	// Select the answer based on the sign of Step.
2186	EndCheck = Builder.CreateSelect(C: StepCompare, True: EndCompareGT, False: EndCompareLT);
2187	}
2188	return Builder.CreateOr(LHS: EndCheck, RHS: OfMul);
2189	};
2190	Value *EndCheck = ComputeEndCheck ();
2191
2192	// If the backedge taken count type is larger than the AR type,
2193	// check that we don't drop any bits by truncating it. If we are
2194	// dropping bits, then we have overflow (unless the step is zero).
2195	if (SrcBits > DstBits) {
2196	auto MaxVal = APInt::getMaxValue(numBits: DstBits).zext(width: SrcBits);
2197	auto *BackedgeCheck =
2198	Builder.CreateICmp(P: ICmpInst::ICMP_UGT, LHS: TripCountVal,
2199	RHS: ConstantInt::get(Context&: Loc->getContext(), V: MaxVal));
2200	BackedgeCheck = Builder.CreateAnd(
2201	LHS: BackedgeCheck, RHS: Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: StepValue, RHS: Zero));
2202
2203	EndCheck = Builder.CreateOr(LHS: EndCheck, RHS: BackedgeCheck);
2204	}
2205
2206	return EndCheck;
2207	}
2208
2209	Value SCEVExpander::expandWrapPredicate(const* SCEVWrapPredicate *Pred,
2210	Instruction *IP) {
2211	const auto *A = cast<SCEVAddRecExpr>(Val: Pred->getExpr());
2212	Value NSSWCheck = nullptr, NUSWCheck = nullptr;
2213
2214	// Add a check for NUSW
2215	if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2216	NUSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: false);
2217
2218	// Add a check for NSSW
2219	if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2220	NSSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: true);
2221
2222	if (NUSWCheck && NSSWCheck)
2223	return Builder.CreateOr(LHS: NUSWCheck, RHS: NSSWCheck);
2224
2225	if (NUSWCheck)
2226	return NUSWCheck;
2227
2228	if (NSSWCheck)
2229	return NSSWCheck;
2230
2231	return ConstantInt::getFalse(Context&: IP->getContext());
2232	}
2233
2234	Value SCEVExpander::expandUnionPredicate(const* SCEVUnionPredicate *Union,
2235	Instruction *IP) {
2236	// Loop over all checks in this set.
2237	SmallVector<Value *> Checks;
2238	for (const auto *Pred : Union->getPredicates()) {
2239	Checks.push_back(Elt: expandCodeForPredicate(Pred, IP));
2240	Builder.SetInsertPoint(IP);
2241	}
2242
2243	if (Checks.empty())
2244	return ConstantInt::getFalse(Context&: IP->getContext());
2245	return Builder.CreateOr(Ops: Checks);
2246	}
2247
2248	Value SCEVExpander::fixupLCSSAFormFor(Value V) {
2249	auto *DefI = dyn_cast<Instruction>(Val: V);
2250	if (!PreserveLCSSA \|\| !DefI)
2251	return V;
2252
2253	BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2254	Loop *DefLoop = SE.LI.getLoopFor(BB: DefI->getParent());
2255	Loop *UseLoop = SE.LI.getLoopFor(BB: InsertPt ->getParent());
2256	if (!DefLoop \|\| UseLoop == DefLoop \|\| DefLoop->contains(L: UseLoop))
2257	return V;
2258
2259	// Create a temporary instruction to at the current insertion point, so we
2260	// can hand it off to the helper to create LCSSA PHIs if required for the
2261	// new use.
2262	// FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2263	// would accept a insertion point and return an LCSSA phi for that
2264	// insertion point, so there is no need to insert & remove the temporary
2265	// instruction.
2266	Type *ToTy;
2267	if (DefI->getType()->isIntegerTy())
2268	ToTy = PointerType::get(C&: DefI->getContext(), AddressSpace: `0`);
2269	else
2270	ToTy = Type::getInt32Ty(C&: DefI->getContext());
2271	Instruction *User =
2272	CastInst::CreateBitOrPointerCast(S: DefI, Ty: ToTy, Name: "tmp.lcssa.user", InsertBefore: InsertPt);
2273	auto RemoveUserOnExit =
2274	make_scope_exit(F: [User]() { User->eraseFromParent(); });
2275
2276	SmallVector<Instruction *, `1`> ToUpdate;
2277	ToUpdate.push_back(Elt: DefI);
2278	SmallVector<PHINode *, `16`> PHIsToRemove;
2279	SmallVector<PHINode *, `16`> InsertedPHIs;
2280	formLCSSAForInstructions(Worklist&: ToUpdate, DT: SE.DT, LI: SE.LI, SE: &SE, PHIsToRemove: &PHIsToRemove,
2281	InsertedPHIs: &InsertedPHIs);
2282	for (PHINode *PN : InsertedPHIs)
2283	rememberInstruction(I: PN);
2284	for (PHINode *PN : PHIsToRemove) {
2285	if (!PN->use_empty())
2286	continue;
2287	InsertedValues.erase(V: PN);
2288	InsertedPostIncValues.erase(V: PN);
2289	PN->eraseFromParent();
2290	}
2291
2292	return User->getOperand(i: `0`);
2293	}
2294
2295	namespace {
2296	// Search for a SCEV subexpression that is not safe to expand. Any expression
2297	// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2298	// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2299	// instruction, but the important thing is that we prove the denominator is
2300	// nonzero before expansion.
2301	//
2302	// IVUsers already checks that IV-derived expressions are safe. So this check is
2303	// only needed when the expression includes some subexpression that is not IV
2304	// derived.
2305	//
2306	// Currently, we only allow division by a value provably non-zero here.
2307	//
2308	// We cannot generally expand recurrences unless the step dominates the loop
2309	// header. The expander handles the special case of affine recurrences by
2310	// scaling the recurrence outside the loop, but this technique isn't generally
2311	// applicable. Expanding a nested recurrence outside a loop requires computing
2312	// binomial coefficients. This could be done, but the recurrence has to be in a
2313	// perfectly reduced form, which can't be guaranteed.
2314	struct SCEVFindUnsafe {
2315	ScalarEvolution &SE;
2316	bool CanonicalMode;
2317	bool IsUnsafe = false;
2318
2319	SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2320	: SE(SE), CanonicalMode(CanonicalMode) {}
2321
2322	bool follow(const SCEV *S) {
2323	if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
2324	if (!SE.isKnownNonZero(S: D->getRHS())) {
2325	IsUnsafe = true;
2326	return false;
2327	}
2328	}
2329	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2330	// For non-affine addrecs or in non-canonical mode we need a preheader
2331	// to insert into.
2332	if (!AR->getLoop()->getLoopPreheader() &&
2333	(!CanonicalMode \|\| !AR->isAffine())) {
2334	IsUnsafe = true;
2335	return false;
2336	}
2337	}
2338	return true;
2339	}
2340	bool isDone() const { return IsUnsafe; }
2341	};
2342	} // namespace
2343
2344	bool SCEVExpander::isSafeToExpand(const SCEV S) const* {
2345	SCEVFindUnsafe Search(SE, CanonicalMode);
2346	visitAll(Root: S, Visitor&: Search);
2347	return !Search.IsUnsafe;
2348	}
2349
2350	bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2351	const Instruction InsertionPoint) const* {
2352	if (!isSafeToExpand(S))
2353	return false;
2354	// We have to prove that the expanded site of S dominates InsertionPoint.
2355	// This is easy when not in the same block, but hard when S is an instruction
2356	// to be expanded somewhere inside the same block as our insertion point.
2357	// What we really need here is something analogous to an OrderedBasicBlock,
2358	// but for the moment, we paper over the problem by handling two common and
2359	// cheap to check cases.
2360	if (SE.properlyDominates(S, BB: InsertionPoint->getParent()))
2361	return true;
2362	if (SE.dominates(S, BB: InsertionPoint->getParent())) {
2363	if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2364	return true;
2365	if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S))
2366	if (llvm::is_contained(Range: InsertionPoint->operand_values(), Element: U->getValue()))
2367	return true;
2368	}
2369	return false;
2370	}
2371
2372	void SCEVExpanderCleaner::cleanup() {
2373	// Result is used, nothing to remove.
2374	if (ResultUsed)
2375	return;
2376
2377	// Restore original poison flags.
2378	for (auto [I, Flags] : Expander.OrigFlags)
2379	Flags.apply(I);
2380
2381	auto InsertedInstructions = Expander.getAllInsertedInstructions();
2382	#ifndef NDEBUG
2383	SmallPtrSet<Instruction *, `8`> InsertedSet(InsertedInstructions.begin(),
2384	InsertedInstructions.end());
2385	(void)InsertedSet;
2386	#endif
2387	// Remove sets with value handles.
2388	Expander.clear();
2389
2390	// Remove all inserted instructions.
2391	for (Instruction *I : reverse(C&: InsertedInstructions)) {
2392	#ifndef NDEBUG
2393	assert(all_of(I->users(),
2394	[&InsertedSet](Value *U) {
2395	return InsertedSet.contains(cast<Instruction>(U));
2396	}) &&
2397	"removed instruction should only be used by instructions inserted "
2398	"during expansion");
2399	#endif
2400	assert(!I->getType()->isVoidTy() &&
2401	"inserted instruction should have non-void types");
2402	I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType()));
2403	I->eraseFromParent();
2404	}
2405	}
2406

Browse the source code of llvm_projects/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp