//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
#else
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
#endif

using namespace llvm;

cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
    "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
    cl::desc("When performing SCEV expansion only if it is cheap to do, this "
             "controls the budget that is considered cheap (default = 4)"));

using namespace PatternMatch;
using namespace SCEVPatternMatch;
47
48PoisonFlags::PoisonFlags(const Instruction *I) {
49 NUW = false;
50 NSW = false;
51 Exact = false;
52 Disjoint = false;
53 NNeg = false;
54 SameSign = false;
55 GEPNW = GEPNoWrapFlags::none();
56 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I)) {
57 NUW = OBO->hasNoUnsignedWrap();
58 NSW = OBO->hasNoSignedWrap();
59 }
60 if (auto *PEO = dyn_cast<PossiblyExactOperator>(Val: I))
61 Exact = PEO->isExact();
62 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
63 Disjoint = PDI->isDisjoint();
64 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
65 NNeg = PNI->hasNonNeg();
66 if (auto *TI = dyn_cast<TruncInst>(Val: I)) {
67 NUW = TI->hasNoUnsignedWrap();
68 NSW = TI->hasNoSignedWrap();
69 }
70 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I))
71 GEPNW = GEP->getNoWrapFlags();
72 if (auto *ICmp = dyn_cast<ICmpInst>(Val: I))
73 SameSign = ICmp->hasSameSign();
74}
75
76void PoisonFlags::apply(Instruction *I) {
77 if (isa<OverflowingBinaryOperator>(Val: I)) {
78 I->setHasNoUnsignedWrap(NUW);
79 I->setHasNoSignedWrap(NSW);
80 }
81 if (isa<PossiblyExactOperator>(Val: I))
82 I->setIsExact(Exact);
83 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
84 PDI->setIsDisjoint(Disjoint);
85 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
86 PNI->setNonNeg(NNeg);
87 if (isa<TruncInst>(Val: I)) {
88 I->setHasNoUnsignedWrap(NUW);
89 I->setHasNoSignedWrap(NSW);
90 }
91 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I))
92 GEP->setNoWrapFlags(GEPNW);
93 if (auto *ICmp = dyn_cast<ICmpInst>(Val: I))
94 ICmp->setSameSign(SameSign);
95}
96
97/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
98/// reusing an existing cast if a suitable one (= dominating IP) exists, or
99/// creating a new one.
100Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
101 Instruction::CastOps Op,
102 BasicBlock::iterator IP) {
103 // This function must be called with the builder having a valid insertion
104 // point. It doesn't need to be the actual IP where the uses of the returned
105 // cast will be added, but it must dominate such IP.
106 // We use this precondition to produce a cast that will dominate all its
107 // uses. In particular, this is crucial for the case where the builder's
108 // insertion point *is* the point where we were asked to put the cast.
109 // Since we don't know the builder's insertion point is actually
110 // where the uses will be added (only that it dominates it), we are
111 // not allowed to move it.
112 BasicBlock::iterator BIP = Builder.GetInsertPoint();
113
114 Value *Ret = nullptr;
115
116 if (!isa<Constant>(Val: V)) {
117 // Check to see if there is already a cast!
118 for (User *U : V->users()) {
119 if (U->getType() != Ty)
120 continue;
121 CastInst *CI = dyn_cast<CastInst>(Val: U);
122 if (!CI || CI->getOpcode() != Op)
123 continue;
124
125 // Found a suitable cast that is at IP or comes before IP. Use it. Note
126 // that the cast must also properly dominate the Builder's insertion
127 // point.
128 if (IP->getParent() == CI->getParent() && &*BIP != CI &&
129 (&*IP == CI || CI->comesBefore(Other: &*IP))) {
130 Ret = CI;
131 break;
132 }
133 }
134 }
135
136 // Create a new cast.
137 if (!Ret) {
138 SCEVInsertPointGuard Guard(Builder, this);
139 Builder.SetInsertPoint(&*IP);
140 Ret = Builder.CreateCast(Op, V, DestTy: Ty, Name: V->getName());
141 }
142
143 // We assert at the end of the function since IP might point to an
144 // instruction with different dominance properties than a cast
145 // (an invoke for example) and not dominate BIP (but the cast does).
146 assert(!isa<Instruction>(Ret) ||
147 SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
148
149 return Ret;
150}
151
152BasicBlock::iterator
153SCEVExpander::findInsertPointAfter(Instruction *I,
154 Instruction *MustDominate) const {
155 BasicBlock::iterator IP = ++I->getIterator();
156 if (auto *II = dyn_cast<InvokeInst>(Val: I))
157 IP = II->getNormalDest()->begin();
158
159 while (isa<PHINode>(Val: IP))
160 ++IP;
161
162 if (isa<FuncletPadInst>(Val: IP) || isa<LandingPadInst>(Val: IP)) {
163 ++IP;
164 } else if (isa<CatchSwitchInst>(Val: IP)) {
165 IP = MustDominate->getParent()->getFirstInsertionPt();
166 } else {
167 assert(!IP->isEHPad() && "unexpected eh pad!");
168 }
169
170 // Adjust insert point to be after instructions inserted by the expander, so
171 // we can re-use already inserted instructions. Avoid skipping past the
172 // original \p MustDominate, in case it is an inserted instruction.
173 while (isInsertedInstruction(I: &*IP) && &*IP != MustDominate)
174 ++IP;
175
176 return IP;
177}
178
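/// Erase expander-inserted instructions that have become trivially dead,
/// following operand chains transitively. \p Root itself is never erased, and
/// values that were merely reused rather than inserted by this expander are
/// left untouched.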
179void SCEVExpander::eraseDeadInstructions(Value *Root) {
180 SmallVector<Value *> WorkList;
181 SmallPtrSet<Value *, 8> DeletedValues;
182 append_range(C&: WorkList, R: getAllInsertedInstructions());
183 while (!WorkList.empty()) {
184 Value *V = WorkList.pop_back_val();
185 if (DeletedValues.contains(Ptr: V))
186 continue;
187 auto *I = dyn_cast<Instruction>(Val: V);
188 if (!I || I == Root || !isInsertedInstruction(I) ||
189 !isInstructionTriviallyDead(I))
190 continue;
191 append_range(C&: WorkList, R: I->operands());
192 InsertedValues.erase(V: I);
193 InsertedPostIncValues.erase(V: I);
194 DeletedValues.insert(Ptr: I);
195 I->eraseFromParent();
196 }
197}
198
199BasicBlock::iterator
200SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
201 // Cast the argument at the beginning of the entry block, after
202 // any bitcasts of other arguments.
203 if (Argument *A = dyn_cast<Argument>(Val: V)) {
204 BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
205 while ((isa<BitCastInst>(Val: IP) &&
206 isa<Argument>(Val: cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: 0)) &&
207 cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: 0) != A))
208 ++IP;
209 return IP;
210 }
211
  // If V is an instruction, insert the cast immediately after it.
213 if (Instruction *I = dyn_cast<Instruction>(Val: V))
214 return findInsertPointAfter(I, MustDominate: &*Builder.GetInsertPoint());
215
216 // Otherwise, this must be some kind of a constant,
217 // so let's plop this cast into the function's entry block.
218 assert(isa<Constant>(V) &&
219 "Expected the cast argument to be a global/constant");
220 return Builder.GetInsertBlock()
221 ->getParent()
222 ->getEntryBlock()
223 .getFirstInsertionPt();
224}
225
226/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
227/// which must be possible with a noop cast, doing what we can to share
228/// the casts.
229Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
230 Instruction::CastOps Op = CastInst::getCastOpcode(Val: V, SrcIsSigned: false, Ty, DstIsSigned: false);
231 assert((Op == Instruction::BitCast ||
232 Op == Instruction::PtrToInt ||
233 Op == Instruction::IntToPtr) &&
234 "InsertNoopCastOfTo cannot perform non-noop casts!");
235 assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
236 "InsertNoopCastOfTo cannot change sizes!");
237
238 // inttoptr only works for integral pointers. For non-integral pointers, we
239 // can create a GEP on null with the integral value as index. Note that
240 // it is safe to use GEP of null instead of inttoptr here, because only
241 // expressions already based on a GEP of null should be converted to pointers
242 // during expansion.
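  // Illustrative example (names invented): in a non-integral address space N,
  // an i64 value %v becomes
  //   %scevgep = getelementptr i8, ptr addrspace(N) null, i64 %v
  // instead of "inttoptr i64 %v to ptr addrspace(N)".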
243 if (Op == Instruction::IntToPtr) {
244 auto *PtrTy = cast<PointerType>(Val: Ty);
245 if (DL.isNonIntegralPointerType(PT: PtrTy))
246 return Builder.CreatePtrAdd(Ptr: Constant::getNullValue(Ty: PtrTy), Offset: V, Name: "scevgep");
247 }
248 // Short-circuit unnecessary bitcasts.
249 if (Op == Instruction::BitCast) {
250 if (V->getType() == Ty)
251 return V;
252 if (CastInst *CI = dyn_cast<CastInst>(Val: V)) {
253 if (CI->getOperand(i_nocapture: 0)->getType() == Ty)
254 return CI->getOperand(i_nocapture: 0);
255 }
256 }
257 // Short-circuit unnecessary inttoptr<->ptrtoint casts.
258 if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
259 SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(Ty: V->getType())) {
260 if (CastInst *CI = dyn_cast<CastInst>(Val: V))
261 if ((CI->getOpcode() == Instruction::PtrToInt ||
262 CI->getOpcode() == Instruction::IntToPtr) &&
263 SE.getTypeSizeInBits(Ty: CI->getType()) ==
264 SE.getTypeSizeInBits(Ty: CI->getOperand(i_nocapture: 0)->getType()))
265 return CI->getOperand(i_nocapture: 0);
266 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
267 if ((CE->getOpcode() == Instruction::PtrToInt ||
268 CE->getOpcode() == Instruction::IntToPtr) &&
269 SE.getTypeSizeInBits(Ty: CE->getType()) ==
270 SE.getTypeSizeInBits(Ty: CE->getOperand(i_nocapture: 0)->getType()))
271 return CE->getOperand(i_nocapture: 0);
272 }
273
274 // Fold a cast of a constant.
275 if (Constant *C = dyn_cast<Constant>(Val: V))
276 return ConstantExpr::getCast(ops: Op, C, Ty);
277
278 // Try to reuse existing cast, or insert one.
279 return ReuseOrCreateCast(V, Ty, Op, IP: GetOptimalInsertionPointForCastOf(V));
280}
281
282/// InsertBinop - Insert the specified binary operator, doing a small amount
283/// of work to avoid inserting an obviously redundant operation, and hoisting
284/// to an outer loop when the opportunity is there and it is safe.
285Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
286 Value *LHS, Value *RHS,
287 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
288 // Fold a binop with constant operands.
289 if (Constant *CLHS = dyn_cast<Constant>(Val: LHS))
290 if (Constant *CRHS = dyn_cast<Constant>(Val: RHS))
291 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, LHS: CLHS, RHS: CRHS, DL))
292 return Res;
293
294 // Do a quick scan to see if we have this binop nearby. If so, reuse it.
295 unsigned ScanLimit = 6;
296 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
297 // Scanning starts from the last instruction before the insertion point.
298 BasicBlock::iterator IP = Builder.GetInsertPoint();
299 if (IP != BlockBegin) {
300 --IP;
301 for (; ScanLimit; --IP, --ScanLimit) {
302 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
303 // Ensure that no-wrap flags match.
304 if (isa<OverflowingBinaryOperator>(Val: I)) {
305 if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
306 return true;
307 if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
308 return true;
309 }
        // Conservatively, do not reuse any instruction which has the exact
        // flag set.
312 if (isa<PossiblyExactOperator>(Val: I) && I->isExact())
313 return true;
314 return false;
315 };
316 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(i: 0) == LHS &&
317 IP->getOperand(i: 1) == RHS && !canGenerateIncompatiblePoison(&*IP))
318 return &*IP;
319 if (IP == BlockBegin) break;
320 }
321 }
322
323 // Save the original insertion point so we can restore it when we're done.
324 DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
325 SCEVInsertPointGuard Guard(Builder, this);
326
327 if (IsSafeToHoist) {
328 // Move the insertion point out of as many loops as we can.
329 while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
330 if (!L->isLoopInvariant(V: LHS) || !L->isLoopInvariant(V: RHS)) break;
331 BasicBlock *Preheader = L->getLoopPreheader();
332 if (!Preheader) break;
333
334 // Ok, move up a level.
335 Builder.SetInsertPoint(Preheader->getTerminator());
336 }
337 }
338
339 // If we haven't found this binop, insert it.
340 // TODO: Use the Builder, which will make CreateBinOp below fold with
341 // InstSimplifyFolder.
342 Instruction *BO = Builder.Insert(I: BinaryOperator::Create(Op: Opcode, S1: LHS, S2: RHS));
343 BO->setDebugLoc(Loc);
344 if (Flags & SCEV::FlagNUW)
345 BO->setHasNoUnsignedWrap();
346 if (Flags & SCEV::FlagNSW)
347 BO->setHasNoSignedWrap();
348
349 return BO;
350}
351
352/// expandAddToGEP - Expand an addition expression with a pointer type into
353/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
354/// BasicAliasAnalysis and other passes analyze the result. See the rules
355/// for getelementptr vs. inttoptr in
356/// http://llvm.org/docs/LangRef.html#pointeraliasing
357/// for details.
358///
359/// Design note: The correctness of using getelementptr here depends on
360/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
361/// they may introduce pointer arithmetic which may not be safely converted
362/// into getelementptr.
363///
364/// Design note: It might seem desirable for this function to be more
365/// loop-aware. If some of the indices are loop-invariant while others
366/// aren't, it might seem desirable to emit multiple GEPs, keeping the
367/// loop-invariant portions of the overall computation outside the loop.
368/// However, there are a few reasons this is not done here. Hoisting simple
369/// arithmetic is a low-level optimization that often isn't very
370/// important until late in the optimization process. In fact, passes
371/// like InstructionCombining will combine GEPs, even if it means
372/// pushing loop-invariant computation down into loops, so even if the
373/// GEPs were split here, the work would quickly be undone. The
374/// LoopStrengthReduction pass, which is usually run quite late (and
375/// after the last InstructionCombining pass), takes care of hoisting
376/// loop-invariant portions of expressions, after considering what
377/// can be folded using target addressing modes.
378///
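/// Illustrative example (names invented for exposition): expanding the SCEV
/// (%base + %n) for a pointer %base yields
///   %scevgep = getelementptr i8, ptr %base, i64 %n
/// rather than an equivalent ptrtoint/add/inttoptr sequence, so alias analysis
/// can still see the pointer relationship.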
379Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V,
380 SCEV::NoWrapFlags Flags) {
381 assert(!isa<Instruction>(V) ||
382 SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
383
384 Value *Idx = expand(S: Offset);
385 GEPNoWrapFlags NW = (Flags & SCEV::FlagNUW) ? GEPNoWrapFlags::noUnsignedWrap()
386 : GEPNoWrapFlags::none();
387
388 // Fold a GEP with constant operands.
389 if (Constant *CLHS = dyn_cast<Constant>(Val: V))
390 if (Constant *CRHS = dyn_cast<Constant>(Val: Idx))
391 return Builder.CreatePtrAdd(Ptr: CLHS, Offset: CRHS, Name: "", NW);
392
393 // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
394 unsigned ScanLimit = 6;
395 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
396 // Scanning starts from the last instruction before the insertion point.
397 BasicBlock::iterator IP = Builder.GetInsertPoint();
398 if (IP != BlockBegin) {
399 --IP;
400 for (; ScanLimit; --IP, --ScanLimit) {
401 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val&: IP)) {
402 if (GEP->getPointerOperand() == V &&
403 GEP->getSourceElementType() == Builder.getInt8Ty() &&
404 GEP->getOperand(i_nocapture: 1) == Idx) {
405 rememberFlags(I: GEP);
406 GEP->setNoWrapFlags(GEP->getNoWrapFlags() & NW);
407 return &*IP;
408 }
409 }
410 if (IP == BlockBegin) break;
411 }
412 }
413
414 // Save the original insertion point so we can restore it when we're done.
415 SCEVInsertPointGuard Guard(Builder, this);
416
417 // Move the insertion point out of as many loops as we can.
418 while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
419 if (!L->isLoopInvariant(V) || !L->isLoopInvariant(V: Idx)) break;
420 BasicBlock *Preheader = L->getLoopPreheader();
421 if (!Preheader) break;
422
423 // Ok, move up a level.
424 Builder.SetInsertPoint(Preheader->getTerminator());
425 }
426
427 // Emit a GEP.
428 return Builder.CreatePtrAdd(Ptr: V, Offset: Idx, Name: "scevgep", NW);
429}
430
431/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
432/// SCEV expansion. If they are nested, this is the most nested. If they are
433/// neighboring, pick the later.
434static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
435 DominatorTree &DT) {
436 if (!A) return B;
437 if (!B) return A;
438 if (A->contains(L: B)) return B;
439 if (B->contains(L: A)) return A;
440 if (DT.dominates(A: A->getHeader(), B: B->getHeader())) return B;
441 if (DT.dominates(A: B->getHeader(), B: A->getHeader())) return A;
442 return A; // Arbitrarily break the tie.
443}
444
445/// getRelevantLoop - Get the most relevant loop associated with the given
446/// expression, according to PickMostRelevantLoop.
447const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
448 // Test whether we've already computed the most relevant loop for this SCEV.
449 auto Pair = RelevantLoops.try_emplace(Key: S);
450 if (!Pair.second)
451 return Pair.first->second;
452
453 switch (S->getSCEVType()) {
454 case scConstant:
455 case scVScale:
456 return nullptr; // A constant has no relevant loops.
457 case scTruncate:
458 case scZeroExtend:
459 case scSignExtend:
460 case scPtrToAddr:
461 case scPtrToInt:
462 case scAddExpr:
463 case scMulExpr:
464 case scUDivExpr:
465 case scAddRecExpr:
466 case scUMaxExpr:
467 case scSMaxExpr:
468 case scUMinExpr:
469 case scSMinExpr:
470 case scSequentialUMinExpr: {
471 const Loop *L = nullptr;
472 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S))
473 L = AR->getLoop();
474 for (const SCEV *Op : S->operands())
475 L = PickMostRelevantLoop(A: L, B: getRelevantLoop(S: Op), DT&: SE.DT);
476 return RelevantLoops[S] = L;
477 }
478 case scUnknown: {
479 const SCEVUnknown *U = cast<SCEVUnknown>(Val: S);
480 if (const Instruction *I = dyn_cast<Instruction>(Val: U->getValue()))
481 return Pair.first->second = SE.LI.getLoopFor(BB: I->getParent());
482 // A non-instruction has no relevant loops.
483 return nullptr;
484 }
485 case scCouldNotCompute:
486 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
487 }
488 llvm_unreachable("Unexpected SCEV type!");
489}
490
491namespace {
492
493/// LoopCompare - Compare loops by PickMostRelevantLoop.
494class LoopCompare {
495 DominatorTree &DT;
496public:
497 explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
498
499 bool operator()(std::pair<const Loop *, const SCEV *> LHS,
500 std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the front; visitAddExpr relies on a
    // pointer operand, if present, being expanded first.
502 if (LHS.second->getType()->isPointerTy() !=
503 RHS.second->getType()->isPointerTy())
504 return LHS.second->getType()->isPointerTy();
505
506 // Compare loops with PickMostRelevantLoop.
507 if (LHS.first != RHS.first)
508 return PickMostRelevantLoop(A: LHS.first, B: RHS.first, DT) != LHS.first;
509
510 // If one operand is a non-constant negative and the other is not,
511 // put the non-constant negative on the right so that a sub can
512 // be used instead of a negate and add.
513 if (LHS.second->isNonConstantNegative()) {
514 if (!RHS.second->isNonConstantNegative())
515 return false;
516 } else if (RHS.second->isNonConstantNegative())
517 return true;
518
519 // Otherwise they are equivalent according to this comparison.
520 return false;
521 }
522};
523
524}
525
526Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  // Recognize the canonical representation of an unsimplified urem.
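  // (An unsimplified "A urem B" reaches SCEV as the equivalent expression
  // A - (A /u B) * B, written as an add of a mul of a udiv; the matcher below
  // recognizes that shape so a single urem instruction can be emitted.)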
528 const SCEV *URemLHS = nullptr;
529 const SCEV *URemRHS = nullptr;
530 if (match(V: S, P: m_scev_URem(LHS: m_SCEV(V&: URemLHS), RHS: m_SCEV(V&: URemRHS), SE))) {
531 Value *LHS = expand(S: URemLHS);
532 Value *RHS = expand(S: URemRHS);
533 return InsertBinop(Opcode: Instruction::URem, LHS, RHS, Flags: SCEV::FlagAnyWrap,
534 /*IsSafeToHoist*/ false);
535 }
536
537 // Collect all the add operands in a loop, along with their associated loops.
538 // Iterate in reverse so that constants are emitted last, all else equal, and
539 // so that pointer operands are inserted first, which the code below relies on
540 // to form more involved GEPs.
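  // For example (illustrative): for an add SCEV (%p + %n + 4) with
  // loop-invariant %p and %n, the pointer operand %p is expanded first and
  // becomes the running Sum, and the remaining integer operands are folded
  // into a single getelementptr off it.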
541 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
542 for (const SCEV *Op : reverse(C: S->operands()))
543 OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));
544
545 // Sort by loop. Use a stable sort so that constants follow non-constants and
546 // pointer operands precede non-pointer operands.
547 llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare(SE.DT));
548
549 // Emit instructions to add all the operands. Hoist as much as possible
550 // out of loops, and form meaningful getelementptrs where possible.
551 Value *Sum = nullptr;
552 for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
553 const Loop *CurLoop = I->first;
554 const SCEV *Op = I->second;
555 if (!Sum) {
556 // This is the first operand. Just expand it.
557 Sum = expand(S: Op);
558 ++I;
559 continue;
560 }
561
562 assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
563 if (isa<PointerType>(Val: Sum->getType())) {
564 // The running sum expression is a pointer. Try to form a getelementptr
565 // at this level with that as the base.
566 SmallVector<const SCEV *, 4> NewOps;
567 for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is a SCEVUnknown wrapping something other than an
        // instruction, look through it so that more of it can be folded into
        // the GEP.
570 const SCEV *X = I->second;
571 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: X))
572 if (!isa<Instruction>(Val: U->getValue()))
573 X = SE.getSCEV(V: U->getValue());
574 NewOps.push_back(Elt: X);
575 }
576 Sum = expandAddToGEP(Offset: SE.getAddExpr(Ops&: NewOps), V: Sum, Flags: S->getNoWrapFlags());
577 } else if (Op->isNonConstantNegative()) {
578 // Instead of doing a negate and add, just do a subtract.
579 Value *W = expand(S: SE.getNegativeSCEV(V: Op));
580 Sum = InsertBinop(Opcode: Instruction::Sub, LHS: Sum, RHS: W, Flags: SCEV::FlagAnyWrap,
581 /*IsSafeToHoist*/ true);
582 ++I;
583 } else {
584 // A simple add.
585 Value *W = expand(S: Op);
586 // Canonicalize a constant to the RHS.
587 if (isa<Constant>(Val: Sum))
588 std::swap(a&: Sum, b&: W);
589 Sum = InsertBinop(Opcode: Instruction::Add, LHS: Sum, RHS: W, Flags: S->getNoWrapFlags(),
590 /*IsSafeToHoist*/ true);
591 ++I;
592 }
593 }
594
595 return Sum;
596}
597
598Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
599 Type *Ty = S->getType();
600
601 // Collect all the mul operands in a loop, along with their associated loops.
602 // Iterate in reverse so that constants are emitted last, all else equal.
603 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
604 for (const SCEV *Op : reverse(C: S->operands()))
605 OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));
606
607 // Sort by loop. Use a stable sort so that constants follow non-constants.
608 llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare(SE.DT));
609
610 // Emit instructions to mul all the operands. Hoist as much as possible
611 // out of loops.
612 Value *Prod = nullptr;
613 auto I = OpsAndLoops.begin();
614
615 // Expand the calculation of X pow N in the following manner:
616 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
617 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
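  // Worked example (illustrative): X pow 13 with 13 = 1 + 4 + 8 is emitted as
  // X * (X pow 4) * (X pow 8), where X^2, X^4 and X^8 are obtained by repeated
  // squaring, so only 5 multiplies are needed instead of 12.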
618 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
619 auto E = I;
620 // Calculate how many times the same operand from the same loop is included
621 // into this power.
622 uint64_t Exponent = 0;
623 const uint64_t MaxExponent = UINT64_MAX >> 1;
    // No one sane will ever try to calculate such huge exponents, but if we
    // ever need this, we stop at UINT64_MAX / 2 so that the loop below can
    // always form a power of 2 greater than Exponent without the 64-bit
    // BinExp overflowing.
628 while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
629 ++Exponent;
630 ++E;
631 }
632 assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
633
634 // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
635 // that are needed into the result.
636 Value *P = expand(S: I->second);
637 Value *Result = nullptr;
638 if (Exponent & 1)
639 Result = P;
640 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
641 P = InsertBinop(Opcode: Instruction::Mul, LHS: P, RHS: P, Flags: SCEV::FlagAnyWrap,
642 /*IsSafeToHoist*/ true);
643 if (Exponent & BinExp)
644 Result = Result ? InsertBinop(Opcode: Instruction::Mul, LHS: Result, RHS: P,
645 Flags: SCEV::FlagAnyWrap,
646 /*IsSafeToHoist*/ true)
647 : P;
648 }
649
650 I = E;
651 assert(Result && "Nothing was expanded?");
652 return Result;
653 };
654
655 while (I != OpsAndLoops.end()) {
656 if (!Prod) {
657 // This is the first operand. Just expand it.
658 Prod = ExpandOpBinPowN();
659 } else if (I->second->isAllOnesValue()) {
660 // Instead of doing a multiply by negative one, just do a negate.
661 Prod = InsertBinop(Opcode: Instruction::Sub, LHS: Constant::getNullValue(Ty), RHS: Prod,
662 Flags: SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
663 ++I;
664 } else {
665 // A simple mul.
666 Value *W = ExpandOpBinPowN();
667 // Canonicalize a constant to the RHS.
668 if (isa<Constant>(Val: Prod)) std::swap(a&: Prod, b&: W);
669 const APInt *RHS;
670 if (match(V: W, P: m_Power2(V&: RHS))) {
671 // Canonicalize Prod*(1<<C) to Prod<<C.
672 assert(!Ty->isVectorTy() && "vector types are not SCEVable");
673 auto NWFlags = S->getNoWrapFlags();
        // Clear the nsw flag if the shl would produce a poison value.
675 if (RHS->logBase2() == RHS->getBitWidth() - 1)
676 NWFlags = ScalarEvolution::clearFlags(Flags: NWFlags, OffFlags: SCEV::FlagNSW);
677 Prod = InsertBinop(Opcode: Instruction::Shl, LHS: Prod,
678 RHS: ConstantInt::get(Ty, V: RHS->logBase2()), Flags: NWFlags,
679 /*IsSafeToHoist*/ true);
680 } else {
681 Prod = InsertBinop(Opcode: Instruction::Mul, LHS: Prod, RHS: W, Flags: S->getNoWrapFlags(),
682 /*IsSafeToHoist*/ true);
683 }
684 }
685 }
686
687 return Prod;
688}
689
690Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
691 Value *LHS = expand(S: S->getLHS());
692 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Val: S->getRHS())) {
693 const APInt &RHS = SC->getAPInt();
694 if (RHS.isPowerOf2())
695 return InsertBinop(Opcode: Instruction::LShr, LHS,
696 RHS: ConstantInt::get(Ty: SC->getType(), V: RHS.logBase2()),
697 Flags: SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
698 }
699
700 const SCEV *RHSExpr = S->getRHS();
701 Value *RHS = expand(S: RHSExpr);
702 if (SafeUDivMode) {
703 bool GuaranteedNotPoison =
704 ScalarEvolution::isGuaranteedNotToBePoison(Op: RHSExpr);
705 if (!GuaranteedNotPoison)
706 RHS = Builder.CreateFreeze(V: RHS);
707
    // We need an umax if either RHSExpr is not known to be non-zero, or if it
    // is not guaranteed to be non-poison. In the latter case, the frozen
    // poison may be 0.
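    // The guarded expansion then looks roughly like this (illustrative names,
    // assuming a 64-bit type):
    //   %rhs.fr   = freeze i64 %rhs
    //   %rhs.safe = call i64 @llvm.umax.i64(i64 %rhs.fr, i64 1)
    //   %div      = udiv i64 %lhs, %rhs.safe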
711 if (!SE.isKnownNonZero(S: RHSExpr) || !GuaranteedNotPoison)
712 RHS = Builder.CreateIntrinsic(RetTy: RHS->getType(), ID: Intrinsic::umax,
713 Args: {RHS, ConstantInt::get(Ty: RHS->getType(), V: 1)});
714 }
715 return InsertBinop(Opcode: Instruction::UDiv, LHS, RHS, Flags: SCEV::FlagAnyWrap,
716 /*IsSafeToHoist*/ SE.isKnownNonZero(S: S->getRHS()));
717}
718
719/// Determine if this is a well-behaved chain of instructions leading back to
720/// the PHI. If so, it may be reused by expanded expressions.
721bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
722 const Loop *L) {
723 if (IncV->getNumOperands() == 0 || isa<PHINode>(Val: IncV) ||
724 (isa<CastInst>(Val: IncV) && !isa<BitCastInst>(Val: IncV)))
725 return false;
726 // If any of the operands don't dominate the insert position, bail.
727 // Addrec operands are always loop-invariant, so this can only happen
728 // if there are instructions which haven't been hoisted.
729 if (L == IVIncInsertLoop) {
730 for (Use &Op : llvm::drop_begin(RangeOrContainer: IncV->operands()))
731 if (Instruction *OInst = dyn_cast<Instruction>(Val&: Op))
732 if (!SE.DT.dominates(Def: OInst, User: IVIncInsertPos))
733 return false;
734 }
735 // Advance to the next instruction.
736 IncV = dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
737 if (!IncV)
738 return false;
739
740 if (IncV->mayHaveSideEffects())
741 return false;
742
743 if (IncV == PN)
744 return true;
745
746 return isNormalAddRecExprPHI(PN, IncV, L);
747}
748
749/// getIVIncOperand returns an induction variable increment's induction
750/// variable operand.
751///
752/// If allowScale is set, any type of GEP is allowed as long as the nonIV
753/// operands dominate InsertPos.
754///
755/// If allowScale is not set, ensure that a GEP increment conforms to one of the
756/// simple patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP. If the pattern isn't recognized, return NULL.
758Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
759 Instruction *InsertPos,
760 bool allowScale) {
761 if (IncV == InsertPos)
762 return nullptr;
763
764 switch (IncV->getOpcode()) {
765 default:
766 return nullptr;
767 // Check for a simple Add/Sub or GEP of a loop invariant step.
768 case Instruction::Add:
769 case Instruction::Sub: {
770 Instruction *OInst = dyn_cast<Instruction>(Val: IncV->getOperand(i: 1));
771 if (!OInst || SE.DT.dominates(Def: OInst, User: InsertPos))
772 return dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
773 return nullptr;
774 }
775 case Instruction::BitCast:
776 return dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
777 case Instruction::GetElementPtr:
778 for (Use &U : llvm::drop_begin(RangeOrContainer: IncV->operands())) {
779 if (isa<Constant>(Val: U))
780 continue;
781 if (Instruction *OInst = dyn_cast<Instruction>(Val&: U)) {
782 if (!SE.DT.dominates(Def: OInst, User: InsertPos))
783 return nullptr;
784 }
785 if (allowScale) {
786 // allow any kind of GEP as long as it can be hoisted.
787 continue;
788 }
789 // GEPs produced by SCEVExpander use i8 element type.
790 if (!cast<GEPOperator>(Val: IncV)->getSourceElementType()->isIntegerTy(Bitwidth: 8))
791 return nullptr;
792 break;
793 }
794 return dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
795 }
796}
797
798/// If the insert point of the current builder or any of the builders on the
799/// stack of saved builders has 'I' as its insert point, update it to point to
800/// the instruction after 'I'. This is intended to be used when the instruction
801/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
802/// different block, the inconsistent insert point (with a mismatched
803/// Instruction and Block) can lead to an instruction being inserted in a block
804/// other than its parent.
805void SCEVExpander::fixupInsertPoints(Instruction *I) {
806 BasicBlock::iterator It(*I);
807 BasicBlock::iterator NewInsertPt = std::next(x: It);
808 if (Builder.GetInsertPoint() == It)
809 Builder.SetInsertPoint(&*NewInsertPt);
810 for (auto *InsertPtGuard : InsertPointGuards)
811 if (InsertPtGuard->GetInsertPoint() == It)
812 InsertPtGuard->SetInsertPoint(NewInsertPt);
813}
814
/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
816/// it available to other uses in this loop. Recursively hoist any operands,
817/// until we reach a value that dominates InsertPos.
818bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
819 bool RecomputePoisonFlags) {
820 auto FixupPoisonFlags = [this](Instruction *I) {
821 // Drop flags that are potentially inferred from old context and infer flags
822 // in new context.
823 rememberFlags(I);
824 I->dropPoisonGeneratingFlags();
825 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
826 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
827 auto *BO = cast<BinaryOperator>(Val: I);
828 BO->setHasNoUnsignedWrap(
829 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
830 BO->setHasNoSignedWrap(
831 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
832 }
833 };
834
835 if (SE.DT.dominates(Def: IncV, User: InsertPos)) {
836 if (RecomputePoisonFlags)
837 FixupPoisonFlags(IncV);
838 return true;
839 }
840
841 // InsertPos must itself dominate IncV so that IncV's new position satisfies
842 // its existing users.
843 if (isa<PHINode>(Val: InsertPos) ||
844 !SE.DT.dominates(A: InsertPos->getParent(), B: IncV->getParent()))
845 return false;
846
847 if (!SE.LI.movementPreservesLCSSAForm(Inst: IncV, NewLoc: InsertPos))
848 return false;
849
850 // Check that the chain of IV operands leading back to Phi can be hoisted.
851 SmallVector<Instruction*, 4> IVIncs;
852 for(;;) {
853 Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
854 if (!Oper)
855 return false;
856 // IncV is safe to hoist.
857 IVIncs.push_back(Elt: IncV);
858 IncV = Oper;
859 if (SE.DT.dominates(Def: IncV, User: InsertPos))
860 break;
861 }
862 for (Instruction *I : llvm::reverse(C&: IVIncs)) {
863 fixupInsertPoints(I);
864 I->moveBefore(InsertPos: InsertPos->getIterator());
865 if (RecomputePoisonFlags)
866 FixupPoisonFlags(I);
867 }
868 return true;
869}
870
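/// Return true if the poison-generating flags of the original IV increment
/// \p OrigInc can be reused for the widened increment \p WideInc: both must be
/// binary operators of the same opcode that use their respective phi
/// (\p OrigPhi / \p WidePhi) as one operand.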
871bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
872 PHINode *WidePhi,
873 Instruction *OrigInc,
874 Instruction *WideInc) {
875 return match(V: OrigInc, P: m_c_BinOp(L: m_Specific(V: OrigPhi), R: m_Value())) &&
876 match(V: WideInc, P: m_c_BinOp(L: m_Specific(V: WidePhi), R: m_Value())) &&
877 OrigInc->getOpcode() == WideInc->getOpcode();
878}
879
880/// Determine if this cyclic phi is in a form that would have been generated by
881/// LSR. We don't care if the phi was actually expanded in this pass, as long
882/// as it is in a low-cost form, for example, no implied multiplication. This
883/// should match any patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP.
885bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
886 const Loop *L) {
887 for(Instruction *IVOper = IncV;
888 (IVOper = getIVIncOperand(IncV: IVOper, InsertPos: L->getLoopPreheader()->getTerminator(),
889 /*allowScale=*/false));) {
890 if (IVOper == PN)
891 return true;
892 }
893 return false;
894}
895
896/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
897/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
898/// need to materialize IV increments elsewhere to handle difficult situations.
899Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
900 bool useSubtract) {
901 Value *IncV;
902 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
903 if (PN->getType()->isPointerTy()) {
904 // TODO: Change name to IVName.iv.next.
905 IncV = Builder.CreatePtrAdd(Ptr: PN, Offset: StepV, Name: "scevgep");
906 } else {
907 IncV = useSubtract ?
908 Builder.CreateSub(LHS: PN, RHS: StepV, Name: Twine(IVName) + ".iv.next") :
909 Builder.CreateAdd(LHS: PN, RHS: StepV, Name: Twine(IVName) + ".iv.next");
910 }
911 return IncV;
912}
913
914/// Check whether we can cheaply express the requested SCEV in terms of
915/// the available PHI SCEV by truncation and/or inversion of the step.
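/// Illustrative example: if the loop already carries an i32 phi for {0,+,1}
/// and the requested addrec is the i16 {100,+,-1}, the phi can be reused by
/// truncating it to i16 and noting that 100 - {0,+,1} == {100,+,-1}; InvertStep
/// is then set and the caller emits "100 - trunc(phi)".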
916static bool canBeCheaplyTransformed(ScalarEvolution &SE,
917 const SCEVAddRecExpr *Phi,
918 const SCEVAddRecExpr *Requested,
919 bool &InvertStep) {
920 // We can't transform to match a pointer PHI.
921 Type *PhiTy = Phi->getType();
922 Type *RequestedTy = Requested->getType();
923 if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
924 return false;
925
926 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
927 return false;
928
929 // Try truncate it if necessary.
930 Phi = dyn_cast<SCEVAddRecExpr>(Val: SE.getTruncateOrNoop(V: Phi, Ty: RequestedTy));
931 if (!Phi)
932 return false;
933
934 // Check whether truncation will help.
935 if (Phi == Requested) {
936 InvertStep = false;
937 return true;
938 }
939
940 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
941 if (SE.getMinusSCEV(LHS: Requested->getStart(), RHS: Requested) == Phi) {
942 InvertStep = true;
943 return true;
944 }
945
946 return false;
947}
948
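/// Return true if the increment of \p AR is known not to wrap in the signed
/// sense. This is established by checking, in a type twice as wide, that
/// sign-extending after the addition gives the same result as adding the
/// sign-extended operands, i.e. sext(AR + Step) == sext(AR) + sext(Step).
/// IsIncrementNUW below is the unsigned analogue using zero-extension.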
949static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
950 if (!isa<IntegerType>(Val: AR->getType()))
951 return false;
952
953 unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
954 Type *WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth * 2);
955 const SCEV *Step = AR->getStepRecurrence(SE);
956 const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getSignExtendExpr(Op: Step, Ty: WideTy),
957 RHS: SE.getSignExtendExpr(Op: AR, Ty: WideTy));
958 const SCEV *ExtendAfterOp =
959 SE.getSignExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
960 return ExtendAfterOp == OpAfterExtend;
961}
962
963static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
964 if (!isa<IntegerType>(Val: AR->getType()))
965 return false;
966
967 unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
968 Type *WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth * 2);
969 const SCEV *Step = AR->getStepRecurrence(SE);
970 const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getZeroExtendExpr(Op: Step, Ty: WideTy),
971 RHS: SE.getZeroExtendExpr(Op: AR, Ty: WideTy));
972 const SCEV *ExtendAfterOp =
973 SE.getZeroExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
974 return ExtendAfterOp == OpAfterExtend;
975}
976
977/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
978/// the base addrec, which is the addrec without any non-loop-dominating
979/// values, and return the PHI.
980PHINode *
981SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
982 const Loop *L, Type *&TruncTy,
983 bool &InvertStep) {
984 assert((!IVIncInsertLoop || IVIncInsertPos) &&
985 "Uninitialized insert position");
986
987 // Reuse a previously-inserted PHI, if present.
988 BasicBlock *LatchBlock = L->getLoopLatch();
989 if (LatchBlock) {
990 PHINode *AddRecPhiMatch = nullptr;
991 Instruction *IncV = nullptr;
992 TruncTy = nullptr;
993 InvertStep = false;
994
995 // Only try partially matching scevs that need truncation and/or
996 // step-inversion if we know this loop is outside the current loop.
997 bool TryNonMatchingSCEV =
998 IVIncInsertLoop &&
999 SE.DT.properlyDominates(A: LatchBlock, B: IVIncInsertLoop->getHeader());
1000
1001 for (PHINode &PN : L->getHeader()->phis()) {
1002 if (!SE.isSCEVable(Ty: PN.getType()))
1003 continue;
1004
      // We should not look for an incomplete PHI. Getting SCEV for an
      // incomplete PHI has no meaning at all.
1007 if (!PN.isComplete()) {
1008 SCEV_DEBUG_WITH_TYPE(
1009 DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
1010 continue;
1011 }
1012
1013 const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: &PN));
1014 if (!PhiSCEV)
1015 continue;
1016
1017 bool IsMatchingSCEV = PhiSCEV == Normalized;
1018 // We only handle truncation and inversion of phi recurrences for the
1019 // expanded expression if the expanded expression's loop dominates the
1020 // loop we insert to. Check now, so we can bail out early.
1021 if (!IsMatchingSCEV && !TryNonMatchingSCEV)
1022 continue;
1023
1024 // TODO: this possibly can be reworked to avoid this cast at all.
1025 Instruction *TempIncV =
1026 dyn_cast<Instruction>(Val: PN.getIncomingValueForBlock(BB: LatchBlock));
1027 if (!TempIncV)
1028 continue;
1029
1030 // Check whether we can reuse this PHI node.
1031 if (LSRMode) {
1032 if (!isExpandedAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
1033 continue;
1034 } else {
1035 if (!isNormalAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
1036 continue;
1037 }
1038
1039 // Stop if we have found an exact match SCEV.
1040 if (IsMatchingSCEV) {
1041 IncV = TempIncV;
1042 TruncTy = nullptr;
1043 InvertStep = false;
1044 AddRecPhiMatch = &PN;
1045 break;
1046 }
1047
1048 // Try whether the phi can be translated into the requested form
1049 // (truncated and/or offset by a constant).
1050 if ((!TruncTy || InvertStep) &&
1051 canBeCheaplyTransformed(SE, Phi: PhiSCEV, Requested: Normalized, InvertStep)) {
        // Record the phi node. But don't stop; we might find an exact match
        // later.
1054 AddRecPhiMatch = &PN;
1055 IncV = TempIncV;
1056 TruncTy = Normalized->getType();
1057 }
1058 }
1059
1060 if (AddRecPhiMatch) {
1061 // Ok, the add recurrence looks usable.
1062 // Remember this PHI, even in post-inc mode.
1063 InsertedValues.insert(V: AddRecPhiMatch);
1064 // Remember the increment.
1065 rememberInstruction(I: IncV);
1066 // Those values were not actually inserted but re-used.
1067 ReusedValues.insert(Ptr: AddRecPhiMatch);
1068 ReusedValues.insert(Ptr: IncV);
1069 return AddRecPhiMatch;
1070 }
1071 }
1072
1073 // Save the original insertion point so we can restore it when we're done.
1074 SCEVInsertPointGuard Guard(Builder, this);
1075
1076 // Another AddRec may need to be recursively expanded below. For example, if
1077 // this AddRec is quadratic, the StepV may itself be an AddRec in this
1078 // loop. Remove this loop from the PostIncLoops set before expanding such
1079 // AddRecs. Otherwise, we cannot find a valid position for the step
1080 // (i.e. StepV can never dominate its loop header). Ideally, we could do
1081 // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1082 // so it's not worth implementing SmallPtrSet::swap.
1083 PostIncLoopSet SavedPostIncLoops = PostIncLoops;
1084 PostIncLoops.clear();
1085
1086 // Expand code for the start value into the loop preheader.
1087 assert(L->getLoopPreheader() &&
1088 "Can't expand add recurrences without a loop preheader!");
1089 Value *StartV =
1090 expand(S: Normalized->getStart(), I: L->getLoopPreheader()->getTerminator());
1091
  // StartV must have been inserted into L's preheader to dominate the new
  // phi.
1094 assert(!isa<Instruction>(StartV) ||
1095 SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
1096 L->getHeader()));
1097
1098 // Expand code for the step value. Do this before creating the PHI so that PHI
1099 // reuse code doesn't see an incomplete PHI.
1100 const SCEV *Step = Normalized->getStepRecurrence(SE);
1101 Type *ExpandTy = Normalized->getType();
1102 // If the stride is negative, insert a sub instead of an add for the increment
1103 // (unless it's a constant, because subtracts of constants are canonicalized
1104 // to adds).
1105 bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1106 if (useSubtract)
1107 Step = SE.getNegativeSCEV(V: Step);
1108 // Expand the step somewhere that dominates the loop header.
1109 Value *StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());
1110
1111 // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1112 // we actually do emit an addition. It does not apply if we emit a
1113 // subtraction.
1114 bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, AR: Normalized);
1115 bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, AR: Normalized);
1116
1117 // Create the PHI.
1118 BasicBlock *Header = L->getHeader();
1119 Builder.SetInsertPoint(TheBB: Header, IP: Header->begin());
1120 PHINode *PN =
1121 Builder.CreatePHI(Ty: ExpandTy, NumReservedValues: pred_size(BB: Header), Name: Twine(IVName) + ".iv");
1122
1123 // Create the step instructions and populate the PHI.
1124 for (BasicBlock *Pred : predecessors(BB: Header)) {
1125 // Add a start value.
1126 if (!L->contains(BB: Pred)) {
1127 PN->addIncoming(V: StartV, BB: Pred);
1128 continue;
1129 }
1130
1131 // Create a step value and add it to the PHI.
1132 // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1133 // instructions at IVIncInsertPos.
1134 Instruction *InsertPos = L == IVIncInsertLoop ?
1135 IVIncInsertPos : Pred->getTerminator();
1136 Builder.SetInsertPoint(InsertPos);
1137 Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1138
1139 if (isa<OverflowingBinaryOperator>(Val: IncV)) {
1140 if (IncrementIsNUW)
1141 cast<BinaryOperator>(Val: IncV)->setHasNoUnsignedWrap();
1142 if (IncrementIsNSW)
1143 cast<BinaryOperator>(Val: IncV)->setHasNoSignedWrap();
1144 }
1145 PN->addIncoming(V: IncV, BB: Pred);
1146 }
1147
1148 // After expanding subexpressions, restore the PostIncLoops set so the caller
1149 // can ensure that IVIncrement dominates the current uses.
1150 PostIncLoops = SavedPostIncLoops;
1151
1152 // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1153 // effective when we are able to use an IV inserted here, so record it.
1154 InsertedValues.insert(V: PN);
1155 InsertedIVs.push_back(Elt: PN);
1156 return PN;
1157}
1158
1159Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
1160 const Loop *L = S->getLoop();
1161
1162 // Determine a normalized form of this expression, which is the expression
1163 // before any post-inc adjustment is made.
1164 const SCEVAddRecExpr *Normalized = S;
1165 if (PostIncLoops.count(Ptr: L)) {
1166 PostIncLoopSet Loops;
1167 Loops.insert(Ptr: L);
1168 Normalized = cast<SCEVAddRecExpr>(
1169 Val: normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
1170 }
1171
1172 [[maybe_unused]] const SCEV *Start = Normalized->getStart();
1173 const SCEV *Step = Normalized->getStepRecurrence(SE);
1174 assert(SE.properlyDominates(Start, L->getHeader()) &&
1175 "Start does not properly dominate loop header");
  assert(SE.dominates(Step, L->getHeader()) &&
         "Step does not dominate loop header");
1177
1178 // In some cases, we decide to reuse an existing phi node but need to truncate
1179 // it and/or invert the step.
1180 Type *TruncTy = nullptr;
1181 bool InvertStep = false;
1182 PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1183
1184 // Accommodate post-inc mode, if necessary.
1185 Value *Result;
1186 if (!PostIncLoops.count(Ptr: L))
1187 Result = PN;
1188 else {
1189 // In PostInc mode, use the post-incremented value.
1190 BasicBlock *LatchBlock = L->getLoopLatch();
1191 assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1192 Result = PN->getIncomingValueForBlock(BB: LatchBlock);
1193
1194 // We might be introducing a new use of the post-inc IV that is not poison
1195 // safe, in which case we should drop poison generating flags. Only keep
1196 // those flags for which SCEV has proven that they always hold.
1197 if (isa<OverflowingBinaryOperator>(Val: Result)) {
1198 auto *I = cast<Instruction>(Val: Result);
1199 if (!S->hasNoUnsignedWrap())
1200 I->setHasNoUnsignedWrap(false);
1201 if (!S->hasNoSignedWrap())
1202 I->setHasNoSignedWrap(false);
1203 }
1204
1205 // For an expansion to use the postinc form, the client must call
1206 // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1207 // or dominated by IVIncInsertPos.
1208 if (isa<Instruction>(Val: Result) &&
1209 !SE.DT.dominates(Def: cast<Instruction>(Val: Result),
1210 User: &*Builder.GetInsertPoint())) {
1211 // The induction variable's postinc expansion does not dominate this use.
1212 // IVUsers tries to prevent this case, so it is rare. However, it can
1213 // happen when an IVUser outside the loop is not dominated by the latch
1214 // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1215 // all cases. Consider a phi outside whose operand is replaced during
1216 // expansion with the value of the postinc user. Without fundamentally
1217 // changing the way postinc users are tracked, the only remedy is
1218 // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1219 // but hopefully expandCodeFor handles that.
1220 bool useSubtract =
1221 !S->getType()->isPointerTy() && Step->isNonConstantNegative();
1222 if (useSubtract)
1223 Step = SE.getNegativeSCEV(V: Step);
1224 Value *StepV;
1225 {
1226 // Expand the step somewhere that dominates the loop header.
1227 SCEVInsertPointGuard Guard(Builder, this);
1228 StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());
1229 }
1230 Result = expandIVInc(PN, StepV, L, useSubtract);
1231 }
1232 }
1233
1234 // We have decided to reuse an induction variable of a dominating loop. Apply
1235 // truncation and/or inversion of the step.
1236 if (TruncTy) {
1237 // Truncate the result.
1238 if (TruncTy != Result->getType())
1239 Result = Builder.CreateTrunc(V: Result, DestTy: TruncTy);
1240
1241 // Invert the result.
1242 if (InvertStep)
1243 Result = Builder.CreateSub(LHS: expand(S: Normalized->getStart()), RHS: Result);
1244 }
1245
1246 return Result;
1247}
1248
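/// Try to reuse an existing LCSSA phi in the single exit block of the addrec's
/// loop. If the difference between \p S and the phi's SCEV reduces to a
/// constant or a SCEVUnknown (after peeling off a constant addend, a negation
/// and/or a ptrtoint), only that difference is expanded and added to the phi,
/// which is usually cheaper than expanding \p S from scratch.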
1249Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) {
1250 Type *STy = S->getType();
1251 const Loop *L = S->getLoop();
1252 BasicBlock *EB = L->getExitBlock();
1253 if (!EB || !EB->getSinglePredecessor() ||
1254 !SE.DT.dominates(A: EB, B: Builder.GetInsertBlock()))
1255 return nullptr;
1256
1257 for (auto &PN : EB->phis()) {
1258 if (!SE.isSCEVable(Ty: PN.getType()))
1259 continue;
1260 auto *ExitSCEV = SE.getSCEV(V: &PN);
1261 if (!isa<SCEVAddRecExpr>(Val: ExitSCEV))
1262 continue;
1263 Type *PhiTy = PN.getType();
1264 if (STy->isIntegerTy() && PhiTy->isPointerTy()) {
1265 ExitSCEV = SE.getPtrToIntExpr(Op: ExitSCEV, Ty: STy);
1266 if (isa<SCEVCouldNotCompute>(Val: ExitSCEV))
1267 continue;
1268 } else if (S->getType() != PN.getType()) {
1269 continue;
1270 }
1271
1272 // Check if we can re-use the existing PN, by adjusting it with an expanded
1273 // offset, if the offset is simpler.
1274 const SCEV *Diff = SE.getMinusSCEV(LHS: S, RHS: ExitSCEV);
1275 const SCEV *Op = Diff;
1276 match(S: Op, P: m_scev_Add(Op0: m_SCEVConstant(), Op1: m_SCEV(V&: Op)));
1277 match(S: Op, P: m_scev_Mul(Op0: m_scev_AllOnes(), Op1: m_SCEV(V&: Op)));
1278 match(S: Op, P: m_scev_PtrToInt(Op0: m_SCEV(V&: Op)));
1279 if (!isa<SCEVConstant, SCEVUnknown>(Val: Op))
1280 continue;
1281
1282 assert(Diff->getType()->isIntegerTy() &&
1283 "difference must be of integer type");
1284 Value *DiffV = expand(S: Diff);
1285 Value *BaseV = fixupLCSSAFormFor(V: &PN);
1286 if (PhiTy->isPointerTy()) {
1287 if (STy->isPointerTy())
1288 return Builder.CreatePtrAdd(Ptr: BaseV, Offset: DiffV);
1289 BaseV = Builder.CreatePtrToInt(V: BaseV, DestTy: DiffV->getType());
1290 }
1291 return Builder.CreateAdd(LHS: BaseV, RHS: DiffV);
1292 }
1293
1294 return nullptr;
1295}
1296
1297Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1298 // In canonical mode we compute the addrec as an expression of a canonical IV
1299 // using evaluateAtIteration and expand the resulting SCEV expression. This
1300 // way we avoid introducing new IVs to carry on the computation of the addrec
1301 // throughout the loop.
1302 //
1303 // For nested addrecs evaluateAtIteration might need a canonical IV of a
1304 // type wider than the addrec itself. Emitting a canonical IV of the
1305 // proper type might produce non-legal types, for example expanding an i64
1306 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1307 // back to non-canonical mode for nested addrecs.
1308 if (!CanonicalMode || (S->getNumOperands() > 2))
1309 return expandAddRecExprLiterally(S);
1310
1311 Type *Ty = SE.getEffectiveSCEVType(Ty: S->getType());
1312 const Loop *L = S->getLoop();
1313
1314 // First check for an existing canonical IV in a suitable type.
1315 PHINode *CanonicalIV = nullptr;
1316 if (PHINode *PN = L->getCanonicalInductionVariable())
1317 if (SE.getTypeSizeInBits(Ty: PN->getType()) >= SE.getTypeSizeInBits(Ty))
1318 CanonicalIV = PN;
1319
1320 // Rewrite an AddRec in terms of the canonical induction variable, if
1321 // its type is more narrow.
1322 if (CanonicalIV &&
1323 SE.getTypeSizeInBits(Ty: CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1324 !S->getType()->isPointerTy()) {
1325 SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
1326 for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1327 NewOps[i] = SE.getAnyExtendExpr(Op: S->getOperand(i), Ty: CanonicalIV->getType());
1328 Value *V = expand(S: SE.getAddRecExpr(Operands&: NewOps, L: S->getLoop(),
1329 Flags: S->getNoWrapFlags(Mask: SCEV::FlagNW)));
1330 BasicBlock::iterator NewInsertPt =
1331 findInsertPointAfter(I: cast<Instruction>(Val: V), MustDominate: &*Builder.GetInsertPoint());
1332 V = expand(S: SE.getTruncateExpr(Op: SE.getUnknown(V), Ty), I: NewInsertPt);
1333 return V;
1334 }
1335
1336 // If S is expanded outside the defining loop, check if there is a
1337 // matching LCSSA phi node for it.
1338 if (Value *V = tryToReuseLCSSAPhi(S))
1339 return V;
1340
1341 // {X,+,F} --> X + {0,+,F}
1342 if (!S->getStart()->isZero()) {
1343 if (isa<PointerType>(Val: S->getType())) {
1344 Value *StartV = expand(S: SE.getPointerBase(V: S));
1345 return expandAddToGEP(Offset: SE.removePointerBase(S), V: StartV,
1346 Flags: S->getNoWrapFlags(Mask: SCEV::FlagNUW));
1347 }
1348
1349 SmallVector<const SCEV *, 4> NewOps(S->operands());
1350 NewOps[0] = SE.getConstant(Ty, V: 0);
1351 const SCEV *Rest = SE.getAddRecExpr(Operands&: NewOps, L,
1352 Flags: S->getNoWrapFlags(Mask: SCEV::FlagNW));
1353
1354 // Just do a normal add. Pre-expand the operands to suppress folding.
1355 //
1356 // The LHS and RHS values are factored out of the expand call to make the
1357 // output independent of the argument evaluation order.
1358 const SCEV *AddExprLHS = SE.getUnknown(V: expand(S: S->getStart()));
1359 const SCEV *AddExprRHS = SE.getUnknown(V: expand(S: Rest));
1360 return expand(S: SE.getAddExpr(LHS: AddExprLHS, RHS: AddExprRHS));
1361 }
1362
1363 // If we don't yet have a canonical IV, create one.
1364 if (!CanonicalIV) {
1365 // Create and insert the PHI node for the induction variable in the
1366 // specified loop.
1367 BasicBlock *Header = L->getHeader();
1368 pred_iterator HPB = pred_begin(BB: Header), HPE = pred_end(BB: Header);
1369 CanonicalIV = PHINode::Create(Ty, NumReservedValues: std::distance(first: HPB, last: HPE), NameStr: "indvar");
1370 CanonicalIV->insertBefore(InsertPos: Header->begin());
1371 rememberInstruction(I: CanonicalIV);
1372
1373 SmallPtrSet<BasicBlock *, 4> PredSeen;
1374 Constant *One = ConstantInt::get(Ty, V: 1);
1375 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1376 BasicBlock *HP = *HPI;
1377 if (!PredSeen.insert(Ptr: HP).second) {
1378 // There must be an incoming value for each predecessor, even the
1379 // duplicates!
1380 CanonicalIV->addIncoming(V: CanonicalIV->getIncomingValueForBlock(BB: HP), BB: HP);
1381 continue;
1382 }
1383
1384 if (L->contains(BB: HP)) {
1385 // Insert a unit add instruction right before the terminator
1386 // corresponding to the back-edge.
1387 Instruction *Add = BinaryOperator::CreateAdd(V1: CanonicalIV, V2: One,
1388 Name: "indvar.next",
1389 InsertBefore: HP->getTerminator()->getIterator());
1390 Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1391 rememberInstruction(I: Add);
1392 CanonicalIV->addIncoming(V: Add, BB: HP);
1393 } else {
1394 CanonicalIV->addIncoming(V: Constant::getNullValue(Ty), BB: HP);
1395 }
1396 }
1397 }
1398
1399 // {0,+,1} --> Insert a canonical induction variable into the loop!
1400 if (S->isAffine() && S->getOperand(i: 1)->isOne()) {
1401 assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1402 "IVs with types different from the canonical IV should "
1403 "already have been handled!");
1404 return CanonicalIV;
1405 }
1406
1407 // {0,+,F} --> {0,+,1} * F
1408
1409 // If this is a simple linear addrec, emit it now as a special case.
1410 if (S->isAffine()) // {0,+,F} --> i*F
1411 return
1412 expand(S: SE.getTruncateOrNoop(
1413 V: SE.getMulExpr(LHS: SE.getUnknown(V: CanonicalIV),
1414 RHS: SE.getNoopOrAnyExtend(V: S->getOperand(i: 1),
1415 Ty: CanonicalIV->getType())),
1416 Ty));
1417
1418 // If this is a chain of recurrences, turn it into a closed form, using the
1419 // folders, then expandCodeFor the closed form. This allows the folders to
1420 // simplify the expression without having to build a bunch of special code
1421 // into this folder.
1422 const SCEV *IH = SE.getUnknown(V: CanonicalIV); // Get I as a "symbolic" SCEV.
1423
1424 // Promote S up to the canonical IV type, if the cast is foldable.
1425 const SCEV *NewS = S;
1426 const SCEV *Ext = SE.getNoopOrAnyExtend(V: S, Ty: CanonicalIV->getType());
1427 if (isa<SCEVAddRecExpr>(Val: Ext))
1428 NewS = Ext;
1429
1430 const SCEV *V = cast<SCEVAddRecExpr>(Val: NewS)->evaluateAtIteration(It: IH, SE);
1431
1432 // Truncate the result down to the original type, if needed.
1433 const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1434 return expand(S: T);
1435}
1436
1437Value *SCEVExpander::visitPtrToAddrExpr(const SCEVPtrToAddrExpr *S) {
1438 Value *V = expand(S: S->getOperand());
1439 return ReuseOrCreateCast(V, Ty: S->getType(), Op: CastInst::PtrToAddr,
1440 IP: GetOptimalInsertionPointForCastOf(V));
1441}
1442
1443Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
1444 Value *V = expand(S: S->getOperand());
1445 return ReuseOrCreateCast(V, Ty: S->getType(), Op: CastInst::PtrToInt,
1446 IP: GetOptimalInsertionPointForCastOf(V));
1447}
1448
1449Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1450 Value *V = expand(S: S->getOperand());
1451 return Builder.CreateTrunc(V, DestTy: S->getType());
1452}
1453
1454Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1455 Value *V = expand(S: S->getOperand());
1456 return Builder.CreateZExt(V, DestTy: S->getType(), Name: "",
1457 IsNonNeg: SE.isKnownNonNegative(S: S->getOperand()));
1458}
1459
1460Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1461 Value *V = expand(S: S->getOperand());
1462 return Builder.CreateSExt(V, DestTy: S->getType());
1463}
1464
1465Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
1466 Intrinsic::ID IntrinID, Twine Name,
1467 bool IsSequential) {
1468 bool PrevSafeMode = SafeUDivMode;
1469 SafeUDivMode |= IsSequential;
1470 Value *LHS = expand(S: S->getOperand(i: S->getNumOperands() - 1));
1471 Type *Ty = LHS->getType();
1472 if (IsSequential)
1473 LHS = Builder.CreateFreeze(V: LHS);
1474 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1475 SafeUDivMode = (IsSequential && i != 0) || PrevSafeMode;
1476 Value *RHS = expand(S: S->getOperand(i));
1477 if (IsSequential && i != 0)
1478 RHS = Builder.CreateFreeze(V: RHS);
1479 Value *Sel;
1480 if (Ty->isIntegerTy())
1481 Sel = Builder.CreateIntrinsic(ID: IntrinID, Types: {Ty}, Args: {LHS, RHS},
1482 /*FMFSource=*/nullptr, Name);
1483 else {
1484 Value *ICmp =
1485 Builder.CreateICmp(P: MinMaxIntrinsic::getPredicate(ID: IntrinID), LHS, RHS);
1486 Sel = Builder.CreateSelect(C: ICmp, True: LHS, False: RHS, Name);
1487 }
1488 LHS = Sel;
1489 }
1490 SafeUDivMode = PrevSafeMode;
1491 return LHS;
1492}
1493
1494Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1495 return expandMinMaxExpr(S, IntrinID: Intrinsic::smax, Name: "smax");
1496}
1497
1498Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1499 return expandMinMaxExpr(S, IntrinID: Intrinsic::umax, Name: "umax");
1500}
1501
1502Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1503 return expandMinMaxExpr(S, IntrinID: Intrinsic::smin, Name: "smin");
1504}
1505
1506Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1507 return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin");
1508}
1509
1510Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1511 return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin", /*IsSequential*/true);
1512}
1513
1514Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1515 return Builder.CreateVScale(Ty: S->getType());
1516}
1517
1518Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
1519 BasicBlock::iterator IP) {
1520 setInsertPoint(IP);
1521 Value *V = expandCodeFor(SH, Ty);
1522 return V;
1523}
1524
1525Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
1526 // Expand the code for this SCEV.
1527 Value *V = expand(S: SH);
1528
1529 if (Ty && Ty != V->getType()) {
1530 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1531 "non-trivial casts should be done with the SCEVs directly!");
1532 V = InsertNoopCastOfTo(V, Ty);
1533 }
1534 return V;
1535}
1536
1537Value *SCEVExpander::FindValueInExprValueMap(
1538 const SCEV *S, const Instruction *InsertPt,
1539 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1540   // If the expansion is not in CanonicalMode, and the SCEV contains any
1541   // scAddRecExpr sub-expression, the SCEV must be expanded literally.
1542 if (!CanonicalMode && SE.containsAddRecurrence(S))
1543 return nullptr;
1544
1545 // If S is a constant or unknown, it may be worse to reuse an existing Value.
1546 if (isa<SCEVConstant>(Val: S) || isa<SCEVUnknown>(Val: S))
1547 return nullptr;
1548
1549 for (Value *V : SE.getSCEVValues(S)) {
1550 Instruction *EntInst = dyn_cast<Instruction>(Val: V);
1551 if (!EntInst)
1552 continue;
1553
1554 // Choose a Value from the set which dominates the InsertPt.
1555 // InsertPt should be inside the Value's parent loop so as not to break
1556 // the LCSSA form.
1557 assert(EntInst->getFunction() == InsertPt->getFunction());
1558 if (S->getType() != V->getType() || !SE.DT.dominates(Def: EntInst, User: InsertPt) ||
1559 !(SE.LI.getLoopFor(BB: EntInst->getParent()) == nullptr ||
1560 SE.LI.getLoopFor(BB: EntInst->getParent())->contains(Inst: InsertPt)))
1561 continue;
1562
1563 // Make sure reusing the instruction is poison-safe.
1564 if (SE.canReuseInstruction(S, I: EntInst, DropPoisonGeneratingInsts))
1565 return V;
1566 DropPoisonGeneratingInsts.clear();
1567 }
1568 return nullptr;
1569}
1570
1571 // The expansion of a SCEV will either reuse a previous Value from ExprValueMap
1572 // or expand the SCEV literally. Specifically, if the expansion is in LSRMode
1573 // and the SCEV contains any scAddRecExpr sub-expression, it will be expanded
1574 // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1575 // the expansion first tries to reuse a Value from ExprValueMap and only
1576 // expands the SCEV literally when that fails.
1577Value *SCEVExpander::expand(const SCEV *S) {
1578 // Compute an insertion point for this SCEV object. Hoist the instructions
1579 // as far out in the loop nest as possible.
1580 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1581
1582   // We can only move the insertion point if there are no div or rem operations;
1583   // otherwise we risk hoisting one over the check for a zero denominator.
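  // For example (illustrative only), for source along the lines of
  //   if (d != 0) { for (...) use(x /u d); }
  // hoisting the udiv above the guard on d would introduce a possible
  // division by zero, so such divisions are left inside the conditions that
  // guard their execution.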
1584 auto SafeToHoist = [](const SCEV *S) {
1585 return !SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
1586 if (const auto *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
1587 if (const auto *SC = dyn_cast<SCEVConstant>(Val: D->getRHS()))
1588 // Division by non-zero constants can be hoisted.
1589 return SC->getValue()->isZero();
1590 // All other divisions should not be moved as they may be
1591 // divisions by zero and should be kept within the
1592 // conditions of the surrounding loops that guard their
1593 // execution (see PR35406).
1594 return true;
1595 }
1596 return false;
1597 });
1598 };
1599 if (SafeToHoist(S)) {
1600 for (Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock());;
1601 L = L->getParentLoop()) {
1602 if (SE.isLoopInvariant(S, L)) {
1603 if (!L) break;
1604 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1605 InsertPt = Preheader->getTerminator()->getIterator();
1606 } else {
1607 // LSR sets the insertion point for AddRec start/step values to the
1608 // block start to simplify value reuse, even though it's an invalid
1609 // position. SCEVExpander must correct for this in all cases.
1610 InsertPt = L->getHeader()->getFirstInsertionPt();
1611 }
1612 } else {
1613 // If the SCEV is computable at this level, insert it into the header
1614 // after the PHIs (and after any other instructions that we've inserted
1615 // there) so that it is guaranteed to dominate any user inside the loop.
1616 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(Ptr: L))
1617 InsertPt = L->getHeader()->getFirstInsertionPt();
1618
1619 while (InsertPt != Builder.GetInsertPoint() &&
1620 (isInsertedInstruction(I: &*InsertPt))) {
1621 InsertPt = std::next(x: InsertPt);
1622 }
1623 break;
1624 }
1625 }
1626 }
1627
1628 // Check to see if we already expanded this here.
1629 auto I = InsertedExpressions.find(Val: std::make_pair(x&: S, y: &*InsertPt));
1630 if (I != InsertedExpressions.end())
1631 return I->second;
1632
1633 SCEVInsertPointGuard Guard(Builder, this);
1634 Builder.SetInsertPoint(TheBB: InsertPt->getParent(), IP: InsertPt);
1635
1636 // Expand the expression into instructions.
1637 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1638 Value *V = FindValueInExprValueMap(S, InsertPt: &*InsertPt, DropPoisonGeneratingInsts);
1639 if (!V) {
1640 V = visit(S);
1641 V = fixupLCSSAFormFor(V);
1642 } else {
1643 for (Instruction *I : DropPoisonGeneratingInsts) {
1644 rememberFlags(I);
1645 I->dropPoisonGeneratingAnnotations();
1646 // See if we can re-infer from first principles any of the flags we just
1647 // dropped.
1648 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
1649 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1650 auto *BO = cast<BinaryOperator>(Val: I);
1651 BO->setHasNoUnsignedWrap(
1652 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
1653 BO->setHasNoSignedWrap(
1654 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
1655 }
1656 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(Val: I)) {
1657 auto *Src = NNI->getOperand(i_nocapture: 0);
1658 if (isImpliedByDomCondition(Pred: ICmpInst::ICMP_SGE, LHS: Src,
1659 RHS: Constant::getNullValue(Ty: Src->getType()), ContextI: I,
1660 DL).value_or(u: false))
1661 NNI->setNonNeg(true);
1662 }
1663 }
1664 }
1665 // Remember the expanded value for this SCEV at this location.
1666 //
1667 // This is independent of PostIncLoops. The mapped value simply materializes
1668 // the expression at this insertion point. If the mapped value happened to be
1669 // a postinc expansion, it could be reused by a non-postinc user, but only if
1670 // its insertion point was already at the head of the loop.
1671 InsertedExpressions[std::make_pair(x&: S, y: &*InsertPt)] = V;
1672 return V;
1673}
1674
1675void SCEVExpander::rememberInstruction(Value *I) {
1676 auto DoInsert = [this](Value *V) {
1677 if (!PostIncLoops.empty())
1678 InsertedPostIncValues.insert(V);
1679 else
1680 InsertedValues.insert(V);
1681 };
1682 DoInsert(I);
1683}
1684
1685void SCEVExpander::rememberFlags(Instruction *I) {
1686 // If we already have flags for the instruction, keep the existing ones.
1687 OrigFlags.try_emplace(Key: I, Args: PoisonFlags(I));
1688}
1689
1690void SCEVExpander::replaceCongruentIVInc(
1691 PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
1692 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1693 BasicBlock *LatchBlock = L->getLoopLatch();
1694 if (!LatchBlock)
1695 return;
1696
1697 Instruction *OrigInc =
1698 dyn_cast<Instruction>(Val: OrigPhi->getIncomingValueForBlock(BB: LatchBlock));
1699 Instruction *IsomorphicInc =
1700 dyn_cast<Instruction>(Val: Phi->getIncomingValueForBlock(BB: LatchBlock));
1701 if (!OrigInc || !IsomorphicInc)
1702 return;
1703
1704 // If this phi has the same width but is more canonical, replace the
1705 // original with it. As part of the "more canonical" determination,
1706 // respect a prior decision to use an IV chain.
1707 if (OrigPhi->getType() == Phi->getType()) {
1708 bool Chained = ChainedPhis.contains(V: Phi);
1709 if (!(Chained || isExpandedAddRecExprPHI(PN: OrigPhi, IncV: OrigInc, L)) &&
1710 (Chained || isExpandedAddRecExprPHI(PN: Phi, IncV: IsomorphicInc, L))) {
1711 std::swap(a&: OrigPhi, b&: Phi);
1712 std::swap(a&: OrigInc, b&: IsomorphicInc);
1713 }
1714 }
1715
1716 // Replacing the congruent phi is sufficient because acyclic
1717 // redundancy elimination, CSE/GVN, should handle the
1718 // rest. However, once SCEV proves that a phi is congruent,
1719 // it's often the head of an IV user cycle that is isomorphic
1720 // with the original phi. It's worth eagerly cleaning up the
1721 // common case of a single IV increment so that DeleteDeadPHIs
1722 // can remove cycles that had postinc uses.
1723 // Because we may potentially introduce a new use of OrigIV that didn't
1724 // exist before at this point, its poison flags need readjustment.
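  // Illustrative example (hypothetical IR): given two congruent increments
  //   %iv.next  = add nuw i64 %iv, 1
  //   %iv2.next = add i64 %iv2, 1
  // the second increment is replaced by the first (truncated if the types
  // differ) and queued in DeadInsts for removal.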
1725 const SCEV *TruncExpr =
1726 SE.getTruncateOrNoop(V: SE.getSCEV(V: OrigInc), Ty: IsomorphicInc->getType());
1727 if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(V: IsomorphicInc) ||
1728 !SE.LI.replacementPreservesLCSSAForm(From: IsomorphicInc, To: OrigInc))
1729 return;
1730
1731 bool BothHaveNUW = false;
1732 bool BothHaveNSW = false;
1733 auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(Val: OrigInc);
1734 auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(Val: IsomorphicInc);
1735 if (OBOIncV && OBOIsomorphic) {
1736 BothHaveNUW =
1737 OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1738 BothHaveNSW =
1739 OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1740 }
1741
1742 if (!hoistIVInc(IncV: OrigInc, InsertPos: IsomorphicInc,
1743 /*RecomputePoisonFlags*/ true))
1744 return;
1745
1746 // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1747 // are NUW/NSW, then we can preserve them on the wider increment; the narrower
1748 // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1749 // make IsomorphicInc's uses more poisonous.
1750 assert(OrigInc->getType()->getScalarSizeInBits() >=
1751 IsomorphicInc->getType()->getScalarSizeInBits() &&
1752 "Should only replace an increment with a wider one.");
1753 if (BothHaveNUW || BothHaveNSW) {
1754 OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
1755 OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
1756 }
1757
1758 SCEV_DEBUG_WITH_TYPE(DebugType,
1759 dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1760 << *IsomorphicInc << '\n');
1761 Value *NewInc = OrigInc;
1762 if (OrigInc->getType() != IsomorphicInc->getType()) {
1763 BasicBlock::iterator IP;
1764 if (PHINode *PN = dyn_cast<PHINode>(Val: OrigInc))
1765 IP = PN->getParent()->getFirstInsertionPt();
1766 else
1767 IP = OrigInc->getNextNode()->getIterator();
1768
1769 IRBuilder<> Builder(IP->getParent(), IP);
1770 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1771 NewInc =
1772 Builder.CreateTruncOrBitCast(V: OrigInc, DestTy: IsomorphicInc->getType(), Name: IVName);
1773 }
1774 IsomorphicInc->replaceAllUsesWith(V: NewInc);
1775 DeadInsts.emplace_back(Args&: IsomorphicInc);
1776}
1777
1778/// replaceCongruentIVs - Check for congruent phis in this loop header and
1779/// replace them with their most canonical representative. Return the number of
1780/// phis eliminated.
1781///
1782/// This does not depend on any SCEVExpander state but should be used in
1783/// the same context that SCEVExpander is used.
1784unsigned
1785SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1786 SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1787 const TargetTransformInfo *TTI) {
1788 // Find integer phis in order of increasing width.
1789 SmallVector<PHINode *, 8> Phis(
1790 llvm::make_pointer_range(Range: L->getHeader()->phis()));
1791
1792 if (TTI)
1793 // Use stable_sort to preserve order of equivalent PHIs, so the order
1794 // of the sorted Phis is the same from run to run on the same loop.
1795 llvm::stable_sort(Range&: Phis, C: [](Value *LHS, Value *RHS) {
1796 // Put pointers at the back and make sure pointer < pointer = false.
1797 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1798 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1799 return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1800 LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1801 });
1802
1803 unsigned NumElim = 0;
1804 DenseMap<const SCEV *, PHINode *> ExprToIVMap;
1805 // Process phis from wide to narrow. Map wide phis to their truncation
1806 // so narrow phis can reuse them.
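  // For example (illustrative only), an i64 IV processed first is also mapped
  // under the SCEV of its truncation to the narrowest phi type, so a later
  // congruent i32 IV can be replaced with a trunc of the i64 one instead of
  // being kept as a separate phi.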
1807 for (PHINode *Phi : Phis) {
1808 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1809 if (Value *V = simplifyInstruction(I: PN, Q: {DL, &SE.TLI, &SE.DT, &SE.AC}))
1810 return V;
1811 if (!SE.isSCEVable(Ty: PN->getType()))
1812 return nullptr;
1813 auto *Const = dyn_cast<SCEVConstant>(Val: SE.getSCEV(V: PN));
1814 if (!Const)
1815 return nullptr;
1816 return Const->getValue();
1817 };
1818
1819 // Fold constant phis. They may be congruent to other constant phis and
1820 // would confuse the logic below that expects proper IVs.
1821 if (Value *V = SimplifyPHINode(Phi)) {
1822 if (V->getType() != Phi->getType())
1823 continue;
1824 SE.forgetValue(V: Phi);
1825 Phi->replaceAllUsesWith(V);
1826 DeadInsts.emplace_back(Args&: Phi);
1827 ++NumElim;
1828 SCEV_DEBUG_WITH_TYPE(DebugType,
1829 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1830 << '\n');
1831 continue;
1832 }
1833
1834 if (!SE.isSCEVable(Ty: Phi->getType()))
1835 continue;
1836
1837 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(V: Phi)];
1838 if (!OrigPhiRef) {
1839 OrigPhiRef = Phi;
1840 if (Phi->getType()->isIntegerTy() && TTI &&
1841 TTI->isTruncateFree(Ty1: Phi->getType(), Ty2: Phis.back()->getType())) {
1842 // Make sure we only rewrite using simple induction variables;
1843 // otherwise, we can make the trip count of a loop unanalyzable
1844 // to SCEV.
1845 const SCEV *PhiExpr = SE.getSCEV(V: Phi);
1846 if (isa<SCEVAddRecExpr>(Val: PhiExpr)) {
1847 // This phi can be freely truncated to the narrowest phi type. Map the
1848 // truncated expression to it so it will be reused for narrow types.
1849 const SCEV *TruncExpr =
1850 SE.getTruncateExpr(Op: PhiExpr, Ty: Phis.back()->getType());
1851 ExprToIVMap[TruncExpr] = Phi;
1852 }
1853 }
1854 continue;
1855 }
1856
1857 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1858 // sense.
1859 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1860 continue;
1861
1862 replaceCongruentIVInc(Phi, OrigPhi&: OrigPhiRef, L, DT, DeadInsts);
1863 SCEV_DEBUG_WITH_TYPE(DebugType,
1864 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1865 << '\n');
1866 SCEV_DEBUG_WITH_TYPE(
1867 DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1868 ++NumElim;
1869 Value *NewIV = OrigPhiRef;
1870 if (OrigPhiRef->getType() != Phi->getType()) {
1871 IRBuilder<> Builder(L->getHeader(),
1872 L->getHeader()->getFirstInsertionPt());
1873 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1874 NewIV = Builder.CreateTruncOrBitCast(V: OrigPhiRef, DestTy: Phi->getType(), Name: IVName);
1875 }
1876 Phi->replaceAllUsesWith(V: NewIV);
1877 DeadInsts.emplace_back(Args&: Phi);
1878 }
1879 return NumElim;
1880}
1881
1882bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1883 const Instruction *At,
1884 Loop *L) {
1885 using namespace llvm::PatternMatch;
1886
1887 SmallVector<BasicBlock *, 4> ExitingBlocks;
1888 L->getExitingBlocks(ExitingBlocks);
1889
1890   // Look for a suitable value in simple conditions at the loop exits.
1891 for (BasicBlock *BB : ExitingBlocks) {
1892 CmpPredicate Pred;
1893 Instruction *LHS, *RHS;
1894
1895 if (!match(V: BB->getTerminator(),
1896 P: m_Br(C: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Instruction(I&: RHS)),
1897 T: m_BasicBlock(), F: m_BasicBlock())))
1898 continue;
1899
1900 if (SE.getSCEV(V: LHS) == S && SE.DT.dominates(Def: LHS, User: At))
1901 return true;
1902
1903 if (SE.getSCEV(V: RHS) == S && SE.DT.dominates(Def: RHS, User: At))
1904 return true;
1905 }
1906
1907   // Fall back to expand's logic for reusing a previous Value from
1908   // ExprValueMap. Note that we don't currently model the cost of
1909   // needing to drop poison-generating flags on the instruction if we
1910   // want to reuse it; we effectively assume that has zero cost.
1911 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1912 return FindValueInExprValueMap(S, InsertPt: At, DropPoisonGeneratingInsts) != nullptr;
1913}
1914
1915template<typename T> static InstructionCost costAndCollectOperands(
1916 const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1917 TargetTransformInfo::TargetCostKind CostKind,
1918 SmallVectorImpl<SCEVOperand> &Worklist) {
1919
1920 const T *S = cast<T>(WorkItem.S);
1921 InstructionCost Cost = 0;
1922 // Object to help map SCEV operands to expanded IR instructions.
1923 struct OperationIndices {
1924 OperationIndices(unsigned Opc, size_t min, size_t max) :
1925 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1926 unsigned Opcode;
1927 size_t MinIdx;
1928 size_t MaxIdx;
1929 };
1930
1931 // Collect the operations of all the instructions that will be needed to
1932 // expand the SCEVExpr. This is so that when we come to cost the operands,
1933 // we know what the generated user(s) will be.
1934 SmallVector<OperationIndices, 2> Operations;
1935
1936 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1937 Operations.emplace_back(Opcode, 0, 0);
1938 return TTI.getCastInstrCost(Opcode, Dst: S->getType(),
1939 Src: S->getOperand(0)->getType(),
1940 CCH: TTI::CastContextHint::None, CostKind);
1941 };
1942
1943 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1944 unsigned MinIdx = 0,
1945 unsigned MaxIdx = 1) -> InstructionCost {
1946 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1947 return NumRequired *
1948 TTI.getArithmeticInstrCost(Opcode, Ty: S->getType(), CostKind);
1949 };
1950
1951 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1952 unsigned MaxIdx) -> InstructionCost {
1953 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1954 Type *OpType = S->getType();
1955 return NumRequired * TTI.getCmpSelInstrCost(
1956 Opcode, ValTy: OpType, CondTy: CmpInst::makeCmpResultType(opnd_type: OpType),
1957 VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
1958 };
1959
1960 switch (S->getSCEVType()) {
1961 case scCouldNotCompute:
1962 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1963 case scUnknown:
1964 case scConstant:
1965 case scVScale:
1966 return 0;
1967 case scPtrToAddr:
1968 Cost = CastCost(Instruction::PtrToAddr);
1969 break;
1970 case scPtrToInt:
1971 Cost = CastCost(Instruction::PtrToInt);
1972 break;
1973 case scTruncate:
1974 Cost = CastCost(Instruction::Trunc);
1975 break;
1976 case scZeroExtend:
1977 Cost = CastCost(Instruction::ZExt);
1978 break;
1979 case scSignExtend:
1980 Cost = CastCost(Instruction::SExt);
1981 break;
1982 case scUDivExpr: {
1983 unsigned Opcode = Instruction::UDiv;
1984 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1985 if (SC->getAPInt().isPowerOf2())
1986 Opcode = Instruction::LShr;
1987 Cost = ArithCost(Opcode, 1);
1988 break;
1989 }
1990 case scAddExpr:
1991 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1992 break;
1993 case scMulExpr:
1994     // TODO: this is a very pessimistic cost model for Mul, because the
1995     // expander actually uses a binary-power (Bin Pow) algorithm; see
1996     // SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1997 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1998 break;
1999 case scSMaxExpr:
2000 case scUMaxExpr:
2001 case scSMinExpr:
2002 case scUMinExpr:
2003 case scSequentialUMinExpr: {
2004     // FIXME: should this ask for the cost of the intrinsics instead?
2005 // The reduction tree.
2006 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
2007 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
2008 switch (S->getSCEVType()) {
2009 case scSequentialUMinExpr: {
2010 // The safety net against poison.
2011 // FIXME: this is broken.
2012 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
2013 Cost += ArithCost(Instruction::Or,
2014 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
2015 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
2016 break;
2017 }
2018 default:
2019 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
2020 "Unhandled SCEV expression type?");
2021 break;
2022 }
2023 break;
2024 }
2025 case scAddRecExpr: {
2026 // Addrec expands to a phi and add per recurrence.
2027 unsigned NumRecurrences = S->getNumOperands() - 1;
2028 Cost += TTI.getCFInstrCost(Opcode: Instruction::PHI, CostKind) * NumRecurrences;
2029 Cost +=
2030 TTI.getArithmeticInstrCost(Opcode: Instruction::Add, Ty: S->getType(), CostKind) *
2031 NumRecurrences;
2032 // AR start is used in phi.
2033 Worklist.emplace_back(Instruction::PHI, 0, S->getOperand(0));
2034 // Other operands are used in add.
2035 for (const SCEV *Op : S->operands().drop_front())
2036 Worklist.emplace_back(Args: Instruction::Add, Args: 1, Args&: Op);
2037 break;
2038 }
2039 }
2040
2041 for (auto &CostOp : Operations) {
2042 for (auto SCEVOp : enumerate(S->operands())) {
2043 // Clamp the index to account for multiple IR operations being chained.
2044 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
2045 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
2046 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
2047 }
2048 }
2049 return Cost;
2050}
2051
2052bool SCEVExpander::isHighCostExpansionHelper(
2053 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
2054 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
2055 SmallPtrSetImpl<const SCEV *> &Processed,
2056 SmallVectorImpl<SCEVOperand> &Worklist) {
2057 if (Cost > Budget)
2058     return true; // We have already run out of budget; give up.
2059
2060 const SCEV *S = WorkItem.S;
2061 // Was the cost of expansion of this expression already accounted for?
2062 if (!isa<SCEVConstant>(Val: S) && !Processed.insert(Ptr: S).second)
2063 return false; // We have already accounted for this expression.
2064
2065 // If we can find an existing value for this scev available at the point "At"
2066 // then consider the expression cheap.
2067 if (hasRelatedExistingExpansion(S, At: &At, L))
2068 return false; // Consider the expression to be free.
2069
2070 TargetTransformInfo::TargetCostKind CostKind =
2071 L->getHeader()->getParent()->hasMinSize()
2072 ? TargetTransformInfo::TCK_CodeSize
2073 : TargetTransformInfo::TCK_RecipThroughput;
2074
2075 switch (S->getSCEVType()) {
2076 case scCouldNotCompute:
2077 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
2078 case scUnknown:
2079 case scVScale:
2080 // Assume to be zero-cost.
2081 return false;
2082 case scConstant: {
2083     // Only evaluate the costs of constants when optimizing for size.
2084 if (CostKind != TargetTransformInfo::TCK_CodeSize)
2085 return false;
2086 const APInt &Imm = cast<SCEVConstant>(Val: S)->getAPInt();
2087 Type *Ty = S->getType();
2088 Cost += TTI.getIntImmCostInst(
2089 Opc: WorkItem.ParentOpcode, Idx: WorkItem.OperandIdx, Imm, Ty, CostKind);
2090 return Cost > Budget;
2091 }
2092 case scTruncate:
2093 case scPtrToAddr:
2094 case scPtrToInt:
2095 case scZeroExtend:
2096 case scSignExtend: {
2097 Cost +=
2098 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2099 return false; // Will answer upon next entry into this function.
2100 }
2101 case scUDivExpr: {
2102 // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2103 // HowManyLessThans produced to compute a precise expression, rather than a
2104 // UDiv from the user's code. If we can't find a UDiv in the code with some
2105     // simple searching, we need to account for its cost.
2106
2107     // At the beginning of this function we already tried to find an existing
2108     // value for plain 'S'. Now try to look up 'S + 1', since it is a common
2109     // pattern involving division. This is just a simple search heuristic.
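    // For example (illustrative only), if S is %n /u 2 and a loop exit already
    // compares an IV against %n /u 2 + 1, matching 'S + 1' against that exit
    // condition lets the division be treated as free.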
2110 if (hasRelatedExistingExpansion(
2111 S: SE.getAddExpr(LHS: S, RHS: SE.getConstant(Ty: S->getType(), V: 1)), At: &At, L))
2112 return false; // Consider it to be free.
2113
2114 Cost +=
2115 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2116 return false; // Will answer upon next entry into this function.
2117 }
2118 case scAddExpr:
2119 case scMulExpr:
2120 case scUMaxExpr:
2121 case scSMaxExpr:
2122 case scUMinExpr:
2123 case scSMinExpr:
2124 case scSequentialUMinExpr: {
2125 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2126 "Nary expr should have more than 1 operand.");
2127 // The simple nary expr will require one less op (or pair of ops)
2128     // than the number of its terms.
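    // E.g. (illustrative) a 4-operand add expands to 3 chained adds, and a
    // 4-operand umin to 3 icmp/select (or umin intrinsic) steps.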
2129 Cost +=
2130 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2131 return Cost > Budget;
2132 }
2133 case scAddRecExpr: {
2134 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
2135 "Polynomial should be at least linear");
2136 Cost += costAndCollectOperands<SCEVAddRecExpr>(
2137 WorkItem, TTI, CostKind, Worklist);
2138 return Cost > Budget;
2139 }
2140 }
2141 llvm_unreachable("Unknown SCEV kind!");
2142}
2143
2144Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2145 Instruction *IP) {
2146 assert(IP);
2147 switch (Pred->getKind()) {
2148 case SCEVPredicate::P_Union:
2149 return expandUnionPredicate(Pred: cast<SCEVUnionPredicate>(Val: Pred), Loc: IP);
2150 case SCEVPredicate::P_Compare:
2151 return expandComparePredicate(Pred: cast<SCEVComparePredicate>(Val: Pred), Loc: IP);
2152 case SCEVPredicate::P_Wrap: {
2153 auto *AddRecPred = cast<SCEVWrapPredicate>(Val: Pred);
2154 return expandWrapPredicate(P: AddRecPred, Loc: IP);
2155 }
2156 }
2157 llvm_unreachable("Unknown SCEV predicate type");
2158}
2159
2160Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
2161 Instruction *IP) {
2162 Value *Expr0 = expand(S: Pred->getLHS(), I: IP);
2163 Value *Expr1 = expand(S: Pred->getRHS(), I: IP);
2164
2165 Builder.SetInsertPoint(IP);
2166 auto InvPred = ICmpInst::getInversePredicate(pred: Pred->getPredicate());
2167 auto *I = Builder.CreateICmp(P: InvPred, LHS: Expr0, RHS: Expr1, Name: "ident.check");
2168 return I;
2169}
2170
2171Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2172 Instruction *Loc, bool Signed) {
2173 assert(AR->isAffine() && "Cannot generate RT check for "
2174 "non-affine expression");
2175
2176 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2177 SmallVector<const SCEVPredicate *, 4> Pred;
2178 const SCEV *ExitCount =
2179 SE.getPredicatedSymbolicMaxBackedgeTakenCount(L: AR->getLoop(), Predicates&: Pred);
2180
2181 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2182
2183 const SCEV *Step = AR->getStepRecurrence(SE);
2184 const SCEV *Start = AR->getStart();
2185
2186 Type *ARTy = AR->getType();
2187 unsigned SrcBits = SE.getTypeSizeInBits(Ty: ExitCount->getType());
2188 unsigned DstBits = SE.getTypeSizeInBits(Ty: ARTy);
2189
2190 // The expression {Start,+,Step} has nusw/nssw if
2191   //   Step < 0,  Start - |Step| * Backedge <= Start
2192   //   Step >= 0, Start + |Step| * Backedge >= Start
2193 // and |Step| * Backedge doesn't unsigned overflow.
2194
2195 Builder.SetInsertPoint(Loc);
2196 Value *TripCountVal = expand(S: ExitCount, I: Loc);
2197
2198 IntegerType *Ty =
2199 IntegerType::get(C&: Loc->getContext(), NumBits: SE.getTypeSizeInBits(Ty: ARTy));
2200
2201 Value *StepValue = expand(S: Step, I: Loc);
2202 Value *NegStepValue = expand(S: SE.getNegativeSCEV(V: Step), I: Loc);
2203 Value *StartValue = expand(S: Start, I: Loc);
2204
2205 ConstantInt *Zero =
2206 ConstantInt::get(Context&: Loc->getContext(), V: APInt::getZero(numBits: DstBits));
2207
2208 Builder.SetInsertPoint(Loc);
2209 // Compute |Step|
2210 Value *StepCompare = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: StepValue, RHS: Zero);
2211 Value *AbsStep = Builder.CreateSelect(C: StepCompare, True: NegStepValue, False: StepValue);
2212
2213 // Compute |Step| * Backedge
2214 // Compute:
2215 // 1. Start + |Step| * Backedge < Start
2216 // 2. Start - |Step| * Backedge > Start
2217 //
2218 // And select either 1. or 2. depending on whether step is positive or
2219 // negative. If Step is known to be positive or negative, only create
2220 // either 1. or 2.
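  // Shape of the emitted check for an integer AR (names purely illustrative;
  // the compares use slt/sgt when Signed):
  //   %mul          = call { iN, i1 } @llvm.umul.with.overflow.iN(iN %abs.step, iN %tc)
  //   %mul.result   = extractvalue { iN, i1 } %mul, 0
  //   %mul.overflow = extractvalue { iN, i1 } %mul, 1
  //   %add          = add iN %start, %mul.result   ; 1., when Step may be positive
  //   %cmp          = icmp ult iN %add, %start
  //   %check        = or i1 %cmp, %mul.overflow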
2221 auto ComputeEndCheck = [&]() -> Value * {
2222 // Get the backedge taken count and truncate or extended to the AR type.
2223 Value *TruncTripCount = Builder.CreateZExtOrTrunc(V: TripCountVal, DestTy: Ty);
2224
2225 CallInst *Mul = Builder.CreateIntrinsic(ID: Intrinsic::umul_with_overflow, Types: Ty,
2226 Args: {AbsStep, TruncTripCount},
2227 /*FMFSource=*/nullptr, Name: "mul");
2228 Value *MulV = Builder.CreateExtractValue(Agg: Mul, Idxs: 0, Name: "mul.result");
2229 Value *OfMul = Builder.CreateExtractValue(Agg: Mul, Idxs: 1, Name: "mul.overflow");
2230
2231 Value *Add = nullptr, *Sub = nullptr;
2232 bool NeedPosCheck = !SE.isKnownNegative(S: Step);
2233 bool NeedNegCheck = !SE.isKnownPositive(S: Step);
2234
2235 if (isa<PointerType>(Val: ARTy)) {
2236 Value *NegMulV = Builder.CreateNeg(V: MulV);
2237 if (NeedPosCheck)
2238 Add = Builder.CreatePtrAdd(Ptr: StartValue, Offset: MulV);
2239 if (NeedNegCheck)
2240 Sub = Builder.CreatePtrAdd(Ptr: StartValue, Offset: NegMulV);
2241 } else {
2242 if (NeedPosCheck)
2243 Add = Builder.CreateAdd(LHS: StartValue, RHS: MulV);
2244 if (NeedNegCheck)
2245 Sub = Builder.CreateSub(LHS: StartValue, RHS: MulV);
2246 }
2247
2248 Value *EndCompareLT = nullptr;
2249 Value *EndCompareGT = nullptr;
2250 Value *EndCheck = nullptr;
2251 if (NeedPosCheck)
2252 EndCheck = EndCompareLT = Builder.CreateICmp(
2253 P: Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, LHS: Add, RHS: StartValue);
2254 if (NeedNegCheck)
2255 EndCheck = EndCompareGT = Builder.CreateICmp(
2256 P: Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, LHS: Sub, RHS: StartValue);
2257 if (NeedPosCheck && NeedNegCheck) {
2258 // Select the answer based on the sign of Step.
2259 EndCheck = Builder.CreateSelect(C: StepCompare, True: EndCompareGT, False: EndCompareLT);
2260 }
2261 return Builder.CreateOr(LHS: EndCheck, RHS: OfMul);
2262 };
2263 Value *EndCheck = ComputeEndCheck();
2264
2265 // If the backedge taken count type is larger than the AR type,
2266 // check that we don't drop any bits by truncating it. If we are
2267 // dropping bits, then we have overflow (unless the step is zero).
2268 if (SrcBits > DstBits) {
2269 auto MaxVal = APInt::getMaxValue(numBits: DstBits).zext(width: SrcBits);
2270 auto *BackedgeCheck =
2271 Builder.CreateICmp(P: ICmpInst::ICMP_UGT, LHS: TripCountVal,
2272 RHS: ConstantInt::get(Context&: Loc->getContext(), V: MaxVal));
2273 BackedgeCheck = Builder.CreateAnd(
2274 LHS: BackedgeCheck, RHS: Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: StepValue, RHS: Zero));
2275
2276 EndCheck = Builder.CreateOr(LHS: EndCheck, RHS: BackedgeCheck);
2277 }
2278
2279 return EndCheck;
2280}
2281
2282Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2283 Instruction *IP) {
2284 const auto *A = cast<SCEVAddRecExpr>(Val: Pred->getExpr());
2285 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2286
2287 // Add a check for NUSW
2288 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2289 NUSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: false);
2290
2291 // Add a check for NSSW
2292 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2293 NSSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: true);
2294
2295 if (NUSWCheck && NSSWCheck)
2296 return Builder.CreateOr(LHS: NUSWCheck, RHS: NSSWCheck);
2297
2298 if (NUSWCheck)
2299 return NUSWCheck;
2300
2301 if (NSSWCheck)
2302 return NSSWCheck;
2303
2304 return ConstantInt::getFalse(Context&: IP->getContext());
2305}
2306
2307Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2308 Instruction *IP) {
2309 // Loop over all checks in this set.
2310 SmallVector<Value *> Checks;
2311 for (const auto *Pred : Union->getPredicates()) {
2312 Checks.push_back(Elt: expandCodeForPredicate(Pred, IP));
2313 Builder.SetInsertPoint(IP);
2314 }
2315
2316 if (Checks.empty())
2317 return ConstantInt::getFalse(Context&: IP->getContext());
2318 return Builder.CreateOr(Ops: Checks);
2319}
2320
2321Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2322 auto *DefI = dyn_cast<Instruction>(Val: V);
2323 if (!PreserveLCSSA || !DefI)
2324 return V;
2325
2326 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2327 Loop *DefLoop = SE.LI.getLoopFor(BB: DefI->getParent());
2328 Loop *UseLoop = SE.LI.getLoopFor(BB: InsertPt->getParent());
2329 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(L: UseLoop))
2330 return V;
2331
2332   // Create a temporary instruction at the current insertion point, so we
2333 // can hand it off to the helper to create LCSSA PHIs if required for the
2334 // new use.
2335 // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2336   // would accept an insertion point and return an LCSSA phi for that
2337 // insertion point, so there is no need to insert & remove the temporary
2338 // instruction.
2339 Type *ToTy;
2340 if (DefI->getType()->isIntegerTy())
2341 ToTy = PointerType::get(C&: DefI->getContext(), AddressSpace: 0);
2342 else
2343 ToTy = Type::getInt32Ty(C&: DefI->getContext());
2344 Instruction *User =
2345 CastInst::CreateBitOrPointerCast(S: DefI, Ty: ToTy, Name: "tmp.lcssa.user", InsertBefore: InsertPt);
2346 llvm::scope_exit RemoveUserOnExit([User]() { User->eraseFromParent(); });
2347
2348 SmallVector<Instruction *, 1> ToUpdate;
2349 ToUpdate.push_back(Elt: DefI);
2350 SmallVector<PHINode *, 16> PHIsToRemove;
2351 SmallVector<PHINode *, 16> InsertedPHIs;
2352 formLCSSAForInstructions(Worklist&: ToUpdate, DT: SE.DT, LI: SE.LI, SE: &SE, PHIsToRemove: &PHIsToRemove,
2353 InsertedPHIs: &InsertedPHIs);
2354 for (PHINode *PN : InsertedPHIs)
2355 rememberInstruction(I: PN);
2356 for (PHINode *PN : PHIsToRemove) {
2357 if (!PN->use_empty())
2358 continue;
2359 InsertedValues.erase(V: PN);
2360 InsertedPostIncValues.erase(V: PN);
2361 PN->eraseFromParent();
2362 }
2363
2364 return User->getOperand(i: 0);
2365}
2366
2367namespace {
2368// Search for a SCEV subexpression that is not safe to expand. Any expression
2369// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2370// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2371// instruction, but the important thing is that we prove the denominator is
2372// nonzero before expansion.
2373//
2374// IVUsers already checks that IV-derived expressions are safe. So this check is
2375// only needed when the expression includes some subexpression that is not IV
2376// derived.
2377//
2378// Currently, we only allow division by a value provably non-zero here.
2379//
2380// We cannot generally expand recurrences unless the step dominates the loop
2381// header. The expander handles the special case of affine recurrences by
2382// scaling the recurrence outside the loop, but this technique isn't generally
2383// applicable. Expanding a nested recurrence outside a loop requires computing
2384// binomial coefficients. This could be done, but the recurrence has to be in a
2385// perfectly reduced form, which can't be guaranteed.
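// For example (illustrative only), an expression containing %a /u %b is
// rejected unless %b is provably non-zero, and a non-affine recurrence such as
// {0,+,{1,+,1}<%L>}<%L> is rejected when %L has no preheader to expand into.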
2386struct SCEVFindUnsafe {
2387 ScalarEvolution &SE;
2388 bool CanonicalMode;
2389 bool IsUnsafe = false;
2390
2391 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2392 : SE(SE), CanonicalMode(CanonicalMode) {}
2393
2394 bool follow(const SCEV *S) {
2395 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
2396 if (!SE.isKnownNonZero(S: D->getRHS())) {
2397 IsUnsafe = true;
2398 return false;
2399 }
2400 }
2401 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2402 // For non-affine addrecs or in non-canonical mode we need a preheader
2403 // to insert into.
2404 if (!AR->getLoop()->getLoopPreheader() &&
2405 (!CanonicalMode || !AR->isAffine())) {
2406 IsUnsafe = true;
2407 return false;
2408 }
2409 }
2410 return true;
2411 }
2412 bool isDone() const { return IsUnsafe; }
2413};
2414} // namespace
2415
2416bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
2417 SCEVFindUnsafe Search(SE, CanonicalMode);
2418 visitAll(Root: S, Visitor&: Search);
2419 return !Search.IsUnsafe;
2420}
2421
2422bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2423 const Instruction *InsertionPoint) const {
2424 if (!isSafeToExpand(S))
2425 return false;
2426 // We have to prove that the expanded site of S dominates InsertionPoint.
2427 // This is easy when not in the same block, but hard when S is an instruction
2428 // to be expanded somewhere inside the same block as our insertion point.
2429 // What we really need here is something analogous to an OrderedBasicBlock,
2430 // but for the moment, we paper over the problem by handling two common and
2431 // cheap to check cases.
2432 if (SE.properlyDominates(S, BB: InsertionPoint->getParent()))
2433 return true;
2434 if (SE.dominates(S, BB: InsertionPoint->getParent())) {
2435 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2436 return true;
2437 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S))
2438 if (llvm::is_contained(Range: InsertionPoint->operand_values(), Element: U->getValue()))
2439 return true;
2440 }
2441 return false;
2442}
2443
2444void SCEVExpanderCleaner::cleanup() {
2445 // Result is used, nothing to remove.
2446 if (ResultUsed)
2447 return;
2448
2449 // Restore original poison flags.
2450 for (auto [I, Flags] : Expander.OrigFlags)
2451 Flags.apply(I);
2452
2453 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2454#ifndef NDEBUG
2455 SmallPtrSet<Instruction *, 8> InsertedSet(llvm::from_range,
2456 InsertedInstructions);
2457 (void)InsertedSet;
2458#endif
2459 // Remove sets with value handles.
2460 Expander.clear();
2461
2462 // Remove all inserted instructions.
2463 for (Instruction *I : reverse(C&: InsertedInstructions)) {
2464#ifndef NDEBUG
2465 assert(all_of(I->users(),
2466 [&InsertedSet](Value *U) {
2467 return InsertedSet.contains(cast<Instruction>(U));
2468 }) &&
2469 "removed instruction should only be used by instructions inserted "
2470 "during expansion");
2471#endif
2472 assert(!I->getType()->isVoidTy() &&
2473 "inserted instruction should have non-void types");
2474 I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType()));
2475 I->eraseFromParent();
2476 }
2477}
2478