1//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of the scalar evolution expander,
10// which is used to generate the code corresponding to a given scalar evolution
11// expression.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/Analysis/InstructionSimplify.h"
19#include "llvm/Analysis/LoopInfo.h"
20#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
21#include "llvm/Analysis/TargetTransformInfo.h"
22#include "llvm/Analysis/ValueTracking.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/IR/IntrinsicInst.h"
26#include "llvm/IR/PatternMatch.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
29#include "llvm/Transforms/Utils/Local.h"
30#include "llvm/Transforms/Utils/LoopUtils.h"
31
32#if LLVM_ENABLE_ABI_BREAKING_CHECKS
33#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
34#else
35#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
36#endif
37
38using namespace llvm;
39
40cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
41 "scev-cheap-expansion-budget", cl::Hidden, cl::init(Val: 4),
42 cl::desc("When performing SCEV expansion only if it is cheap to do, this "
43 "controls the budget that is considered cheap (default = 4)"));
44
45using namespace PatternMatch;
46using namespace SCEVPatternMatch;
47
48PoisonFlags::PoisonFlags(const Instruction *I) {
49 NUW = false;
50 NSW = false;
51 Exact = false;
52 Disjoint = false;
53 NNeg = false;
54 SameSign = false;
55 GEPNW = GEPNoWrapFlags::none();
56 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I)) {
57 NUW = OBO->hasNoUnsignedWrap();
58 NSW = OBO->hasNoSignedWrap();
59 }
60 if (auto *PEO = dyn_cast<PossiblyExactOperator>(Val: I))
61 Exact = PEO->isExact();
62 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
63 Disjoint = PDI->isDisjoint();
64 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
65 NNeg = PNI->hasNonNeg();
66 if (auto *TI = dyn_cast<TruncInst>(Val: I)) {
67 NUW = TI->hasNoUnsignedWrap();
68 NSW = TI->hasNoSignedWrap();
69 }
70 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I))
71 GEPNW = GEP->getNoWrapFlags();
72 if (auto *ICmp = dyn_cast<ICmpInst>(Val: I))
73 SameSign = ICmp->hasSameSign();
74}
75
76void PoisonFlags::apply(Instruction *I) {
77 if (isa<OverflowingBinaryOperator>(Val: I)) {
78 I->setHasNoUnsignedWrap(NUW);
79 I->setHasNoSignedWrap(NSW);
80 }
81 if (isa<PossiblyExactOperator>(Val: I))
82 I->setIsExact(Exact);
83 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
84 PDI->setIsDisjoint(Disjoint);
85 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
86 PNI->setNonNeg(NNeg);
87 if (isa<TruncInst>(Val: I)) {
88 I->setHasNoUnsignedWrap(NUW);
89 I->setHasNoSignedWrap(NSW);
90 }
91 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I))
92 GEP->setNoWrapFlags(GEPNW);
93 if (auto *ICmp = dyn_cast<ICmpInst>(Val: I))
94 ICmp->setSameSign(SameSign);
95}
96
97/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
98/// reusing an existing cast if a suitable one (= dominating IP) exists, or
99/// creating a new one.
100Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
101 Instruction::CastOps Op,
102 BasicBlock::iterator IP) {
103 // This function must be called with the builder having a valid insertion
104 // point. It doesn't need to be the actual IP where the uses of the returned
105 // cast will be added, but it must dominate such IP.
106 // We use this precondition to produce a cast that will dominate all its
107 // uses. In particular, this is crucial for the case where the builder's
108 // insertion point *is* the point where we were asked to put the cast.
109 // Since we don't know the builder's insertion point is actually
110 // where the uses will be added (only that it dominates it), we are
111 // not allowed to move it.
112 BasicBlock::iterator BIP = Builder.GetInsertPoint();
113
114 Value *Ret = nullptr;
115
116 if (!isa<Constant>(Val: V)) {
117 // Check to see if there is already a cast!
118 for (User *U : V->users()) {
119 if (U->getType() != Ty)
120 continue;
121 CastInst *CI = dyn_cast<CastInst>(Val: U);
122 if (!CI || CI->getOpcode() != Op)
123 continue;
124
125 // Found a suitable cast that is at IP or comes before IP. Use it. Note
126 // that the cast must also properly dominate the Builder's insertion
127 // point.
128 if (IP->getParent() == CI->getParent() && &*BIP != CI &&
129 (&*IP == CI || CI->comesBefore(Other: &*IP))) {
130 Ret = CI;
131 break;
132 }
133 }
134 }
135
136 // Create a new cast.
137 if (!Ret) {
138 SCEVInsertPointGuard Guard(Builder, this);
139 Builder.SetInsertPoint(&*IP);
140 Ret = Builder.CreateCast(Op, V, DestTy: Ty, Name: V->getName());
141 }
142
143 // We assert at the end of the function since IP might point to an
144 // instruction with different dominance properties than a cast
145 // (an invoke for example) and not dominate BIP (but the cast does).
146 assert(!isa<Instruction>(Ret) ||
147 SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
148
149 return Ret;
150}
151
152BasicBlock::iterator
153SCEVExpander::findInsertPointAfter(Instruction *I,
154 Instruction *MustDominate) const {
155 BasicBlock::iterator IP = ++I->getIterator();
156 if (auto *II = dyn_cast<InvokeInst>(Val: I))
157 IP = II->getNormalDest()->begin();
158
159 while (isa<PHINode>(Val: IP))
160 ++IP;
161
162 if (isa<FuncletPadInst>(Val: IP) || isa<LandingPadInst>(Val: IP)) {
163 ++IP;
164 } else if (isa<CatchSwitchInst>(Val: IP)) {
165 IP = MustDominate->getParent()->getFirstInsertionPt();
166 } else {
167 assert(!IP->isEHPad() && "unexpected eh pad!");
168 }
169
170 // Adjust insert point to be after instructions inserted by the expander, so
171 // we can re-use already inserted instructions. Avoid skipping past the
172 // original \p MustDominate, in case it is an inserted instruction.
173 while (isInsertedInstruction(I: &*IP) && &*IP != MustDominate)
174 ++IP;
175
176 return IP;
177}
178
179void SCEVExpander::eraseDeadInstructions(Value *Root) {
180 SmallVector<Value *> WorkList;
181 SmallPtrSet<Value *, 8> DeletedValues;
182 append_range(C&: WorkList, R: getAllInsertedInstructions());
183 while (!WorkList.empty()) {
184 Value *V = WorkList.pop_back_val();
185 if (DeletedValues.contains(Ptr: V))
186 continue;
187 auto *I = dyn_cast<Instruction>(Val: V);
188 if (!I || I == Root || !isInsertedInstruction(I) ||
189 !isInstructionTriviallyDead(I))
190 continue;
191 append_range(C&: WorkList, R: I->operands());
192 InsertedValues.erase(V: I);
193 InsertedPostIncValues.erase(V: I);
194 DeletedValues.insert(Ptr: I);
195 I->eraseFromParent();
196 }
197}
198
199BasicBlock::iterator
200SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
201 // Cast the argument at the beginning of the entry block, after
202 // any bitcasts of other arguments.
203 if (Argument *A = dyn_cast<Argument>(Val: V)) {
204 BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
205 while ((isa<BitCastInst>(Val: IP) &&
206 isa<Argument>(Val: cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: 0)) &&
207 cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: 0) != A))
208 ++IP;
209 return IP;
210 }
211
212 // Cast the instruction immediately after the instruction.
213 if (Instruction *I = dyn_cast<Instruction>(Val: V))
214 return findInsertPointAfter(I, MustDominate: &*Builder.GetInsertPoint());
215
216 // Otherwise, this must be some kind of a constant,
217 // so let's plop this cast into the function's entry block.
218 assert(isa<Constant>(V) &&
219 "Expected the cast argument to be a global/constant");
220 return Builder.GetInsertBlock()
221 ->getParent()
222 ->getEntryBlock()
223 .getFirstInsertionPt();
224}
225
226/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
227/// which must be possible with a noop cast, doing what we can to share
228/// the casts.
229Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
230 Instruction::CastOps Op = CastInst::getCastOpcode(Val: V, SrcIsSigned: false, Ty, DstIsSigned: false);
231 assert((Op == Instruction::BitCast ||
232 Op == Instruction::PtrToInt ||
233 Op == Instruction::IntToPtr) &&
234 "InsertNoopCastOfTo cannot perform non-noop casts!");
235 assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
236 "InsertNoopCastOfTo cannot change sizes!");
237
238 // inttoptr only works for integral pointers. For non-integral pointers, we
239 // can create a GEP on null with the integral value as index. Note that
240 // it is safe to use GEP of null instead of inttoptr here, because only
241 // expressions already based on a GEP of null should be converted to pointers
242 // during expansion.
243 if (Op == Instruction::IntToPtr) {
244 auto *PtrTy = cast<PointerType>(Val: Ty);
245 if (DL.isNonIntegralPointerType(PT: PtrTy))
246 return Builder.CreatePtrAdd(Ptr: Constant::getNullValue(Ty: PtrTy), Offset: V, Name: "scevgep");
247 }
248 // Short-circuit unnecessary bitcasts.
249 if (Op == Instruction::BitCast) {
250 if (V->getType() == Ty)
251 return V;
252 if (CastInst *CI = dyn_cast<CastInst>(Val: V)) {
253 if (CI->getOperand(i_nocapture: 0)->getType() == Ty)
254 return CI->getOperand(i_nocapture: 0);
255 }
256 }
257 // Short-circuit unnecessary inttoptr<->ptrtoint casts.
258 if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
259 SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(Ty: V->getType())) {
260 if (CastInst *CI = dyn_cast<CastInst>(Val: V))
261 if ((CI->getOpcode() == Instruction::PtrToInt ||
262 CI->getOpcode() == Instruction::IntToPtr) &&
263 SE.getTypeSizeInBits(Ty: CI->getType()) ==
264 SE.getTypeSizeInBits(Ty: CI->getOperand(i_nocapture: 0)->getType()))
265 return CI->getOperand(i_nocapture: 0);
266 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
267 if ((CE->getOpcode() == Instruction::PtrToInt ||
268 CE->getOpcode() == Instruction::IntToPtr) &&
269 SE.getTypeSizeInBits(Ty: CE->getType()) ==
270 SE.getTypeSizeInBits(Ty: CE->getOperand(i_nocapture: 0)->getType()))
271 return CE->getOperand(i_nocapture: 0);
272 }
273
274 // Fold a cast of a constant.
275 if (Constant *C = dyn_cast<Constant>(Val: V))
276 return ConstantExpr::getCast(ops: Op, C, Ty);
277
278 // Try to reuse existing cast, or insert one.
279 return ReuseOrCreateCast(V, Ty, Op, IP: GetOptimalInsertionPointForCastOf(V));
280}
281
282/// InsertBinop - Insert the specified binary operator, doing a small amount
283/// of work to avoid inserting an obviously redundant operation, and hoisting
284/// to an outer loop when the opportunity is there and it is safe.
285Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
286 Value *LHS, Value *RHS,
287 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
288 // Fold a binop with constant operands.
289 if (Constant *CLHS = dyn_cast<Constant>(Val: LHS))
290 if (Constant *CRHS = dyn_cast<Constant>(Val: RHS))
291 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, LHS: CLHS, RHS: CRHS, DL))
292 return Res;
293
294 // Do a quick scan to see if we have this binop nearby. If so, reuse it.
295 unsigned ScanLimit = 6;
296 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
297 // Scanning starts from the last instruction before the insertion point.
298 BasicBlock::iterator IP = Builder.GetInsertPoint();
299 if (IP != BlockBegin) {
300 --IP;
301 for (; ScanLimit; --IP, --ScanLimit) {
302 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
303 // Ensure that no-wrap flags match.
304 if (isa<OverflowingBinaryOperator>(Val: I)) {
305 if (I->hasNoSignedWrap() != any(Val: Flags & SCEV::FlagNSW))
306 return true;
307 if (I->hasNoUnsignedWrap() != any(Val: Flags & SCEV::FlagNUW))
308 return true;
309 }
310 // Conservatively, do not use any instruction which has any of exact
311 // flags installed.
312 if (isa<PossiblyExactOperator>(Val: I) && I->isExact())
313 return true;
314 return false;
315 };
316 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(i: 0) == LHS &&
317 IP->getOperand(i: 1) == RHS && !canGenerateIncompatiblePoison(&*IP))
318 return &*IP;
319 if (IP == BlockBegin) break;
320 }
321 }
322
323 // Save the original insertion point so we can restore it when we're done.
324 DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
325 SCEVInsertPointGuard Guard(Builder, this);
326
327 if (IsSafeToHoist) {
328 // Move the insertion point out of as many loops as we can.
329 while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
330 if (!L->isLoopInvariant(V: LHS) || !L->isLoopInvariant(V: RHS)) break;
331 BasicBlock *Preheader = L->getLoopPreheader();
332 if (!Preheader) break;
333
334 // Ok, move up a level.
335 Builder.SetInsertPoint(Preheader->getTerminator());
336 }
337 }
338
339 // If we haven't found this binop, insert it.
340 // TODO: Use the Builder, which will make CreateBinOp below fold with
341 // InstSimplifyFolder.
342 Instruction *BO = Builder.Insert(I: BinaryOperator::Create(Op: Opcode, S1: LHS, S2: RHS));
343 BO->setDebugLoc(Loc);
344 if (any(Val: Flags & SCEV::FlagNUW))
345 BO->setHasNoUnsignedWrap();
346 if (any(Val: Flags & SCEV::FlagNSW))
347 BO->setHasNoSignedWrap();
348
349 return BO;
350}
351
352/// expandAddToGEP - Expand an addition expression with a pointer type into
353/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
354/// BasicAliasAnalysis and other passes analyze the result. See the rules
355/// for getelementptr vs. inttoptr in
356/// http://llvm.org/docs/LangRef.html#pointeraliasing
357/// for details.
358///
359/// Design note: The correctness of using getelementptr here depends on
360/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
361/// they may introduce pointer arithmetic which may not be safely converted
362/// into getelementptr.
363///
364/// Design note: It might seem desirable for this function to be more
365/// loop-aware. If some of the indices are loop-invariant while others
366/// aren't, it might seem desirable to emit multiple GEPs, keeping the
367/// loop-invariant portions of the overall computation outside the loop.
368/// However, there are a few reasons this is not done here. Hoisting simple
369/// arithmetic is a low-level optimization that often isn't very
370/// important until late in the optimization process. In fact, passes
371/// like InstructionCombining will combine GEPs, even if it means
372/// pushing loop-invariant computation down into loops, so even if the
373/// GEPs were split here, the work would quickly be undone. The
374/// LoopStrengthReduction pass, which is usually run quite late (and
375/// after the last InstructionCombining pass), takes care of hoisting
376/// loop-invariant portions of expressions, after considering what
377/// can be folded using target addressing modes.
378///
379Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V,
380 SCEV::NoWrapFlags Flags) {
381 assert(!isa<Instruction>(V) ||
382 SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
383
384 Value *Idx = expand(S: Offset);
385 GEPNoWrapFlags NW = any(Val: Flags & SCEV::FlagNUW)
386 ? GEPNoWrapFlags::noUnsignedWrap()
387 : GEPNoWrapFlags::none();
388
389 // Fold a GEP with constant operands.
390 if (Constant *CLHS = dyn_cast<Constant>(Val: V))
391 if (Constant *CRHS = dyn_cast<Constant>(Val: Idx))
392 return Builder.CreatePtrAdd(Ptr: CLHS, Offset: CRHS, Name: "", NW);
393
394 // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
395 unsigned ScanLimit = 6;
396 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
397 // Scanning starts from the last instruction before the insertion point.
398 BasicBlock::iterator IP = Builder.GetInsertPoint();
399 if (IP != BlockBegin) {
400 --IP;
401 for (; ScanLimit; --IP, --ScanLimit) {
402 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val&: IP)) {
403 if (GEP->getPointerOperand() == V &&
404 GEP->getSourceElementType() == Builder.getInt8Ty() &&
405 GEP->getOperand(i_nocapture: 1) == Idx) {
406 rememberFlags(I: GEP);
407 GEP->setNoWrapFlags(GEP->getNoWrapFlags() & NW);
408 return &*IP;
409 }
410 }
411 if (IP == BlockBegin) break;
412 }
413 }
414
415 // Save the original insertion point so we can restore it when we're done.
416 SCEVInsertPointGuard Guard(Builder, this);
417
418 // Move the insertion point out of as many loops as we can.
419 while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
420 if (!L->isLoopInvariant(V) || !L->isLoopInvariant(V: Idx)) break;
421 BasicBlock *Preheader = L->getLoopPreheader();
422 if (!Preheader) break;
423
424 // Ok, move up a level.
425 Builder.SetInsertPoint(Preheader->getTerminator());
426 }
427
428 // Emit a GEP.
429 return Builder.CreatePtrAdd(Ptr: V, Offset: Idx, Name: "scevgep", NW);
430}
431
432/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
433/// SCEV expansion. If they are nested, this is the most nested. If they are
434/// neighboring, pick the later.
435static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
436 DominatorTree &DT) {
437 if (!A) return B;
438 if (!B) return A;
439 if (A->contains(L: B)) return B;
440 if (B->contains(L: A)) return A;
441 if (DT.dominates(A: A->getHeader(), B: B->getHeader())) return B;
442 if (DT.dominates(A: B->getHeader(), B: A->getHeader())) return A;
443 return A; // Arbitrarily break the tie.
444}
445
446/// getRelevantLoop - Get the most relevant loop associated with the given
447/// expression, according to PickMostRelevantLoop.
448const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
449 // Test whether we've already computed the most relevant loop for this SCEV.
450 auto Pair = RelevantLoops.try_emplace(Key: S);
451 if (!Pair.second)
452 return Pair.first->second;
453
454 switch (S->getSCEVType()) {
455 case scConstant:
456 case scVScale:
457 return nullptr; // A constant has no relevant loops.
458 case scTruncate:
459 case scZeroExtend:
460 case scSignExtend:
461 case scPtrToAddr:
462 case scPtrToInt:
463 case scAddExpr:
464 case scMulExpr:
465 case scUDivExpr:
466 case scAddRecExpr:
467 case scUMaxExpr:
468 case scSMaxExpr:
469 case scUMinExpr:
470 case scSMinExpr:
471 case scSequentialUMinExpr: {
472 const Loop *L = nullptr;
473 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S))
474 L = AR->getLoop();
475 for (const SCEV *Op : S->operands())
476 L = PickMostRelevantLoop(A: L, B: getRelevantLoop(S: Op), DT&: SE.DT);
477 return RelevantLoops[S] = L;
478 }
479 case scUnknown: {
480 const SCEVUnknown *U = cast<SCEVUnknown>(Val: S);
481 if (const Instruction *I = dyn_cast<Instruction>(Val: U->getValue()))
482 return Pair.first->second = SE.LI.getLoopFor(BB: I->getParent());
483 // A non-instruction has no relevant loops.
484 return nullptr;
485 }
486 case scCouldNotCompute:
487 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
488 }
489 llvm_unreachable("Unexpected SCEV type!");
490}
491
492namespace {
493
494/// LoopCompare - Compare loops by PickMostRelevantLoop.
495class LoopCompare {
496 DominatorTree &DT;
497public:
498 explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
499
500 bool operator()(std::pair<const Loop *, const SCEV *> LHS,
501 std::pair<const Loop *, const SCEV *> RHS) const {
502 // Keep pointer operands sorted at the end.
503 if (LHS.second->getType()->isPointerTy() !=
504 RHS.second->getType()->isPointerTy())
505 return LHS.second->getType()->isPointerTy();
506
507 // Compare loops with PickMostRelevantLoop.
508 if (LHS.first != RHS.first)
509 return PickMostRelevantLoop(A: LHS.first, B: RHS.first, DT) != LHS.first;
510
511 // If one operand is a non-constant negative and the other is not,
512 // put the non-constant negative on the right so that a sub can
513 // be used instead of a negate and add.
514 if (LHS.second->isNonConstantNegative()) {
515 if (!RHS.second->isNonConstantNegative())
516 return false;
517 } else if (RHS.second->isNonConstantNegative())
518 return true;
519
520 // Otherwise they are equivalent according to this comparison.
521 return false;
522 }
523};
524
525}
526
527Value *SCEVExpander::visitAddExpr(SCEVUseT<const SCEVAddExpr *> S) {
528 // Recognize the canonical representation of an unsimplifed urem.
529 const SCEV *URemLHS = nullptr;
530 const SCEV *URemRHS = nullptr;
531 if (match(U: S, P: m_scev_URem(LHS: m_SCEV(V&: URemLHS), RHS: m_SCEV(V&: URemRHS), SE))) {
532 Value *LHS = expand(S: URemLHS);
533 Value *RHS = expand(S: URemRHS);
534 return InsertBinop(Opcode: Instruction::URem, LHS, RHS, Flags: SCEV::FlagAnyWrap,
535 /*IsSafeToHoist*/ false);
536 }
537
538 // Collect all the add operands in a loop, along with their associated loops.
539 // Iterate in reverse so that constants are emitted last, all else equal, and
540 // so that pointer operands are inserted first, which the code below relies on
541 // to form more involved GEPs.
542 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
543 for (const SCEV *Op : reverse(C: S->operands()))
544 OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));
545
546 // Sort by loop. Use a stable sort so that constants follow non-constants and
547 // pointer operands precede non-pointer operands.
548 llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare(SE.DT));
549
550 // Emit instructions to add all the operands. Hoist as much as possible
551 // out of loops, and form meaningful getelementptrs where possible.
552 Value *Sum = nullptr;
553 for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
554 const Loop *CurLoop = I->first;
555 const SCEV *Op = I->second;
556 if (!Sum) {
557 // This is the first operand. Just expand it.
558 Sum = expand(S: Op);
559 ++I;
560 continue;
561 }
562
563 assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
564 if (isa<PointerType>(Val: Sum->getType())) {
565 // The running sum expression is a pointer. Try to form a getelementptr
566 // at this level with that as the base.
567 SmallVector<SCEVUse, 4> NewOps;
568 for (; I != E && I->first == CurLoop; ++I) {
569 // If the operand is SCEVUnknown and not instructions, peek through
570 // it, to enable more of it to be folded into the GEP.
571 const SCEV *X = I->second;
572 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: X))
573 if (!isa<Instruction>(Val: U->getValue()))
574 X = SE.getSCEV(V: U->getValue());
575 NewOps.push_back(Elt: X);
576 }
577 Sum = expandAddToGEP(Offset: SE.getAddExpr(Ops&: NewOps), V: Sum, Flags: S.getNoWrapFlags());
578 } else if (Op->isNonConstantNegative()) {
579 // Instead of doing a negate and add, just do a subtract.
580 Value *W = expand(S: SE.getNegativeSCEV(V: Op));
581 Sum = InsertBinop(Opcode: Instruction::Sub, LHS: Sum, RHS: W, Flags: SCEV::FlagAnyWrap,
582 /*IsSafeToHoist*/ true);
583 ++I;
584 } else {
585 // A simple add.
586 Value *W = expand(S: Op);
587 // Canonicalize a constant to the RHS.
588 if (isa<Constant>(Val: Sum))
589 std::swap(a&: Sum, b&: W);
590 Sum = InsertBinop(Opcode: Instruction::Add, LHS: Sum, RHS: W, Flags: S.getNoWrapFlags(),
591 /*IsSafeToHoist*/ true);
592 ++I;
593 }
594 }
595
596 return Sum;
597}
598
599Value *SCEVExpander::visitMulExpr(SCEVUseT<const SCEVMulExpr *> S) {
600 Type *Ty = S->getType();
601
602 // Collect all the mul operands in a loop, along with their associated loops.
603 // Iterate in reverse so that constants are emitted last, all else equal.
604 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
605 for (const SCEV *Op : reverse(C: S->operands()))
606 OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));
607
608 // Sort by loop. Use a stable sort so that constants follow non-constants.
609 llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare(SE.DT));
610
611 // Emit instructions to mul all the operands. Hoist as much as possible
612 // out of loops.
613 Value *Prod = nullptr;
614 auto I = OpsAndLoops.begin();
615
616 // Expand the calculation of X pow N in the following manner:
617 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
618 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
619 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
620 auto E = I;
621 // Calculate how many times the same operand from the same loop is included
622 // into this power.
623 uint64_t Exponent = 0;
624 const uint64_t MaxExponent = UINT64_MAX >> 1;
625 // No one sane will ever try to calculate such huge exponents, but if we
626 // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
627 // below when the power of 2 exceeds our Exponent, and we want it to be
628 // 1u << 31 at most to not deal with unsigned overflow.
629 while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
630 ++Exponent;
631 ++E;
632 }
633 assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
634
635 // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
636 // that are needed into the result.
637 Value *P = expand(S: I->second);
638 Value *Result = nullptr;
639 if (Exponent & 1)
640 Result = P;
641 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
642 P = InsertBinop(Opcode: Instruction::Mul, LHS: P, RHS: P, Flags: SCEV::FlagAnyWrap,
643 /*IsSafeToHoist*/ true);
644 if (Exponent & BinExp)
645 Result = Result ? InsertBinop(Opcode: Instruction::Mul, LHS: Result, RHS: P,
646 Flags: SCEV::FlagAnyWrap,
647 /*IsSafeToHoist*/ true)
648 : P;
649 }
650
651 I = E;
652 assert(Result && "Nothing was expanded?");
653 return Result;
654 };
655
656 while (I != OpsAndLoops.end()) {
657 if (!Prod) {
658 // This is the first operand. Just expand it.
659 Prod = ExpandOpBinPowN();
660 } else if (I->second->isAllOnesValue()) {
661 // Instead of doing a multiply by negative one, just do a negate.
662 Prod = InsertBinop(Opcode: Instruction::Sub, LHS: Constant::getNullValue(Ty), RHS: Prod,
663 Flags: SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
664 ++I;
665 } else {
666 // A simple mul.
667 Value *W = ExpandOpBinPowN();
668 // Canonicalize a constant to the RHS.
669 if (isa<Constant>(Val: Prod)) std::swap(a&: Prod, b&: W);
670 const APInt *RHS;
671 if (match(V: W, P: m_Power2(V&: RHS))) {
672 // Canonicalize Prod*(1<<C) to Prod<<C.
673 assert(!Ty->isVectorTy() && "vector types are not SCEVable");
674 auto NWFlags = S.getNoWrapFlags();
675 // clear nsw flag if shl will produce poison value.
676 if (RHS->logBase2() == RHS->getBitWidth() - 1)
677 NWFlags = ScalarEvolution::clearFlags(Flags: NWFlags, OffFlags: SCEV::FlagNSW);
678 Prod = InsertBinop(Opcode: Instruction::Shl, LHS: Prod,
679 RHS: ConstantInt::get(Ty, V: RHS->logBase2()), Flags: NWFlags,
680 /*IsSafeToHoist*/ true);
681 } else {
682 Prod = InsertBinop(Opcode: Instruction::Mul, LHS: Prod, RHS: W, Flags: S.getNoWrapFlags(),
683 /*IsSafeToHoist*/ true);
684 }
685 }
686 }
687
688 return Prod;
689}
690
691Value *SCEVExpander::visitUDivExpr(SCEVUseT<const SCEVUDivExpr *> S) {
692 Value *LHS = expand(S: S->getLHS());
693 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Val: S->getRHS())) {
694 const APInt &RHS = SC->getAPInt();
695 if (RHS.isPowerOf2())
696 return InsertBinop(Opcode: Instruction::LShr, LHS,
697 RHS: ConstantInt::get(Ty: SC->getType(), V: RHS.logBase2()),
698 Flags: SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
699 }
700
701 const SCEV *RHSExpr = S->getRHS();
702 Value *RHS = expand(S: RHSExpr);
703 if (SafeUDivMode) {
704 bool GuaranteedNotPoison =
705 ScalarEvolution::isGuaranteedNotToBePoison(Op: RHSExpr);
706 if (!GuaranteedNotPoison)
707 RHS = Builder.CreateFreeze(V: RHS);
708
709 // We need an umax if either RHSExpr is not known to be zero, or if it is
710 // not guaranteed to be non-poison. In the later case, the frozen poison may
711 // be 0.
712 if (!SE.isKnownNonZero(S: RHSExpr) || !GuaranteedNotPoison)
713 RHS = Builder.CreateIntrinsic(RetTy: RHS->getType(), ID: Intrinsic::umax,
714 Args: {RHS, ConstantInt::get(Ty: RHS->getType(), V: 1)});
715 }
716 return InsertBinop(Opcode: Instruction::UDiv, LHS, RHS, Flags: SCEV::FlagAnyWrap,
717 /*IsSafeToHoist*/ SE.isKnownNonZero(S: S->getRHS()));
718}
719
720/// Determine if this is a well-behaved chain of instructions leading back to
721/// the PHI. If so, it may be reused by expanded expressions.
722bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
723 const Loop *L) {
724 if (IncV->getNumOperands() == 0 || isa<PHINode>(Val: IncV) ||
725 (isa<CastInst>(Val: IncV) && !isa<BitCastInst>(Val: IncV)))
726 return false;
727 // If any of the operands don't dominate the insert position, bail.
728 // Addrec operands are always loop-invariant, so this can only happen
729 // if there are instructions which haven't been hoisted.
730 if (L == IVIncInsertLoop) {
731 for (Use &Op : llvm::drop_begin(RangeOrContainer: IncV->operands()))
732 if (Instruction *OInst = dyn_cast<Instruction>(Val&: Op))
733 if (!SE.DT.dominates(Def: OInst, User: IVIncInsertPos))
734 return false;
735 }
736 // Advance to the next instruction.
737 IncV = dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
738 if (!IncV)
739 return false;
740
741 if (IncV->mayHaveSideEffects())
742 return false;
743
744 if (IncV == PN)
745 return true;
746
747 return isNormalAddRecExprPHI(PN, IncV, L);
748}
749
750/// getIVIncOperand returns an induction variable increment's induction
751/// variable operand.
752///
753/// If allowScale is set, any type of GEP is allowed as long as the nonIV
754/// operands dominate InsertPos.
755///
756/// If allowScale is not set, ensure that a GEP increment conforms to one of the
757/// simple patterns generated by getAddRecExprPHILiterally and
758/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
759Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
760 Instruction *InsertPos,
761 bool allowScale) {
762 if (IncV == InsertPos)
763 return nullptr;
764
765 switch (IncV->getOpcode()) {
766 default:
767 return nullptr;
768 // Check for a simple Add/Sub or GEP of a loop invariant step.
769 case Instruction::Add:
770 case Instruction::Sub: {
771 Instruction *OInst = dyn_cast<Instruction>(Val: IncV->getOperand(i: 1));
772 if (!OInst || SE.DT.dominates(Def: OInst, User: InsertPos))
773 return dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
774 return nullptr;
775 }
776 case Instruction::BitCast:
777 return dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
778 case Instruction::GetElementPtr:
779 for (Use &U : llvm::drop_begin(RangeOrContainer: IncV->operands())) {
780 if (isa<Constant>(Val: U))
781 continue;
782 if (Instruction *OInst = dyn_cast<Instruction>(Val&: U)) {
783 if (!SE.DT.dominates(Def: OInst, User: InsertPos))
784 return nullptr;
785 }
786 if (allowScale) {
787 // allow any kind of GEP as long as it can be hoisted.
788 continue;
789 }
790 // GEPs produced by SCEVExpander use i8 element type.
791 if (!cast<GEPOperator>(Val: IncV)->getSourceElementType()->isIntegerTy(Bitwidth: 8))
792 return nullptr;
793 break;
794 }
795 return dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
796 }
797}
798
799/// If the insert point of the current builder or any of the builders on the
800/// stack of saved builders has 'I' as its insert point, update it to point to
801/// the instruction after 'I'. This is intended to be used when the instruction
802/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
803/// different block, the inconsistent insert point (with a mismatched
804/// Instruction and Block) can lead to an instruction being inserted in a block
805/// other than its parent.
806void SCEVExpander::fixupInsertPoints(Instruction *I) {
807 BasicBlock::iterator It(*I);
808 BasicBlock::iterator NewInsertPt = std::next(x: It);
809 if (Builder.GetInsertPoint() == It)
810 Builder.SetInsertPoint(&*NewInsertPt);
811 for (auto *InsertPtGuard : InsertPointGuards)
812 if (InsertPtGuard->GetInsertPoint() == It)
813 InsertPtGuard->SetInsertPoint(NewInsertPt);
814}
815
816/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
817/// it available to other uses in this loop. Recursively hoist any operands,
818/// until we reach a value that dominates InsertPos.
819bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
820 bool RecomputePoisonFlags) {
821 auto FixupPoisonFlags = [this](Instruction *I) {
822 // Drop flags that are potentially inferred from old context and infer flags
823 // in new context.
824 rememberFlags(I);
825 I->dropPoisonGeneratingFlags();
826 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
827 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
828 auto *BO = cast<BinaryOperator>(Val: I);
829 BO->setHasNoUnsignedWrap(
830 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
831 BO->setHasNoSignedWrap(
832 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
833 }
834 };
835
836 if (SE.DT.dominates(Def: IncV, User: InsertPos)) {
837 if (RecomputePoisonFlags)
838 FixupPoisonFlags(IncV);
839 return true;
840 }
841
842 // InsertPos must itself dominate IncV so that IncV's new position satisfies
843 // its existing users.
844 if (isa<PHINode>(Val: InsertPos) ||
845 !SE.DT.dominates(A: InsertPos->getParent(), B: IncV->getParent()))
846 return false;
847
848 if (!SE.LI.movementPreservesLCSSAForm(Inst: IncV, NewLoc: InsertPos))
849 return false;
850
851 // Check that the chain of IV operands leading back to Phi can be hoisted.
852 SmallVector<Instruction*, 4> IVIncs;
853 for(;;) {
854 Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
855 if (!Oper)
856 return false;
857 // IncV is safe to hoist.
858 IVIncs.push_back(Elt: IncV);
859 IncV = Oper;
860 if (SE.DT.dominates(Def: IncV, User: InsertPos))
861 break;
862 }
863 for (Instruction *I : llvm::reverse(C&: IVIncs)) {
864 fixupInsertPoints(I);
865 I->moveBefore(InsertPos: InsertPos->getIterator());
866 if (RecomputePoisonFlags)
867 FixupPoisonFlags(I);
868 }
869 return true;
870}
871
872bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
873 PHINode *WidePhi,
874 Instruction *OrigInc,
875 Instruction *WideInc) {
876 return match(V: OrigInc, P: m_c_BinOp(L: m_Specific(V: OrigPhi), R: m_Value())) &&
877 match(V: WideInc, P: m_c_BinOp(L: m_Specific(V: WidePhi), R: m_Value())) &&
878 OrigInc->getOpcode() == WideInc->getOpcode();
879}
880
881/// Determine if this cyclic phi is in a form that would have been generated by
882/// LSR. We don't care if the phi was actually expanded in this pass, as long
883/// as it is in a low-cost form, for example, no implied multiplication. This
884/// should match any patterns generated by getAddRecExprPHILiterally and
885/// expandAddtoGEP.
886bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
887 const Loop *L) {
888 for(Instruction *IVOper = IncV;
889 (IVOper = getIVIncOperand(IncV: IVOper, InsertPos: L->getLoopPreheader()->getTerminator(),
890 /*allowScale=*/false));) {
891 if (IVOper == PN)
892 return true;
893 }
894 return false;
895}
896
897/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
898/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
899/// need to materialize IV increments elsewhere to handle difficult situations.
900Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
901 bool useSubtract) {
902 Value *IncV;
903 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
904 if (PN->getType()->isPointerTy()) {
905 // TODO: Change name to IVName.iv.next.
906 IncV = Builder.CreatePtrAdd(Ptr: PN, Offset: StepV, Name: "scevgep");
907 } else {
908 IncV = useSubtract ?
909 Builder.CreateSub(LHS: PN, RHS: StepV, Name: Twine(IVName) + ".iv.next") :
910 Builder.CreateAdd(LHS: PN, RHS: StepV, Name: Twine(IVName) + ".iv.next");
911 }
912 return IncV;
913}
914
915/// Check whether we can cheaply express the requested SCEV in terms of
916/// the available PHI SCEV by truncation and/or inversion of the step.
917static bool canBeCheaplyTransformed(ScalarEvolution &SE,
918 const SCEVAddRecExpr *Phi,
919 const SCEVAddRecExpr *Requested,
920 bool &InvertStep) {
921 // We can't transform to match a pointer PHI.
922 Type *PhiTy = Phi->getType();
923 Type *RequestedTy = Requested->getType();
924 if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
925 return false;
926
927 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
928 return false;
929
930 // Try truncate it if necessary.
931 Phi = dyn_cast<SCEVAddRecExpr>(Val: SE.getTruncateOrNoop(V: Phi, Ty: RequestedTy));
932 if (!Phi)
933 return false;
934
935 // Check whether truncation will help.
936 if (Phi == Requested) {
937 InvertStep = false;
938 return true;
939 }
940
941 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
942 if (SE.getMinusSCEV(LHS: Requested->getStart(), RHS: Requested) == Phi) {
943 InvertStep = true;
944 return true;
945 }
946
947 return false;
948}
949
950static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
951 if (!isa<IntegerType>(Val: AR->getType()))
952 return false;
953
954 unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
955 Type *WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth * 2);
956 const SCEV *Step = AR->getStepRecurrence(SE);
957 const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getSignExtendExpr(Op: Step, Ty: WideTy),
958 RHS: SE.getSignExtendExpr(Op: AR, Ty: WideTy));
959 const SCEV *ExtendAfterOp =
960 SE.getSignExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
961 return ExtendAfterOp == OpAfterExtend;
962}
963
964static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
965 if (!isa<IntegerType>(Val: AR->getType()))
966 return false;
967
968 unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
969 Type *WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth * 2);
970 const SCEV *Step = AR->getStepRecurrence(SE);
971 const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getZeroExtendExpr(Op: Step, Ty: WideTy),
972 RHS: SE.getZeroExtendExpr(Op: AR, Ty: WideTy));
973 const SCEV *ExtendAfterOp =
974 SE.getZeroExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
975 return ExtendAfterOp == OpAfterExtend;
976}
977
978/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
979/// the base addrec, which is the addrec without any non-loop-dominating
980/// values, and return the PHI.
981PHINode *
982SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
983 const Loop *L, Type *&TruncTy,
984 bool &InvertStep) {
985 assert((!IVIncInsertLoop || IVIncInsertPos) &&
986 "Uninitialized insert position");
987
988 // Reuse a previously-inserted PHI, if present.
989 BasicBlock *LatchBlock = L->getLoopLatch();
990 if (LatchBlock) {
991 PHINode *AddRecPhiMatch = nullptr;
992 Instruction *IncV = nullptr;
993 TruncTy = nullptr;
994 InvertStep = false;
995
996 // Only try partially matching scevs that need truncation and/or
997 // step-inversion if we know this loop is outside the current loop.
998 bool TryNonMatchingSCEV =
999 IVIncInsertLoop &&
1000 SE.DT.properlyDominates(A: LatchBlock, B: IVIncInsertLoop->getHeader());
1001
1002 for (PHINode &PN : L->getHeader()->phis()) {
1003 if (!SE.isSCEVable(Ty: PN.getType()))
1004 continue;
1005
1006 // We should not look for a incomplete PHI. Getting SCEV for a incomplete
1007 // PHI has no meaning at all.
1008 if (!PN.isComplete()) {
1009 SCEV_DEBUG_WITH_TYPE(
1010 DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
1011 continue;
1012 }
1013
1014 const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: &PN));
1015 if (!PhiSCEV)
1016 continue;
1017
1018 bool IsMatchingSCEV = PhiSCEV == Normalized;
1019 // We only handle truncation and inversion of phi recurrences for the
1020 // expanded expression if the expanded expression's loop dominates the
1021 // loop we insert to. Check now, so we can bail out early.
1022 if (!IsMatchingSCEV && !TryNonMatchingSCEV)
1023 continue;
1024
1025 // TODO: this possibly can be reworked to avoid this cast at all.
1026 Instruction *TempIncV =
1027 dyn_cast<Instruction>(Val: PN.getIncomingValueForBlock(BB: LatchBlock));
1028 if (!TempIncV)
1029 continue;
1030
1031 // Check whether we can reuse this PHI node.
1032 if (LSRMode) {
1033 if (!isExpandedAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
1034 continue;
1035 } else {
1036 if (!isNormalAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
1037 continue;
1038 }
1039
1040 // Stop if we have found an exact match SCEV.
1041 if (IsMatchingSCEV) {
1042 IncV = TempIncV;
1043 TruncTy = nullptr;
1044 InvertStep = false;
1045 AddRecPhiMatch = &PN;
1046 break;
1047 }
1048
1049 // Try whether the phi can be translated into the requested form
1050 // (truncated and/or offset by a constant).
1051 if ((!TruncTy || InvertStep) &&
1052 canBeCheaplyTransformed(SE, Phi: PhiSCEV, Requested: Normalized, InvertStep)) {
1053 // Record the phi node. But don't stop we might find an exact match
1054 // later.
1055 AddRecPhiMatch = &PN;
1056 IncV = TempIncV;
1057 TruncTy = Normalized->getType();
1058 }
1059 }
1060
1061 if (AddRecPhiMatch) {
1062 // Ok, the add recurrence looks usable.
1063 // Remember this PHI, even in post-inc mode.
1064 InsertedValues.insert(V: AddRecPhiMatch);
1065 // Remember the increment.
1066 rememberInstruction(I: IncV);
1067 // Those values were not actually inserted but re-used.
1068 ReusedValues.insert(Ptr: AddRecPhiMatch);
1069 ReusedValues.insert(Ptr: IncV);
1070 return AddRecPhiMatch;
1071 }
1072 }
1073
1074 // Save the original insertion point so we can restore it when we're done.
1075 SCEVInsertPointGuard Guard(Builder, this);
1076
1077 // Another AddRec may need to be recursively expanded below. For example, if
1078 // this AddRec is quadratic, the StepV may itself be an AddRec in this
1079 // loop. Remove this loop from the PostIncLoops set before expanding such
1080 // AddRecs. Otherwise, we cannot find a valid position for the step
1081 // (i.e. StepV can never dominate its loop header). Ideally, we could do
1082 // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1083 // so it's not worth implementing SmallPtrSet::swap.
1084 PostIncLoopSet SavedPostIncLoops = PostIncLoops;
1085 PostIncLoops.clear();
1086
1087 // Expand code for the start value into the loop preheader.
1088 assert(L->getLoopPreheader() &&
1089 "Can't expand add recurrences without a loop preheader!");
1090 Value *StartV =
1091 expand(S: Normalized->getStart(), I: L->getLoopPreheader()->getTerminator());
1092
1093 // StartV must have been be inserted into L's preheader to dominate the new
1094 // phi.
1095 assert(!isa<Instruction>(StartV) ||
1096 SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
1097 L->getHeader()));
1098
1099 // Expand code for the step value. Do this before creating the PHI so that PHI
1100 // reuse code doesn't see an incomplete PHI.
1101 const SCEV *Step = Normalized->getStepRecurrence(SE);
1102 Type *ExpandTy = Normalized->getType();
1103 // If the stride is negative, insert a sub instead of an add for the increment
1104 // (unless it's a constant, because subtracts of constants are canonicalized
1105 // to adds).
1106 bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1107 if (useSubtract)
1108 Step = SE.getNegativeSCEV(V: Step);
1109 // Expand the step somewhere that dominates the loop header.
1110 Value *StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());
1111
1112 // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1113 // we actually do emit an addition. It does not apply if we emit a
1114 // subtraction.
1115 bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, AR: Normalized);
1116 bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, AR: Normalized);
1117
1118 // Create the PHI.
1119 BasicBlock *Header = L->getHeader();
1120 Builder.SetInsertPoint(TheBB: Header, IP: Header->begin());
1121 PHINode *PN =
1122 Builder.CreatePHI(Ty: ExpandTy, NumReservedValues: pred_size(BB: Header), Name: Twine(IVName) + ".iv");
1123
1124 // Create the step instructions and populate the PHI.
1125 for (BasicBlock *Pred : predecessors(BB: Header)) {
1126 // Add a start value.
1127 if (!L->contains(BB: Pred)) {
1128 PN->addIncoming(V: StartV, BB: Pred);
1129 continue;
1130 }
1131
1132 // Create a step value and add it to the PHI.
1133 // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1134 // instructions at IVIncInsertPos.
1135 Instruction *InsertPos = L == IVIncInsertLoop ?
1136 IVIncInsertPos : Pred->getTerminator();
1137 Builder.SetInsertPoint(InsertPos);
1138 Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1139
1140 if (isa<OverflowingBinaryOperator>(Val: IncV)) {
1141 if (IncrementIsNUW)
1142 cast<BinaryOperator>(Val: IncV)->setHasNoUnsignedWrap();
1143 if (IncrementIsNSW)
1144 cast<BinaryOperator>(Val: IncV)->setHasNoSignedWrap();
1145 }
1146 PN->addIncoming(V: IncV, BB: Pred);
1147 }
1148
1149 // After expanding subexpressions, restore the PostIncLoops set so the caller
1150 // can ensure that IVIncrement dominates the current uses.
1151 PostIncLoops = SavedPostIncLoops;
1152
1153 // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1154 // effective when we are able to use an IV inserted here, so record it.
1155 InsertedValues.insert(V: PN);
1156 InsertedIVs.push_back(Elt: PN);
1157 return PN;
1158}
1159
1160Value *
1161SCEVExpander::expandAddRecExprLiterally(SCEVUseT<const SCEVAddRecExpr *> S) {
1162 const Loop *L = S->getLoop();
1163
1164 // Determine a normalized form of this expression, which is the expression
1165 // before any post-inc adjustment is made.
1166 const SCEVAddRecExpr *Normalized = S;
1167 if (PostIncLoops.count(Ptr: L)) {
1168 PostIncLoopSet Loops;
1169 Loops.insert(Ptr: L);
1170 Normalized = cast<SCEVAddRecExpr>(
1171 Val: normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
1172 }
1173
1174 [[maybe_unused]] const SCEV *Start = Normalized->getStart();
1175 const SCEV *Step = Normalized->getStepRecurrence(SE);
1176 assert(SE.properlyDominates(Start, L->getHeader()) &&
1177 "Start does not properly dominate loop header");
1178 assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");
1179
1180 // In some cases, we decide to reuse an existing phi node but need to truncate
1181 // it and/or invert the step.
1182 Type *TruncTy = nullptr;
1183 bool InvertStep = false;
1184 PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1185
1186 // Accommodate post-inc mode, if necessary.
1187 Value *Result;
1188 if (!PostIncLoops.count(Ptr: L))
1189 Result = PN;
1190 else {
1191 // In PostInc mode, use the post-incremented value.
1192 BasicBlock *LatchBlock = L->getLoopLatch();
1193 assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1194 Result = PN->getIncomingValueForBlock(BB: LatchBlock);
1195
1196 // We might be introducing a new use of the post-inc IV that is not poison
1197 // safe, in which case we should drop poison generating flags. Only keep
1198 // those flags for which SCEV has proven that they always hold.
1199 if (isa<OverflowingBinaryOperator>(Val: Result)) {
1200 auto *I = cast<Instruction>(Val: Result);
1201 if (!S->hasNoUnsignedWrap())
1202 I->setHasNoUnsignedWrap(false);
1203 if (!S->hasNoSignedWrap())
1204 I->setHasNoSignedWrap(false);
1205 }
1206
1207 // For an expansion to use the postinc form, the client must call
1208 // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1209 // or dominated by IVIncInsertPos.
1210 if (isa<Instruction>(Val: Result) &&
1211 !SE.DT.dominates(Def: cast<Instruction>(Val: Result),
1212 User: &*Builder.GetInsertPoint())) {
1213 // The induction variable's postinc expansion does not dominate this use.
1214 // IVUsers tries to prevent this case, so it is rare. However, it can
1215 // happen when an IVUser outside the loop is not dominated by the latch
1216 // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1217 // all cases. Consider a phi outside whose operand is replaced during
1218 // expansion with the value of the postinc user. Without fundamentally
1219 // changing the way postinc users are tracked, the only remedy is
1220 // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1221 // but hopefully expandCodeFor handles that.
1222 bool useSubtract =
1223 !S->getType()->isPointerTy() && Step->isNonConstantNegative();
1224 if (useSubtract)
1225 Step = SE.getNegativeSCEV(V: Step);
1226 Value *StepV;
1227 {
1228 // Expand the step somewhere that dominates the loop header.
1229 SCEVInsertPointGuard Guard(Builder, this);
1230 StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());
1231 }
1232 Result = expandIVInc(PN, StepV, L, useSubtract);
1233 }
1234 }
1235
1236 // We have decided to reuse an induction variable of a dominating loop. Apply
1237 // truncation and/or inversion of the step.
1238 if (TruncTy) {
1239 // Truncate the result.
1240 if (TruncTy != Result->getType())
1241 Result = Builder.CreateTrunc(V: Result, DestTy: TruncTy);
1242
1243 // Invert the result.
1244 if (InvertStep)
1245 Result = Builder.CreateSub(LHS: expand(S: Normalized->getStart()), RHS: Result);
1246 }
1247
1248 return Result;
1249}
1250
1251Value *SCEVExpander::tryToReuseLCSSAPhi(SCEVUseT<const SCEVAddRecExpr *> S) {
1252 Type *STy = S->getType();
1253 const Loop *L = S->getLoop();
1254 BasicBlock *EB = L->getExitBlock();
1255 if (!EB || !EB->getSinglePredecessor() ||
1256 !SE.DT.dominates(A: EB, B: Builder.GetInsertBlock()))
1257 return nullptr;
1258
1259 // Helper to check if the diff between S and ExitSCEV is simple enough to
1260 // allow reusing the LCSSA phi.
1261 auto CanReuse = [&](const SCEV *ExitSCEV) -> const SCEV * {
1262 if (isa<SCEVCouldNotCompute>(Val: ExitSCEV))
1263 return nullptr;
1264 const SCEV *Diff = SE.getMinusSCEV(LHS: S, RHS: ExitSCEV);
1265 const SCEV *Op = Diff;
1266 match(S: Op, P: m_scev_Add(Op0: m_SCEVConstant(), Op1: m_SCEV(V&: Op)));
1267 match(S: Op, P: m_scev_Mul(Op0: m_scev_AllOnes(), Op1: m_SCEV(V&: Op)));
1268 match(S: Op, P: m_scev_PtrToAddr(Op0: m_SCEV(V&: Op))) ||
1269 match(S: Op, P: m_scev_PtrToInt(Op0: m_SCEV(V&: Op)));
1270 if (!isa<SCEVConstant, SCEVUnknown>(Val: Op))
1271 return nullptr;
1272 return Diff;
1273 };
1274
1275 for (auto &PN : EB->phis()) {
1276 if (!SE.isSCEVable(Ty: PN.getType()))
1277 continue;
1278 auto *ExitSCEV = SE.getSCEV(V: &PN);
1279 if (!isa<SCEVAddRecExpr>(Val: ExitSCEV))
1280 continue;
1281 Type *PhiTy = PN.getType();
1282 const SCEV *Diff = nullptr;
1283 if (STy->isIntegerTy() && PhiTy->isPointerTy() &&
1284 DL.getAddressType(PtrTy: PhiTy) == STy) {
1285 // Prefer ptrtoaddr over ptrtoint.
1286 const SCEV *AddrSCEV = SE.getPtrToAddrExpr(Op: ExitSCEV);
1287 Diff = CanReuse(AddrSCEV);
1288 if (!Diff) {
1289 const SCEV *IntSCEV = SE.getPtrToIntExpr(Op: ExitSCEV, Ty: STy);
1290 Diff = CanReuse(IntSCEV);
1291 }
1292 } else if (STy == PhiTy) {
1293 Diff = CanReuse(ExitSCEV);
1294 }
1295 if (!Diff)
1296 continue;
1297
1298 assert(Diff->getType()->isIntegerTy() &&
1299 "difference must be of integer type");
1300 Value *DiffV = expand(S: Diff);
1301 Value *BaseV = fixupLCSSAFormFor(V: &PN);
1302 if (PhiTy->isPointerTy()) {
1303 if (STy->isPointerTy())
1304 return Builder.CreatePtrAdd(Ptr: BaseV, Offset: DiffV);
1305 BaseV = Builder.CreatePtrToAddr(V: BaseV);
1306 }
1307 return Builder.CreateAdd(LHS: BaseV, RHS: DiffV);
1308 }
1309
1310 return nullptr;
1311}
1312
1313Value *SCEVExpander::visitAddRecExpr(SCEVUseT<const SCEVAddRecExpr *> S) {
1314 // In canonical mode we compute the addrec as an expression of a canonical IV
1315 // using evaluateAtIteration and expand the resulting SCEV expression. This
1316 // way we avoid introducing new IVs to carry on the computation of the addrec
1317 // throughout the loop.
1318 //
1319 // For nested addrecs evaluateAtIteration might need a canonical IV of a
1320 // type wider than the addrec itself. Emitting a canonical IV of the
1321 // proper type might produce non-legal types, for example expanding an i64
1322 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1323 // back to non-canonical mode for nested addrecs.
1324 if (!CanonicalMode || (S->getNumOperands() > 2))
1325 return expandAddRecExprLiterally(S);
1326
1327 Type *Ty = SE.getEffectiveSCEVType(Ty: S->getType());
1328 const Loop *L = S->getLoop();
1329
1330 // First check for an existing canonical IV in a suitable type.
1331 PHINode *CanonicalIV = nullptr;
1332 if (PHINode *PN = L->getCanonicalInductionVariable())
1333 if (SE.getTypeSizeInBits(Ty: PN->getType()) >= SE.getTypeSizeInBits(Ty))
1334 CanonicalIV = PN;
1335
1336 // Rewrite an AddRec in terms of the canonical induction variable, if
1337 // its type is more narrow.
1338 if (CanonicalIV &&
1339 SE.getTypeSizeInBits(Ty: CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1340 !S->getType()->isPointerTy()) {
1341 SmallVector<SCEVUse, 4> NewOps(S->getNumOperands());
1342 for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1343 NewOps[i] = SE.getAnyExtendExpr(Op: S->getOperand(i), Ty: CanonicalIV->getType());
1344 Value *V = expand(
1345 S: SE.getAddRecExpr(Operands&: NewOps, L: S->getLoop(), Flags: S.getNoWrapFlags(Mask: SCEV::FlagNW)));
1346 BasicBlock::iterator NewInsertPt =
1347 findInsertPointAfter(I: cast<Instruction>(Val: V), MustDominate: &*Builder.GetInsertPoint());
1348 V = expand(S: SE.getTruncateExpr(Op: SE.getUnknown(V), Ty), I: NewInsertPt);
1349 return V;
1350 }
1351
1352 // If S is expanded outside the defining loop, check if there is a
1353 // matching LCSSA phi node for it.
1354 if (Value *V = tryToReuseLCSSAPhi(S))
1355 return V;
1356
1357 // {X,+,F} --> X + {0,+,F}
1358 if (!S->getStart()->isZero()) {
1359 if (isa<PointerType>(Val: S->getType())) {
1360 Value *StartV = expand(S: SE.getPointerBase(V: S));
1361 return expandAddToGEP(Offset: SE.removePointerBase(S), V: StartV,
1362 Flags: S.getNoWrapFlags(Mask: SCEV::FlagNUW));
1363 }
1364
1365 SmallVector<SCEVUse, 4> NewOps(S->operands());
1366 NewOps[0] = SE.getConstant(Ty, V: 0);
1367 const SCEV *Rest =
1368 SE.getAddRecExpr(Operands&: NewOps, L, Flags: S.getNoWrapFlags(Mask: SCEV::FlagNW));
1369
1370 // Just do a normal add. Pre-expand the operands to suppress folding.
1371 //
1372 // The LHS and RHS values are factored out of the expand call to make the
1373 // output independent of the argument evaluation order.
1374 const SCEV *AddExprLHS = SE.getUnknown(V: expand(S: S->getStart()));
1375 const SCEV *AddExprRHS = SE.getUnknown(V: expand(S: Rest));
1376 return expand(S: SE.getAddExpr(LHS: AddExprLHS, RHS: AddExprRHS));
1377 }
1378
1379 // If we don't yet have a canonical IV, create one.
1380 if (!CanonicalIV) {
1381 // Create and insert the PHI node for the induction variable in the
1382 // specified loop.
1383 BasicBlock *Header = L->getHeader();
1384 pred_iterator HPB = pred_begin(BB: Header), HPE = pred_end(BB: Header);
1385 CanonicalIV = PHINode::Create(Ty, NumReservedValues: std::distance(first: HPB, last: HPE), NameStr: "indvar");
1386 CanonicalIV->insertBefore(InsertPos: Header->begin());
1387 rememberInstruction(I: CanonicalIV);
1388
1389 SmallPtrSet<BasicBlock *, 4> PredSeen;
1390 Constant *One = ConstantInt::get(Ty, V: 1);
1391 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1392 BasicBlock *HP = *HPI;
1393 if (!PredSeen.insert(Ptr: HP).second) {
1394 // There must be an incoming value for each predecessor, even the
1395 // duplicates!
1396 CanonicalIV->addIncoming(V: CanonicalIV->getIncomingValueForBlock(BB: HP), BB: HP);
1397 continue;
1398 }
1399
1400 if (L->contains(BB: HP)) {
1401 // Insert a unit add instruction right before the terminator
1402 // corresponding to the back-edge.
1403 Instruction *Add = BinaryOperator::CreateAdd(V1: CanonicalIV, V2: One,
1404 Name: "indvar.next",
1405 InsertBefore: HP->getTerminator()->getIterator());
1406 Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1407 rememberInstruction(I: Add);
1408 CanonicalIV->addIncoming(V: Add, BB: HP);
1409 } else {
1410 CanonicalIV->addIncoming(V: Constant::getNullValue(Ty), BB: HP);
1411 }
1412 }
1413 }
1414
1415 // {0,+,1} --> Insert a canonical induction variable into the loop!
1416 if (S->isAffine() && S->getOperand(i: 1)->isOne()) {
1417 assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1418 "IVs with types different from the canonical IV should "
1419 "already have been handled!");
1420 return CanonicalIV;
1421 }
1422
1423 // {0,+,F} --> {0,+,1} * F
1424
1425 // If this is a simple linear addrec, emit it now as a special case.
1426 if (S->isAffine()) // {0,+,F} --> i*F
1427 return
1428 expand(S: SE.getTruncateOrNoop(
1429 V: SE.getMulExpr(LHS: SE.getUnknown(V: CanonicalIV),
1430 RHS: SE.getNoopOrAnyExtend(V: S->getOperand(i: 1),
1431 Ty: CanonicalIV->getType())),
1432 Ty));
1433
1434 // If this is a chain of recurrences, turn it into a closed form, using the
1435 // folders, then expandCodeFor the closed form. This allows the folders to
1436 // simplify the expression without having to build a bunch of special code
1437 // into this folder.
1438 const SCEV *IH = SE.getUnknown(V: CanonicalIV); // Get I as a "symbolic" SCEV.
1439
1440 // Promote S up to the canonical IV type, if the cast is foldable.
1441 const SCEV *NewS = S;
1442 const SCEV *Ext = SE.getNoopOrAnyExtend(V: S, Ty: CanonicalIV->getType());
1443 if (isa<SCEVAddRecExpr>(Val: Ext))
1444 NewS = Ext;
1445
1446 const SCEV *V = cast<SCEVAddRecExpr>(Val: NewS)->evaluateAtIteration(It: IH, SE);
1447
1448 // Truncate the result down to the original type, if needed.
1449 const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1450 return expand(S: T);
1451}
1452
1453Value *SCEVExpander::visitPtrToAddrExpr(SCEVUseT<const SCEVPtrToAddrExpr *> S) {
1454 Value *V = expand(S: S->getOperand());
1455 Type *Ty = S->getType();
1456
1457 // ptrtoaddr and ptrtoint produce the same value, so try to reuse either.
1458 if (!isa<Constant>(Val: V)) {
1459 BasicBlock::iterator BIP = Builder.GetInsertPoint();
1460 for (User *U : V->users()) {
1461 auto *CI = dyn_cast<CastInst>(Val: U);
1462 if (CI && CI->getType() == Ty &&
1463 (CI->getOpcode() == CastInst::PtrToAddr ||
1464 CI->getOpcode() == CastInst::PtrToInt) &&
1465 &*BIP != CI && SE.DT.dominates(Def: CI, User: &*BIP))
1466 return CI;
1467 }
1468 }
1469 return ReuseOrCreateCast(V, Ty, Op: CastInst::PtrToAddr,
1470 IP: GetOptimalInsertionPointForCastOf(V));
1471}
1472
1473Value *SCEVExpander::visitPtrToIntExpr(SCEVUseT<const SCEVPtrToIntExpr *> S) {
1474 Value *V = expand(S: S->getOperand());
1475 return ReuseOrCreateCast(V, Ty: S->getType(), Op: CastInst::PtrToInt,
1476 IP: GetOptimalInsertionPointForCastOf(V));
1477}
1478
1479Value *SCEVExpander::visitTruncateExpr(SCEVUseT<const SCEVTruncateExpr *> S) {
1480 Value *V = expand(S: S->getOperand());
1481 return Builder.CreateTrunc(V, DestTy: S->getType());
1482}
1483
1484Value *
1485SCEVExpander::visitZeroExtendExpr(SCEVUseT<const SCEVZeroExtendExpr *> S) {
1486 Value *V = expand(S: S->getOperand());
1487 return Builder.CreateZExt(V, DestTy: S->getType(), Name: "",
1488 IsNonNeg: SE.isKnownNonNegative(S: S->getOperand()));
1489}
1490
1491Value *
1492SCEVExpander::visitSignExtendExpr(SCEVUseT<const SCEVSignExtendExpr *> S) {
1493 Value *V = expand(S: S->getOperand());
1494 return Builder.CreateSExt(V, DestTy: S->getType());
1495}
1496
1497Value *SCEVExpander::expandMinMaxExpr(SCEVUseT<const SCEVNAryExpr *> S,
1498 Intrinsic::ID IntrinID, Twine Name,
1499 bool IsSequential) {
1500 bool PrevSafeMode = SafeUDivMode;
1501 SafeUDivMode |= IsSequential;
1502 Value *LHS = expand(S: S->getOperand(i: S->getNumOperands() - 1));
1503 Type *Ty = LHS->getType();
1504 if (IsSequential)
1505 LHS = Builder.CreateFreeze(V: LHS);
1506 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1507 SafeUDivMode = (IsSequential && i != 0) || PrevSafeMode;
1508 Value *RHS = expand(S: S->getOperand(i));
1509 if (IsSequential && i != 0)
1510 RHS = Builder.CreateFreeze(V: RHS);
1511 Value *Sel;
1512 if (Ty->isIntegerTy())
1513 Sel = Builder.CreateIntrinsic(ID: IntrinID, Types: {Ty}, Args: {LHS, RHS},
1514 /*FMFSource=*/nullptr, Name);
1515 else {
1516 Value *ICmp =
1517 Builder.CreateICmp(P: MinMaxIntrinsic::getPredicate(ID: IntrinID), LHS, RHS);
1518 Sel = Builder.CreateSelect(C: ICmp, True: LHS, False: RHS, Name);
1519 }
1520 LHS = Sel;
1521 }
1522 SafeUDivMode = PrevSafeMode;
1523 return LHS;
1524}
1525
1526Value *SCEVExpander::visitSMaxExpr(SCEVUseT<const SCEVSMaxExpr *> S) {
1527 return expandMinMaxExpr(S, IntrinID: Intrinsic::smax, Name: "smax");
1528}
1529
1530Value *SCEVExpander::visitUMaxExpr(SCEVUseT<const SCEVUMaxExpr *> S) {
1531 return expandMinMaxExpr(S, IntrinID: Intrinsic::umax, Name: "umax");
1532}
1533
1534Value *SCEVExpander::visitSMinExpr(SCEVUseT<const SCEVSMinExpr *> S) {
1535 return expandMinMaxExpr(S, IntrinID: Intrinsic::smin, Name: "smin");
1536}
1537
1538Value *SCEVExpander::visitUMinExpr(SCEVUseT<const SCEVUMinExpr *> S) {
1539 return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin");
1540}
1541
1542Value *SCEVExpander::visitSequentialUMinExpr(
1543 SCEVUseT<const SCEVSequentialUMinExpr *> S) {
1544 return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin",
1545 /*IsSequential*/ true);
1546}
1547
1548Value *SCEVExpander::visitVScale(SCEVUseT<const SCEVVScale *> S) {
1549 return Builder.CreateVScale(Ty: S->getType());
1550}
1551
1552Value *SCEVExpander::expandCodeFor(SCEVUse SH, Type *Ty,
1553 BasicBlock::iterator IP) {
1554 setInsertPoint(IP);
1555 return expandCodeFor(SH, Ty);
1556}
1557
1558Value *SCEVExpander::expandCodeFor(SCEVUse SH, Type *Ty) {
1559 // Expand the code for this SCEV.
1560 Value *V = expand(S: SH);
1561
1562 if (Ty && Ty != V->getType()) {
1563 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1564 "non-trivial casts should be done with the SCEVs directly!");
1565 V = InsertNoopCastOfTo(V, Ty);
1566 }
1567 return V;
1568}
1569
1570Value *SCEVExpander::FindValueInExprValueMap(
1571 SCEVUse S, const Instruction *InsertPt,
1572 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1573 // If the expansion is not in CanonicalMode, and the SCEV contains any
1574 // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1575 if (!CanonicalMode && SE.containsAddRecurrence(S))
1576 return nullptr;
1577
1578 // If S is a constant or unknown, it may be worse to reuse an existing Value.
1579 if (isa<SCEVConstant>(Val: S) || isa<SCEVUnknown>(Val: S))
1580 return nullptr;
1581
1582 for (Value *V : SE.getSCEVValues(S)) {
1583 Instruction *EntInst = dyn_cast<Instruction>(Val: V);
1584 if (!EntInst)
1585 continue;
1586
1587 // Choose a Value from the set which dominates the InsertPt.
1588 // InsertPt should be inside the Value's parent loop so as not to break
1589 // the LCSSA form.
1590 assert(EntInst->getFunction() == InsertPt->getFunction());
1591 if (S->getType() != V->getType() || !SE.DT.dominates(Def: EntInst, User: InsertPt) ||
1592 !(SE.LI.getLoopFor(BB: EntInst->getParent()) == nullptr ||
1593 SE.LI.getLoopFor(BB: EntInst->getParent())->contains(Inst: InsertPt)))
1594 continue;
1595
1596 // Make sure reusing the instruction is poison-safe.
1597 if (SE.canReuseInstruction(S, I: EntInst, DropPoisonGeneratingInsts))
1598 return V;
1599 DropPoisonGeneratingInsts.clear();
1600 }
1601 return nullptr;
1602}
1603
1604// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1605// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1606// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1607// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1608// the expansion will try to reuse Value from ExprValueMap, and only when it
1609// fails, expand the SCEV literally.
1610Value *SCEVExpander::expand(SCEVUse S) {
1611 // Compute an insertion point for this SCEV object. Hoist the instructions
1612 // as far out in the loop nest as possible.
1613 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1614
1615 // We can move insertion point only if there is no div or rem operations
1616 // otherwise we are risky to move it over the check for zero denominator.
1617 auto SafeToHoist = [](const SCEV *S) {
1618 return !SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
1619 if (const auto *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
1620 if (const auto *SC = dyn_cast<SCEVConstant>(Val: D->getRHS()))
1621 // Division by non-zero constants can be hoisted.
1622 return SC->getValue()->isZero();
1623 // All other divisions should not be moved as they may be
1624 // divisions by zero and should be kept within the
1625 // conditions of the surrounding loops that guard their
1626 // execution (see PR35406).
1627 return true;
1628 }
1629 return false;
1630 });
1631 };
1632 if (SafeToHoist(S)) {
1633 for (Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock());;
1634 L = L->getParentLoop()) {
1635 if (SE.isLoopInvariant(S, L)) {
1636 if (!L) break;
1637 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1638 InsertPt = Preheader->getTerminator()->getIterator();
1639 } else {
1640 // LSR sets the insertion point for AddRec start/step values to the
1641 // block start to simplify value reuse, even though it's an invalid
1642 // position. SCEVExpander must correct for this in all cases.
1643 InsertPt = L->getHeader()->getFirstInsertionPt();
1644 }
1645 } else {
1646 // If the SCEV is computable at this level, insert it into the header
1647 // after the PHIs (and after any other instructions that we've inserted
1648 // there) so that it is guaranteed to dominate any user inside the loop.
1649 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(Ptr: L))
1650 InsertPt = L->getHeader()->getFirstInsertionPt();
1651
1652 while (InsertPt != Builder.GetInsertPoint() &&
1653 (isInsertedInstruction(I: &*InsertPt))) {
1654 InsertPt = std::next(x: InsertPt);
1655 }
1656 break;
1657 }
1658 }
1659 }
1660
1661 // Check to see if we already expanded this here.
1662 auto I = InsertedExpressions.find(Val: std::make_pair(x&: S, y: &*InsertPt));
1663 if (I != InsertedExpressions.end())
1664 return I->second;
1665
1666 SCEVInsertPointGuard Guard(Builder, this);
1667 Builder.SetInsertPoint(TheBB: InsertPt->getParent(), IP: InsertPt);
1668
1669 // Expand the expression into instructions.
1670 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1671 Value *V = FindValueInExprValueMap(S, InsertPt: &*InsertPt, DropPoisonGeneratingInsts);
1672 if (!V) {
1673 V = visit(S);
1674 V = fixupLCSSAFormFor(V);
1675 } else {
1676 for (Instruction *I : DropPoisonGeneratingInsts) {
1677 rememberFlags(I);
1678 I->dropPoisonGeneratingAnnotations();
1679 // See if we can re-infer from first principles any of the flags we just
1680 // dropped.
1681 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
1682 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1683 auto *BO = cast<BinaryOperator>(Val: I);
1684 BO->setHasNoUnsignedWrap(
1685 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
1686 BO->setHasNoSignedWrap(
1687 ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
1688 }
1689 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(Val: I)) {
1690 auto *Src = NNI->getOperand(i_nocapture: 0);
1691 if (isImpliedByDomCondition(Pred: ICmpInst::ICMP_SGE, LHS: Src,
1692 RHS: Constant::getNullValue(Ty: Src->getType()), ContextI: I,
1693 DL).value_or(u: false))
1694 NNI->setNonNeg(true);
1695 }
1696 }
1697 }
1698 // Remember the expanded value for this SCEV at this location.
1699 //
1700 // This is independent of PostIncLoops. The mapped value simply materializes
1701 // the expression at this insertion point. If the mapped value happened to be
1702 // a postinc expansion, it could be reused by a non-postinc user, but only if
1703 // its insertion point was already at the head of the loop.
1704 InsertedExpressions[std::make_pair(x&: S, y: &*InsertPt)] = V;
1705 return V;
1706}
1707
1708void SCEVExpander::rememberInstruction(Value *I) {
1709 auto DoInsert = [this](Value *V) {
1710 if (!PostIncLoops.empty())
1711 InsertedPostIncValues.insert(V);
1712 else
1713 InsertedValues.insert(V);
1714 };
1715 DoInsert(I);
1716}
1717
1718void SCEVExpander::rememberFlags(Instruction *I) {
1719 // If we already have flags for the instruction, keep the existing ones.
1720 OrigFlags.try_emplace(Key: I, Args: PoisonFlags(I));
1721}
1722
1723void SCEVExpander::replaceCongruentIVInc(
1724 PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
1725 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1726 BasicBlock *LatchBlock = L->getLoopLatch();
1727 if (!LatchBlock)
1728 return;
1729
1730 Instruction *OrigInc =
1731 dyn_cast<Instruction>(Val: OrigPhi->getIncomingValueForBlock(BB: LatchBlock));
1732 Instruction *IsomorphicInc =
1733 dyn_cast<Instruction>(Val: Phi->getIncomingValueForBlock(BB: LatchBlock));
1734 if (!OrigInc || !IsomorphicInc)
1735 return;
1736
1737 // If this phi has the same width but is more canonical, replace the
1738 // original with it. As part of the "more canonical" determination,
1739 // respect a prior decision to use an IV chain.
1740 if (OrigPhi->getType() == Phi->getType()) {
1741 bool Chained = ChainedPhis.contains(V: Phi);
1742 if (!(Chained || isExpandedAddRecExprPHI(PN: OrigPhi, IncV: OrigInc, L)) &&
1743 (Chained || isExpandedAddRecExprPHI(PN: Phi, IncV: IsomorphicInc, L))) {
1744 std::swap(a&: OrigPhi, b&: Phi);
1745 std::swap(a&: OrigInc, b&: IsomorphicInc);
1746 }
1747 }
1748
1749 // Replacing the congruent phi is sufficient because acyclic
1750 // redundancy elimination, CSE/GVN, should handle the
1751 // rest. However, once SCEV proves that a phi is congruent,
1752 // it's often the head of an IV user cycle that is isomorphic
1753 // with the original phi. It's worth eagerly cleaning up the
1754 // common case of a single IV increment so that DeleteDeadPHIs
1755 // can remove cycles that had postinc uses.
1756 // Because we may potentially introduce a new use of OrigIV that didn't
1757 // exist before at this point, its poison flags need readjustment.
1758 const SCEV *TruncExpr =
1759 SE.getTruncateOrNoop(V: SE.getSCEV(V: OrigInc), Ty: IsomorphicInc->getType());
1760 if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(V: IsomorphicInc) ||
1761 !SE.LI.replacementPreservesLCSSAForm(From: IsomorphicInc, To: OrigInc))
1762 return;
1763
1764 bool BothHaveNUW = false;
1765 bool BothHaveNSW = false;
1766 auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(Val: OrigInc);
1767 auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(Val: IsomorphicInc);
1768 if (OBOIncV && OBOIsomorphic) {
1769 BothHaveNUW =
1770 OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1771 BothHaveNSW =
1772 OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1773 }
1774
1775 if (!hoistIVInc(IncV: OrigInc, InsertPos: IsomorphicInc,
1776 /*RecomputePoisonFlags*/ true))
1777 return;
1778
1779 // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1780 // are NUW/NSW, then we can preserve them on the wider increment; the narrower
1781 // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1782 // make IsomorphicInc's uses more poisonous.
1783 assert(OrigInc->getType()->getScalarSizeInBits() >=
1784 IsomorphicInc->getType()->getScalarSizeInBits() &&
1785 "Should only replace an increment with a wider one.");
1786 if (BothHaveNUW || BothHaveNSW) {
1787 OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
1788 OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
1789 }
1790
1791 SCEV_DEBUG_WITH_TYPE(DebugType,
1792 dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1793 << *IsomorphicInc << '\n');
1794 Value *NewInc = OrigInc;
1795 if (OrigInc->getType() != IsomorphicInc->getType()) {
1796 BasicBlock::iterator IP;
1797 if (PHINode *PN = dyn_cast<PHINode>(Val: OrigInc))
1798 IP = PN->getParent()->getFirstInsertionPt();
1799 else
1800 IP = OrigInc->getNextNode()->getIterator();
1801
1802 IRBuilder<> Builder(IP->getParent(), IP);
1803 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1804 NewInc =
1805 Builder.CreateTruncOrBitCast(V: OrigInc, DestTy: IsomorphicInc->getType(), Name: IVName);
1806 }
1807 IsomorphicInc->replaceAllUsesWith(V: NewInc);
1808 DeadInsts.emplace_back(Args&: IsomorphicInc);
1809}
1810
1811/// replaceCongruentIVs - Check for congruent phis in this loop header and
1812/// replace them with their most canonical representative. Return the number of
1813/// phis eliminated.
1814///
1815/// This does not depend on any SCEVExpander state but should be used in
1816/// the same context that SCEVExpander is used.
1817unsigned
1818SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1819 SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1820 const TargetTransformInfo *TTI) {
1821 // Find integer phis in order of increasing width.
1822 SmallVector<PHINode *, 8> Phis(
1823 llvm::make_pointer_range(Range: L->getHeader()->phis()));
1824
1825 if (TTI)
1826 // Use stable_sort to preserve order of equivalent PHIs, so the order
1827 // of the sorted Phis is the same from run to run on the same loop.
1828 llvm::stable_sort(Range&: Phis, C: [](Value *LHS, Value *RHS) {
1829 // Put pointers at the back and make sure pointer < pointer = false.
1830 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1831 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1832 return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1833 LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1834 });
1835
1836 unsigned NumElim = 0;
1837 DenseMap<const SCEV *, PHINode *> ExprToIVMap;
1838 // Process phis from wide to narrow. Map wide phis to their truncation
1839 // so narrow phis can reuse them.
1840 for (PHINode *Phi : Phis) {
1841 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1842 if (Value *V = simplifyInstruction(I: PN, Q: {DL, &SE.TLI, &SE.DT, &SE.AC}))
1843 return V;
1844 if (!SE.isSCEVable(Ty: PN->getType()))
1845 return nullptr;
1846 auto *Const = dyn_cast<SCEVConstant>(Val: SE.getSCEV(V: PN));
1847 if (!Const)
1848 return nullptr;
1849 return Const->getValue();
1850 };
1851
1852 // Fold constant phis. They may be congruent to other constant phis and
1853 // would confuse the logic below that expects proper IVs.
1854 if (Value *V = SimplifyPHINode(Phi)) {
1855 if (V->getType() != Phi->getType())
1856 continue;
1857 SE.forgetValue(V: Phi);
1858 Phi->replaceAllUsesWith(V);
1859 DeadInsts.emplace_back(Args&: Phi);
1860 ++NumElim;
1861 SCEV_DEBUG_WITH_TYPE(DebugType,
1862 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1863 << '\n');
1864 continue;
1865 }
1866
1867 if (!SE.isSCEVable(Ty: Phi->getType()))
1868 continue;
1869
1870 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(V: Phi)];
1871 if (!OrigPhiRef) {
1872 OrigPhiRef = Phi;
1873 if (Phi->getType()->isIntegerTy() && TTI &&
1874 TTI->isTruncateFree(Ty1: Phi->getType(), Ty2: Phis.back()->getType())) {
1875 // Make sure we only rewrite using simple induction variables;
1876 // otherwise, we can make the trip count of a loop unanalyzable
1877 // to SCEV.
1878 const SCEV *PhiExpr = SE.getSCEV(V: Phi);
1879 if (isa<SCEVAddRecExpr>(Val: PhiExpr)) {
1880 // This phi can be freely truncated to the narrowest phi type. Map the
1881 // truncated expression to it so it will be reused for narrow types.
1882 const SCEV *TruncExpr =
1883 SE.getTruncateExpr(Op: PhiExpr, Ty: Phis.back()->getType());
1884 ExprToIVMap[TruncExpr] = Phi;
1885 }
1886 }
1887 continue;
1888 }
1889
1890 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1891 // sense.
1892 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1893 continue;
1894
1895 replaceCongruentIVInc(Phi, OrigPhi&: OrigPhiRef, L, DT, DeadInsts);
1896 SCEV_DEBUG_WITH_TYPE(DebugType,
1897 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1898 << '\n');
1899 SCEV_DEBUG_WITH_TYPE(
1900 DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1901 ++NumElim;
1902 Value *NewIV = OrigPhiRef;
1903 if (OrigPhiRef->getType() != Phi->getType()) {
1904 IRBuilder<> Builder(L->getHeader(),
1905 L->getHeader()->getFirstInsertionPt());
1906 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1907 NewIV = Builder.CreateTruncOrBitCast(V: OrigPhiRef, DestTy: Phi->getType(), Name: IVName);
1908 }
1909 Phi->replaceAllUsesWith(V: NewIV);
1910 DeadInsts.emplace_back(Args&: Phi);
1911 }
1912 return NumElim;
1913}
1914
1915bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1916 const Instruction *At,
1917 Loop *L) {
1918 using namespace llvm::PatternMatch;
1919
1920 SmallVector<BasicBlock *, 4> ExitingBlocks;
1921 L->getExitingBlocks(ExitingBlocks);
1922
1923 // Look for suitable value in simple conditions at the loop exits.
1924 for (BasicBlock *BB : ExitingBlocks) {
1925 CmpPredicate Pred;
1926 Instruction *LHS, *RHS;
1927
1928 if (!match(V: BB->getTerminator(),
1929 P: m_Br(C: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Instruction(I&: RHS)),
1930 T: m_BasicBlock(), F: m_BasicBlock())))
1931 continue;
1932
1933 if (SE.getSCEV(V: LHS) == S && SE.DT.dominates(Def: LHS, User: At))
1934 return true;
1935
1936 if (SE.getSCEV(V: RHS) == S && SE.DT.dominates(Def: RHS, User: At))
1937 return true;
1938 }
1939
1940 // Use expand's logic which is used for reusing a previous Value in
1941 // ExprValueMap. Note that we don't currently model the cost of
1942 // needing to drop poison generating flags on the instruction if we
1943 // want to reuse it. We effectively assume that has zero cost.
1944 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1945 return FindValueInExprValueMap(S, InsertPt: At, DropPoisonGeneratingInsts) != nullptr;
1946}
1947
1948template<typename T> static InstructionCost costAndCollectOperands(
1949 const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1950 TargetTransformInfo::TargetCostKind CostKind,
1951 SmallVectorImpl<SCEVOperand> &Worklist) {
1952
1953 const T *S = cast<T>(WorkItem.S);
1954 InstructionCost Cost = 0;
1955 // Object to help map SCEV operands to expanded IR instructions.
1956 struct OperationIndices {
1957 OperationIndices(unsigned Opc, size_t min, size_t max) :
1958 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1959 unsigned Opcode;
1960 size_t MinIdx;
1961 size_t MaxIdx;
1962 };
1963
1964 // Collect the operations of all the instructions that will be needed to
1965 // expand the SCEVExpr. This is so that when we come to cost the operands,
1966 // we know what the generated user(s) will be.
1967 SmallVector<OperationIndices, 2> Operations;
1968
1969 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1970 Operations.emplace_back(Opcode, 0, 0);
1971 return TTI.getCastInstrCost(Opcode, Dst: S->getType(),
1972 Src: S->getOperand(0)->getType(),
1973 CCH: TTI::CastContextHint::None, CostKind);
1974 };
1975
1976 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1977 unsigned MinIdx = 0,
1978 unsigned MaxIdx = 1) -> InstructionCost {
1979 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1980 return NumRequired *
1981 TTI.getArithmeticInstrCost(Opcode, Ty: S->getType(), CostKind);
1982 };
1983
1984 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1985 unsigned MaxIdx) -> InstructionCost {
1986 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1987 Type *OpType = S->getType();
1988 return NumRequired * TTI.getCmpSelInstrCost(
1989 Opcode, ValTy: OpType, CondTy: CmpInst::makeCmpResultType(opnd_type: OpType),
1990 VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
1991 };
1992
1993 switch (S->getSCEVType()) {
1994 case scCouldNotCompute:
1995 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1996 case scUnknown:
1997 case scConstant:
1998 case scVScale:
1999 return 0;
2000 case scPtrToAddr:
2001 Cost = CastCost(Instruction::PtrToAddr);
2002 break;
2003 case scPtrToInt:
2004 Cost = CastCost(Instruction::PtrToInt);
2005 break;
2006 case scTruncate:
2007 Cost = CastCost(Instruction::Trunc);
2008 break;
2009 case scZeroExtend:
2010 Cost = CastCost(Instruction::ZExt);
2011 break;
2012 case scSignExtend:
2013 Cost = CastCost(Instruction::SExt);
2014 break;
2015 case scUDivExpr: {
2016 unsigned Opcode = Instruction::UDiv;
2017 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
2018 if (SC->getAPInt().isPowerOf2())
2019 Opcode = Instruction::LShr;
2020 Cost = ArithCost(Opcode, 1);
2021 break;
2022 }
2023 case scAddExpr:
2024 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
2025 break;
2026 case scMulExpr:
2027 // TODO: this is a very pessimistic cost modelling for Mul,
2028 // because of Bin Pow algorithm actually used by the expander,
2029 // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
2030 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
2031 break;
2032 case scSMaxExpr:
2033 case scUMaxExpr:
2034 case scSMinExpr:
2035 case scUMinExpr:
2036 case scSequentialUMinExpr: {
2037 // FIXME: should this ask the cost for Intrinsic's?
2038 // The reduction tree.
2039 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
2040 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
2041 switch (S->getSCEVType()) {
2042 case scSequentialUMinExpr: {
2043 // The safety net against poison.
2044 // FIXME: this is broken.
2045 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
2046 Cost += ArithCost(Instruction::Or,
2047 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
2048 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
2049 break;
2050 }
2051 default:
2052 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
2053 "Unhandled SCEV expression type?");
2054 break;
2055 }
2056 break;
2057 }
2058 case scAddRecExpr: {
2059 // Addrec expands to a phi and add per recurrence.
2060 unsigned NumRecurrences = S->getNumOperands() - 1;
2061 Cost += TTI.getCFInstrCost(Opcode: Instruction::PHI, CostKind) * NumRecurrences;
2062 Cost +=
2063 TTI.getArithmeticInstrCost(Opcode: Instruction::Add, Ty: S->getType(), CostKind) *
2064 NumRecurrences;
2065 // AR start is used in phi.
2066 Worklist.emplace_back(Instruction::PHI, 0, S->getOperand(0));
2067 // Other operands are used in add.
2068 for (const SCEV *Op : S->operands().drop_front())
2069 Worklist.emplace_back(Args: Instruction::Add, Args: 1, Args&: Op);
2070 break;
2071 }
2072 }
2073
2074 for (auto &CostOp : Operations) {
2075 for (auto SCEVOp : enumerate(S->operands())) {
2076 // Clamp the index to account for multiple IR operations being chained.
2077 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
2078 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
2079 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
2080 }
2081 }
2082 return Cost;
2083}
2084
2085bool SCEVExpander::isHighCostExpansionHelper(
2086 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
2087 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
2088 SmallPtrSetImpl<const SCEV *> &Processed,
2089 SmallVectorImpl<SCEVOperand> &Worklist) {
2090 if (Cost > Budget)
2091 return true; // Already run out of budget, give up.
2092
2093 const SCEV *S = WorkItem.S;
2094 // Was the cost of expansion of this expression already accounted for?
2095 if (!isa<SCEVConstant>(Val: S) && !Processed.insert(Ptr: S).second)
2096 return false; // We have already accounted for this expression.
2097
2098 // If we can find an existing value for this scev available at the point "At"
2099 // then consider the expression cheap.
2100 if (hasRelatedExistingExpansion(S, At: &At, L))
2101 return false; // Consider the expression to be free.
2102
2103 TargetTransformInfo::TargetCostKind CostKind =
2104 L->getHeader()->getParent()->hasMinSize()
2105 ? TargetTransformInfo::TCK_CodeSize
2106 : TargetTransformInfo::TCK_RecipThroughput;
2107
2108 switch (S->getSCEVType()) {
2109 case scCouldNotCompute:
2110 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
2111 case scUnknown:
2112 case scVScale:
2113 // Assume to be zero-cost.
2114 return false;
2115 case scConstant: {
2116 // Only evalulate the costs of constants when optimizing for size.
2117 if (CostKind != TargetTransformInfo::TCK_CodeSize)
2118 return false;
2119 const APInt &Imm = cast<SCEVConstant>(Val: S)->getAPInt();
2120 Type *Ty = S->getType();
2121 Cost += TTI.getIntImmCostInst(
2122 Opc: WorkItem.ParentOpcode, Idx: WorkItem.OperandIdx, Imm, Ty, CostKind);
2123 return Cost > Budget;
2124 }
2125 case scTruncate:
2126 case scPtrToAddr:
2127 case scPtrToInt:
2128 case scZeroExtend:
2129 case scSignExtend: {
2130 Cost +=
2131 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2132 return false; // Will answer upon next entry into this function.
2133 }
2134 case scUDivExpr: {
2135 // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2136 // HowManyLessThans produced to compute a precise expression, rather than a
2137 // UDiv from the user's code. If we can't find a UDiv in the code with some
2138 // simple searching, we need to account for it's cost.
2139
2140 // At the beginning of this function we already tried to find existing
2141 // value for plain 'S'. Now try to lookup 'S + 1' since it is common
2142 // pattern involving division. This is just a simple search heuristic.
2143 if (hasRelatedExistingExpansion(
2144 S: SE.getAddExpr(LHS: S, RHS: SE.getConstant(Ty: S->getType(), V: 1)), At: &At, L))
2145 return false; // Consider it to be free.
2146
2147 Cost +=
2148 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2149 return false; // Will answer upon next entry into this function.
2150 }
2151 case scAddExpr:
2152 case scMulExpr:
2153 case scUMaxExpr:
2154 case scSMaxExpr:
2155 case scUMinExpr:
2156 case scSMinExpr:
2157 case scSequentialUMinExpr: {
2158 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2159 "Nary expr should have more than 1 operand.");
2160 // The simple nary expr will require one less op (or pair of ops)
2161 // than the number of it's terms.
2162 Cost +=
2163 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2164 return Cost > Budget;
2165 }
2166 case scAddRecExpr: {
2167 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
2168 "Polynomial should be at least linear");
2169 Cost += costAndCollectOperands<SCEVAddRecExpr>(
2170 WorkItem, TTI, CostKind, Worklist);
2171 return Cost > Budget;
2172 }
2173 }
2174 llvm_unreachable("Unknown SCEV kind!");
2175}
2176
2177Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2178 Instruction *IP) {
2179 assert(IP);
2180 switch (Pred->getKind()) {
2181 case SCEVPredicate::P_Union:
2182 return expandUnionPredicate(Pred: cast<SCEVUnionPredicate>(Val: Pred), Loc: IP);
2183 case SCEVPredicate::P_Compare:
2184 return expandComparePredicate(Pred: cast<SCEVComparePredicate>(Val: Pred), Loc: IP);
2185 case SCEVPredicate::P_Wrap: {
2186 auto *AddRecPred = cast<SCEVWrapPredicate>(Val: Pred);
2187 return expandWrapPredicate(P: AddRecPred, Loc: IP);
2188 }
2189 }
2190 llvm_unreachable("Unknown SCEV predicate type");
2191}
2192
2193Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
2194 Instruction *IP) {
2195 Value *Expr0 = expand(S: Pred->getLHS(), I: IP);
2196 Value *Expr1 = expand(S: Pred->getRHS(), I: IP);
2197
2198 Builder.SetInsertPoint(IP);
2199 auto InvPred = ICmpInst::getInversePredicate(pred: Pred->getPredicate());
2200 auto *I = Builder.CreateICmp(P: InvPred, LHS: Expr0, RHS: Expr1, Name: "ident.check");
2201 return I;
2202}
2203
2204Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2205 Instruction *Loc, bool Signed) {
2206 assert(AR->isAffine() && "Cannot generate RT check for "
2207 "non-affine expression");
2208
2209 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2210 SmallVector<const SCEVPredicate *, 4> Pred;
2211 const SCEV *ExitCount =
2212 SE.getPredicatedSymbolicMaxBackedgeTakenCount(L: AR->getLoop(), Predicates&: Pred);
2213
2214 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2215
2216 const SCEV *Step = AR->getStepRecurrence(SE);
2217 const SCEV *Start = AR->getStart();
2218
2219 Type *ARTy = AR->getType();
2220 unsigned SrcBits = SE.getTypeSizeInBits(Ty: ExitCount->getType());
2221 unsigned DstBits = SE.getTypeSizeInBits(Ty: ARTy);
2222
2223 // The expression {Start,+,Step} has nusw/nssw if
2224 // Step < 0, Start - |Step| * Backedge <= Start
2225 // Step >= 0, Start + |Step| * Backedge > Start
2226 // and |Step| * Backedge doesn't unsigned overflow.
2227
2228 Builder.SetInsertPoint(Loc);
2229 Value *TripCountVal = expand(S: ExitCount, I: Loc);
2230
2231 IntegerType *Ty =
2232 IntegerType::get(C&: Loc->getContext(), NumBits: SE.getTypeSizeInBits(Ty: ARTy));
2233
2234 Value *StepValue = expand(S: Step, I: Loc);
2235 Value *NegStepValue = expand(S: SE.getNegativeSCEV(V: Step), I: Loc);
2236 Value *StartValue = expand(S: Start, I: Loc);
2237
2238 ConstantInt *Zero =
2239 ConstantInt::get(Context&: Loc->getContext(), V: APInt::getZero(numBits: DstBits));
2240
2241 Builder.SetInsertPoint(Loc);
2242 // Compute |Step|
2243 Value *StepCompare = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: StepValue, RHS: Zero);
2244 Value *AbsStep = Builder.CreateSelect(C: StepCompare, True: NegStepValue, False: StepValue);
2245
2246 // Compute |Step| * Backedge
2247 // Compute:
2248 // 1. Start + |Step| * Backedge < Start
2249 // 2. Start - |Step| * Backedge > Start
2250 //
2251 // And select either 1. or 2. depending on whether step is positive or
2252 // negative. If Step is known to be positive or negative, only create
2253 // either 1. or 2.
2254 auto ComputeEndCheck = [&]() -> Value * {
2255 // Get the backedge taken count and truncate or extended to the AR type.
2256 Value *TruncTripCount = Builder.CreateZExtOrTrunc(V: TripCountVal, DestTy: Ty);
2257
2258 CallInst *Mul = Builder.CreateIntrinsic(ID: Intrinsic::umul_with_overflow, Types: Ty,
2259 Args: {AbsStep, TruncTripCount},
2260 /*FMFSource=*/nullptr, Name: "mul");
2261 Value *MulV = Builder.CreateExtractValue(Agg: Mul, Idxs: 0, Name: "mul.result");
2262 Value *OfMul = Builder.CreateExtractValue(Agg: Mul, Idxs: 1, Name: "mul.overflow");
2263
2264 Value *Add = nullptr, *Sub = nullptr;
2265 bool NeedPosCheck = !SE.isKnownNegative(S: Step);
2266 bool NeedNegCheck = !SE.isKnownPositive(S: Step);
2267
2268 if (isa<PointerType>(Val: ARTy)) {
2269 Value *NegMulV = Builder.CreateNeg(V: MulV);
2270 if (NeedPosCheck)
2271 Add = Builder.CreatePtrAdd(Ptr: StartValue, Offset: MulV);
2272 if (NeedNegCheck)
2273 Sub = Builder.CreatePtrAdd(Ptr: StartValue, Offset: NegMulV);
2274 } else {
2275 if (NeedPosCheck)
2276 Add = Builder.CreateAdd(LHS: StartValue, RHS: MulV);
2277 if (NeedNegCheck)
2278 Sub = Builder.CreateSub(LHS: StartValue, RHS: MulV);
2279 }
2280
2281 Value *EndCompareLT = nullptr;
2282 Value *EndCompareGT = nullptr;
2283 Value *EndCheck = nullptr;
2284 if (NeedPosCheck)
2285 EndCheck = EndCompareLT = Builder.CreateICmp(
2286 P: Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, LHS: Add, RHS: StartValue);
2287 if (NeedNegCheck)
2288 EndCheck = EndCompareGT = Builder.CreateICmp(
2289 P: Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, LHS: Sub, RHS: StartValue);
2290 if (NeedPosCheck && NeedNegCheck) {
2291 // Select the answer based on the sign of Step.
2292 EndCheck = Builder.CreateSelect(C: StepCompare, True: EndCompareGT, False: EndCompareLT);
2293 }
2294 return Builder.CreateOr(LHS: EndCheck, RHS: OfMul);
2295 };
2296 Value *EndCheck = ComputeEndCheck();
2297
2298 // If the backedge taken count type is larger than the AR type,
2299 // check that we don't drop any bits by truncating it. If we are
2300 // dropping bits, then we have overflow (unless the step is zero).
2301 if (SrcBits > DstBits) {
2302 auto MaxVal = APInt::getMaxValue(numBits: DstBits).zext(width: SrcBits);
2303 auto *BackedgeCheck =
2304 Builder.CreateICmp(P: ICmpInst::ICMP_UGT, LHS: TripCountVal,
2305 RHS: ConstantInt::get(Context&: Loc->getContext(), V: MaxVal));
2306 BackedgeCheck = Builder.CreateAnd(
2307 LHS: BackedgeCheck, RHS: Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: StepValue, RHS: Zero));
2308
2309 EndCheck = Builder.CreateOr(LHS: EndCheck, RHS: BackedgeCheck);
2310 }
2311
2312 return EndCheck;
2313}
2314
2315Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2316 Instruction *IP) {
2317 const auto *A = cast<SCEVAddRecExpr>(Val: Pred->getExpr());
2318 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2319
2320 // Add a check for NUSW
2321 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2322 NUSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: false);
2323
2324 // Add a check for NSSW
2325 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2326 NSSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: true);
2327
2328 if (NUSWCheck && NSSWCheck)
2329 return Builder.CreateOr(LHS: NUSWCheck, RHS: NSSWCheck);
2330
2331 if (NUSWCheck)
2332 return NUSWCheck;
2333
2334 if (NSSWCheck)
2335 return NSSWCheck;
2336
2337 return ConstantInt::getFalse(Context&: IP->getContext());
2338}
2339
2340Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2341 Instruction *IP) {
2342 // Loop over all checks in this set.
2343 SmallVector<Value *> Checks;
2344 for (const auto *Pred : Union->getPredicates()) {
2345 Checks.push_back(Elt: expandCodeForPredicate(Pred, IP));
2346 Builder.SetInsertPoint(IP);
2347 }
2348
2349 if (Checks.empty())
2350 return ConstantInt::getFalse(Context&: IP->getContext());
2351 return Builder.CreateOr(Ops: Checks);
2352}
2353
2354Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2355 auto *DefI = dyn_cast<Instruction>(Val: V);
2356 if (!PreserveLCSSA || !DefI)
2357 return V;
2358
2359 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2360 Loop *DefLoop = SE.LI.getLoopFor(BB: DefI->getParent());
2361 Loop *UseLoop = SE.LI.getLoopFor(BB: InsertPt->getParent());
2362 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(L: UseLoop))
2363 return V;
2364
2365 // Create a temporary instruction to at the current insertion point, so we
2366 // can hand it off to the helper to create LCSSA PHIs if required for the
2367 // new use.
2368 // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2369 // would accept a insertion point and return an LCSSA phi for that
2370 // insertion point, so there is no need to insert & remove the temporary
2371 // instruction.
2372 Type *ToTy;
2373 if (DefI->getType()->isIntegerTy())
2374 ToTy = PointerType::get(C&: DefI->getContext(), AddressSpace: 0);
2375 else
2376 ToTy = Type::getInt32Ty(C&: DefI->getContext());
2377 Instruction *User =
2378 CastInst::CreateBitOrPointerCast(S: DefI, Ty: ToTy, Name: "tmp.lcssa.user", InsertBefore: InsertPt);
2379 llvm::scope_exit RemoveUserOnExit([User]() { User->eraseFromParent(); });
2380
2381 SmallVector<Instruction *, 1> ToUpdate;
2382 ToUpdate.push_back(Elt: DefI);
2383 SmallVector<PHINode *, 16> PHIsToRemove;
2384 SmallVector<PHINode *, 16> InsertedPHIs;
2385 formLCSSAForInstructions(Worklist&: ToUpdate, DT: SE.DT, LI: SE.LI, SE: &SE, PHIsToRemove: &PHIsToRemove,
2386 InsertedPHIs: &InsertedPHIs);
2387 for (PHINode *PN : InsertedPHIs)
2388 rememberInstruction(I: PN);
2389 for (PHINode *PN : PHIsToRemove) {
2390 if (!PN->use_empty())
2391 continue;
2392 InsertedValues.erase(V: PN);
2393 InsertedPostIncValues.erase(V: PN);
2394 PN->eraseFromParent();
2395 }
2396
2397 return User->getOperand(i: 0);
2398}
2399
2400namespace {
2401// Search for a SCEV subexpression that is not safe to expand. Any expression
2402// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2403// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2404// instruction, but the important thing is that we prove the denominator is
2405// nonzero before expansion.
2406//
2407// IVUsers already checks that IV-derived expressions are safe. So this check is
2408// only needed when the expression includes some subexpression that is not IV
2409// derived.
2410//
2411// Currently, we only allow division by a value provably non-zero here.
2412//
2413// We cannot generally expand recurrences unless the step dominates the loop
2414// header. The expander handles the special case of affine recurrences by
2415// scaling the recurrence outside the loop, but this technique isn't generally
2416// applicable. Expanding a nested recurrence outside a loop requires computing
2417// binomial coefficients. This could be done, but the recurrence has to be in a
2418// perfectly reduced form, which can't be guaranteed.
2419struct SCEVFindUnsafe {
2420 ScalarEvolution &SE;
2421 bool CanonicalMode;
2422 bool IsUnsafe = false;
2423
2424 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2425 : SE(SE), CanonicalMode(CanonicalMode) {}
2426
2427 bool follow(const SCEV *S) {
2428 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
2429 if (!SE.isKnownNonZero(S: D->getRHS())) {
2430 IsUnsafe = true;
2431 return false;
2432 }
2433 }
2434 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2435 // For non-affine addrecs or in non-canonical mode we need a preheader
2436 // to insert into.
2437 if (!AR->getLoop()->getLoopPreheader() &&
2438 (!CanonicalMode || !AR->isAffine())) {
2439 IsUnsafe = true;
2440 return false;
2441 }
2442 }
2443 return true;
2444 }
2445 bool isDone() const { return IsUnsafe; }
2446};
2447} // namespace
2448
2449bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
2450 SCEVFindUnsafe Search(SE, CanonicalMode);
2451 visitAll(Root: S, Visitor&: Search);
2452 return !Search.IsUnsafe;
2453}
2454
2455bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2456 const Instruction *InsertionPoint) const {
2457 if (!isSafeToExpand(S))
2458 return false;
2459 // We have to prove that the expanded site of S dominates InsertionPoint.
2460 // This is easy when not in the same block, but hard when S is an instruction
2461 // to be expanded somewhere inside the same block as our insertion point.
2462 // What we really need here is something analogous to an OrderedBasicBlock,
2463 // but for the moment, we paper over the problem by handling two common and
2464 // cheap to check cases.
2465 if (SE.properlyDominates(S, BB: InsertionPoint->getParent()))
2466 return true;
2467 if (SE.dominates(S, BB: InsertionPoint->getParent())) {
2468 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2469 return true;
2470 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S))
2471 if (llvm::is_contained(Range: InsertionPoint->operand_values(), Element: U->getValue()))
2472 return true;
2473 }
2474 return false;
2475}
2476
2477void SCEVExpanderCleaner::cleanup() {
2478 // Result is used, nothing to remove.
2479 if (ResultUsed)
2480 return;
2481
2482 // Restore original poison flags.
2483 for (auto [I, Flags] : Expander.OrigFlags)
2484 Flags.apply(I);
2485
2486 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2487#ifndef NDEBUG
2488 SmallPtrSet<Instruction *, 8> InsertedSet(llvm::from_range,
2489 InsertedInstructions);
2490 (void)InsertedSet;
2491#endif
2492 // Remove sets with value handles.
2493 Expander.clear();
2494
2495 // Remove all inserted instructions.
2496 for (Instruction *I : reverse(C&: InsertedInstructions)) {
2497#ifndef NDEBUG
2498 assert(all_of(I->users(),
2499 [&InsertedSet](Value *U) {
2500 return InsertedSet.contains(cast<Instruction>(U));
2501 }) &&
2502 "removed instruction should only be used by instructions inserted "
2503 "during expansion");
2504#endif
2505 assert(!I->getType()->isVoidTy() &&
2506 "inserted instruction should have non-void types");
2507 I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType()));
2508 I->eraseFromParent();
2509 }
2510}
2511