1//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of the scalar evolution expander,
10// which is used to generate the code corresponding to a given scalar evolution
11// expression.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/Analysis/InstructionSimplify.h"
19#include "llvm/Analysis/LoopInfo.h"
20#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
21#include "llvm/Analysis/TargetTransformInfo.h"
22#include "llvm/Analysis/ValueTracking.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/IR/IntrinsicInst.h"
26#include "llvm/IR/PatternMatch.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
29#include "llvm/Transforms/Utils/Local.h"
30#include "llvm/Transforms/Utils/LoopUtils.h"
31
32#if LLVM_ENABLE_ABI_BREAKING_CHECKS
33#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
34#else
35#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
36#endif
37
38using namespace llvm;
39
// Tunable budget for callers that only expand a SCEV when doing so is
// cheap: expansions estimated to cost more than this many instructions
// are rejected as "not cheap".
cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
    "scev-cheap-expansion-budget", cl::Hidden, cl::init(Val: 4),
    cl::desc("When performing SCEV expansion only if it is cheap to do, this "
             "controls the budget that is considered cheap (default = 4)"));
44
45using namespace PatternMatch;
46using namespace SCEVPatternMatch;
47
48PoisonFlags::PoisonFlags(const Instruction *I) {
49 NUW = false;
50 NSW = false;
51 Exact = false;
52 Disjoint = false;
53 NNeg = false;
54 SameSign = false;
55 GEPNW = GEPNoWrapFlags::none();
56 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I)) {
57 NUW = OBO->hasNoUnsignedWrap();
58 NSW = OBO->hasNoSignedWrap();
59 }
60 if (auto *PEO = dyn_cast<PossiblyExactOperator>(Val: I))
61 Exact = PEO->isExact();
62 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
63 Disjoint = PDI->isDisjoint();
64 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
65 NNeg = PNI->hasNonNeg();
66 if (auto *TI = dyn_cast<TruncInst>(Val: I)) {
67 NUW = TI->hasNoUnsignedWrap();
68 NSW = TI->hasNoSignedWrap();
69 }
70 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I))
71 GEPNW = GEP->getNoWrapFlags();
72 if (auto *ICmp = dyn_cast<ICmpInst>(Val: I))
73 SameSign = ICmp->hasSameSign();
74}
75
/// Write the recorded poison-generating flags back onto \p I. Each flag
/// category is only applied when \p I is an instruction kind that can carry
/// it, mirroring the checks in the constructor.
void PoisonFlags::apply(Instruction *I) {
  if (isa<OverflowingBinaryOperator>(Val: I)) {
    I->setHasNoUnsignedWrap(NUW);
    I->setHasNoSignedWrap(NSW);
  }
  if (isa<PossiblyExactOperator>(Val: I))
    I->setIsExact(Exact);
  if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: I))
    PDI->setIsDisjoint(Disjoint);
  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: I))
    PNI->setNonNeg(NNeg);
  // Trunc reuses the NUW/NSW members for its own nuw/nsw bits.
  if (isa<TruncInst>(Val: I)) {
    I->setHasNoUnsignedWrap(NUW);
    I->setHasNoSignedWrap(NSW);
  }
  if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: I))
    GEP->setNoWrapFlags(GEPNW);
  if (auto *ICmp = dyn_cast<ICmpInst>(Val: I))
    ICmp->setSameSign(SameSign);
}
96
/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one (= dominating IP) exists, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // This function must be called with the builder having a valid insertion
  // point. It doesn't need to be the actual IP where the uses of the returned
  // cast will be added, but it must dominate such IP.
  // We use this precondition to produce a cast that will dominate all its
  // uses. In particular, this is crucial for the case where the builder's
  // insertion point *is* the point where we were asked to put the cast.
  // Since we don't know the builder's insertion point is actually
  // where the uses will be added (only that it dominates it), we are
  // not allowed to move it.
  BasicBlock::iterator BIP = Builder.GetInsertPoint();

  Value *Ret = nullptr;

  // Constants have no user list to scan; they always go through the
  // create path below.
  if (!isa<Constant>(Val: V)) {
    // Check to see if there is already a cast!
    // Scan V's users for an existing cast with the right opcode and type.
    for (User *U : V->users()) {
      if (U->getType() != Ty)
        continue;
      CastInst *CI = dyn_cast<CastInst>(Val: U);
      if (!CI || CI->getOpcode() != Op)
        continue;

      // Found a suitable cast that is at IP or comes before IP. Use it. Note
      // that the cast must also properly dominate the Builder's insertion
      // point.
      if (IP->getParent() == CI->getParent() && &*BIP != CI &&
          (&*IP == CI || CI->comesBefore(Other: &*IP))) {
        Ret = CI;
        break;
      }
    }
  }

  // Create a new cast.
  if (!Ret) {
    // The guard restores the builder's insertion point on scope exit.
    SCEVInsertPointGuard Guard(Builder, this);
    Builder.SetInsertPoint(&*IP);
    Ret = Builder.CreateCast(Op, V, DestTy: Ty, Name: V->getName());
  }

  // We assert at the end of the function since IP might point to an
  // instruction with different dominance properties than a cast
  // (an invoke for example) and not dominate BIP (but the cast does).
  assert(!isa<Instruction>(Ret) ||
         SE.DT.dominates(cast<Instruction>(Ret), &*BIP));

  return Ret;
}
151
/// Find a legal insertion point immediately "after" \p I: past any PHIs and
/// EH pads at that position, and past instructions previously inserted by
/// this expander (so they can be reused). Never skips past \p MustDominate
/// itself.
BasicBlock::iterator
SCEVExpander::findInsertPointAfter(Instruction *I,
                                   Instruction *MustDominate) const {
  BasicBlock::iterator IP = ++I->getIterator();
  // An invoke transfers control to its normal destination; insert there.
  if (auto *II = dyn_cast<InvokeInst>(Val: I))
    IP = II->getNormalDest()->begin();

  // PHIs must stay grouped at the top of the block.
  while (isa<PHINode>(Val: IP))
    ++IP;

  if (isa<FuncletPadInst>(Val: IP) || isa<LandingPadInst>(Val: IP)) {
    // Ordinary instructions cannot precede these pads; step over them.
    ++IP;
  } else if (isa<CatchSwitchInst>(Val: IP)) {
    // A catchswitch is a terminator with no insertion room after it; fall
    // back to the first legal insertion point of MustDominate's block.
    IP = MustDominate->getParent()->getFirstInsertionPt();
  } else {
    assert(!IP->isEHPad() && "unexpected eh pad!");
  }

  // Adjust insert point to be after instructions inserted by the expander, so
  // we can re-use already inserted instructions. Avoid skipping past the
  // original \p MustDominate, in case it is an inserted instruction.
  while (isInsertedInstruction(I: &*IP) && &*IP != MustDominate)
    ++IP;

  return IP;
}
178
179void SCEVExpander::eraseDeadInstructions(Value *Root) {
180 SmallVector<Value *> WorkList;
181 SmallPtrSet<Value *, 8> DeletedValues;
182 append_range(C&: WorkList, R: getAllInsertedInstructions());
183 while (!WorkList.empty()) {
184 Value *V = WorkList.pop_back_val();
185 if (DeletedValues.contains(Ptr: V))
186 continue;
187 auto *I = dyn_cast<Instruction>(Val: V);
188 if (!I || I == Root || !isInsertedInstruction(I) ||
189 !isInstructionTriviallyDead(I))
190 continue;
191 append_range(C&: WorkList, R: I->operands());
192 InsertedValues.erase(V: I);
193 InsertedPostIncValues.erase(V: I);
194 DeletedValues.insert(Ptr: I);
195 I->eraseFromParent();
196 }
197}
198
/// Pick the best place to materialize a cast of \p V: after \p V's
/// definition when V is an instruction, otherwise near the start of the
/// entry block.
BasicBlock::iterator
SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(Val: V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    // Skip bitcasts of *other* arguments so argument casts stay clustered;
    // stop once we reach a bitcast of A itself or a non-argument bitcast.
    while ((isa<BitCastInst>(Val: IP) &&
            isa<Argument>(Val: cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: 0)) &&
            cast<BitCastInst>(Val&: IP)->getOperand(i_nocapture: 0) != A))
      ++IP;
    return IP;
  }

  // Cast the instruction immediately after the instruction.
  if (Instruction *I = dyn_cast<Instruction>(Val: V))
    return findInsertPointAfter(I, MustDominate: &*Builder.GetInsertPoint());

  // Otherwise, this must be some kind of a constant,
  // so let's plop this cast into the function's entry block.
  assert(isa<Constant>(V) &&
         "Expected the cast argument to be a global/constant");
  return Builder.GetInsertBlock()
      ->getParent()
      ->getEntryBlock()
      .getFirstInsertionPt();
}
225
/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(Val: V, SrcIsSigned: false, Ty, DstIsSigned: false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // inttoptr only works for integral pointers. For non-integral pointers, we
  // can create a GEP on null with the integral value as index. Note that
  // it is safe to use GEP of null instead of inttoptr here, because only
  // expressions already based on a GEP of null should be converted to pointers
  // during expansion.
  if (Op == Instruction::IntToPtr) {
    auto *PtrTy = cast<PointerType>(Val: Ty);
    if (DL.isNonIntegralPointerType(PT: PtrTy))
      return Builder.CreatePtrAdd(Ptr: Constant::getNullValue(Ty: PtrTy), Offset: V, Name: "scevgep");
  }
  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    // Identity cast: nothing to do.
    if (V->getType() == Ty)
      return V;
    // Casting back through an existing cast: reuse the original operand.
    if (CastInst *CI = dyn_cast<CastInst>(Val: V)) {
      if (CI->getOperand(i_nocapture: 0)->getType() == Ty)
        return CI->getOperand(i_nocapture: 0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  // A round trip through the opposite cast of the same bit width is a no-op,
  // whether the inner cast is an instruction or a constant expression.
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(Ty: V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(Val: V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(Ty: CI->getType()) ==
              SE.getTypeSizeInBits(Ty: CI->getOperand(i_nocapture: 0)->getType()))
        return CI->getOperand(i_nocapture: 0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(Ty: CE->getType()) ==
              SE.getTypeSizeInBits(Ty: CE->getOperand(i_nocapture: 0)->getType()))
        return CE->getOperand(i_nocapture: 0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(Val: V))
    return ConstantExpr::getCast(ops: Op, C, Ty);

  // Try to reuse existing cast, or insert one.
  return ReuseOrCreateCast(V, Ty, Op, IP: GetOptimalInsertionPointForCastOf(V));
}
281
/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation, and hoisting
/// to an outer loop when the opportunity is there and it is safe.
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS,
                                 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(Val: LHS))
    if (Constant *CRHS = dyn_cast<Constant>(Val: RHS))
      if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, LHS: CLHS, RHS: CRHS, DL))
        return Res;

  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  // Only the last few instructions are examined to keep the scan cheap.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Reusing an instruction whose poison flags disagree with the
      // requested Flags could introduce (or lose) poison; reject those.
      auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
        // Ensure that no-wrap flags match.
        if (isa<OverflowingBinaryOperator>(Val: I)) {
          if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
            return true;
          if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
            return true;
        }
        // Conservatively, do not use any instruction which has any of exact
        // flags installed.
        if (isa<PossiblyExactOperator>(Val: I) && I->isExact())
          return true;
        return false;
      };
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(i: 0) == LHS &&
          IP->getOperand(i: 1) == RHS && !canGenerateIncompatiblePoison(&*IP))
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  // Remember the debug location at the original point; the new binop keeps
  // it even if we hoist the insertion point out of loops below.
  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  SCEVInsertPointGuard Guard(Builder, this);

  if (IsSafeToHoist) {
    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V: LHS) || !L->isLoopInvariant(V: RHS)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }
  }

  // If we haven't found this binop, insert it.
  // TODO: Use the Builder, which will make CreateBinOp below fold with
  // InstSimplifyFolder.
  Instruction *BO = Builder.Insert(I: BinaryOperator::Create(Op: Opcode, S1: LHS, S2: RHS));
  BO->setDebugLoc(Loc);
  if (Flags & SCEV::FlagNUW)
    BO->setHasNoUnsignedWrap();
  if (Flags & SCEV::FlagNSW)
    BO->setHasNoSignedWrap();

  return BO;
}
351
/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone. The
/// LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V,
                                    SCEV::NoWrapFlags Flags) {
  assert(!isa<Instruction>(V) ||
         SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));

  Value *Idx = expand(S: Offset);
  // Only NUW from the add carries over to the GEP's no-wrap flags.
  GEPNoWrapFlags NW = (Flags & SCEV::FlagNUW) ? GEPNoWrapFlags::noUnsignedWrap()
                                              : GEPNoWrapFlags::none();

  // Fold a GEP with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(Val: V))
    if (Constant *CRHS = dyn_cast<Constant>(Val: Idx))
      return Builder.CreatePtrAdd(Ptr: CLHS, Offset: CRHS, Name: "", NW);

  // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
  // Only the last few instructions are examined to keep the scan cheap.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Reuse only the expander's own i8 GEP form with identical operands.
      if (auto *GEP = dyn_cast<GetElementPtrInst>(Val&: IP)) {
        if (GEP->getPointerOperand() == V &&
            GEP->getSourceElementType() == Builder.getInt8Ty() &&
            GEP->getOperand(i_nocapture: 1) == Idx) {
          // Intersect flags: the reused GEP may only keep no-wrap bits that
          // are also valid for this expansion. Save originals for rollback.
          rememberFlags(I: GEP);
          GEP->setNoWrapFlags(GEP->getNoWrapFlags() & NW);
          return &*IP;
        }
      }
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Move the insertion point out of as many loops as we can.
  while (const Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(V) || !L->isLoopInvariant(V: Idx)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader->getTerminator());
  }

  // Emit a GEP.
  return Builder.CreatePtrAdd(Ptr: V, Offset: Idx, Name: "scevgep", NW);
}
430
431/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
432/// SCEV expansion. If they are nested, this is the most nested. If they are
433/// neighboring, pick the later.
434static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
435 DominatorTree &DT) {
436 if (!A) return B;
437 if (!B) return A;
438 if (A->contains(L: B)) return B;
439 if (B->contains(L: A)) return A;
440 if (DT.dominates(A: A->getHeader(), B: B->getHeader())) return B;
441 if (DT.dominates(A: B->getHeader(), B: A->getHeader())) return A;
442 return A; // Arbitrarily break the tie.
443}
444
/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop. Results are memoized in
/// RelevantLoops.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  auto Pair = RelevantLoops.try_emplace(Key: S);
  if (!Pair.second)
    return Pair.first->second;

  switch (S->getSCEVType()) {
  case scConstant:
  case scVScale:
    return nullptr; // A constant has no relevant loops.
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
  case scPtrToAddr:
  case scPtrToInt:
  case scAddExpr:
  case scMulExpr:
  case scUDivExpr:
  case scAddRecExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    // Start from the addrec's own loop (if any), then fold in the most
    // relevant loop of each operand.
    const Loop *L = nullptr;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S))
      L = AR->getLoop();
    for (const SCEV *Op : S->operands())
      L = PickMostRelevantLoop(A: L, B: getRelevantLoop(S: Op), DT&: SE.DT);
    // Re-index instead of using Pair.first: the recursive calls above may
    // have inserted into RelevantLoops, invalidating the iterator.
    return RelevantLoops[S] = L;
  }
  case scUnknown: {
    // An unknown value is tied to the loop containing its defining
    // instruction. No recursion occurred, so Pair.first is still valid.
    const SCEVUnknown *U = cast<SCEVUnknown>(Val: S);
    if (const Instruction *I = dyn_cast<Instruction>(Val: U->getValue()))
      return Pair.first->second = SE.LI.getLoopFor(BB: I->getParent());
    // A non-instruction has no relevant loops.
    return nullptr;
  }
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unexpected SCEV type!");
}
490
491namespace {
492
/// LoopCompare - Compare loops by PickMostRelevantLoop. Used as the
/// comparator for stable_sort over (Loop, SCEV) pairs in visitAddExpr /
/// visitMulExpr; must therefore implement a strict weak ordering.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the end.
    // (With reversed input order, stable_sort thus places pointer operands
    // first in the final sequence, which visitAddExpr relies on.)
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(A: LHS.first, B: RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};
523
524}
525
/// Expand a SCEVAddExpr: recognize urem patterns, otherwise sort the
/// operands by relevant loop and emit a chain of adds/subs, forming GEPs
/// when the running sum is a pointer.
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  // Recognize the canonical representation of an unsimplifed urem.
  const SCEV *URemLHS = nullptr;
  const SCEV *URemRHS = nullptr;
  if (match(V: S, P: m_scev_URem(LHS: m_SCEV(V&: URemLHS), RHS: m_SCEV(V&: URemRHS), SE))) {
    Value *LHS = expand(S: URemLHS);
    Value *RHS = expand(S: URemRHS);
    // Not safe to hoist: urem traps on a zero divisor.
    return InsertBinop(Opcode: Instruction::URem, LHS, RHS, Flags: SCEV::FlagAnyWrap,
                       /*IsSafeToHoist*/ false);
  }

  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies on
  // to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(C: S->operands()))
    OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare(SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = nullptr;
  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(S: Op);
      ++I;
      continue;
    }

    assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
    if (isa<PointerType>(Val: Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      // Fold all remaining same-loop operands into a single GEP offset.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is SCEVUnknown and not instructions, peek through
        // it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: X))
          if (!isa<Instruction>(Val: U->getValue()))
            X = SE.getSCEV(V: U->getValue());
        NewOps.push_back(Elt: X);
      }
      Sum = expandAddToGEP(Offset: SE.getAddExpr(Ops&: NewOps), V: Sum, Flags: S->getNoWrapFlags());
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expand(S: SE.getNegativeSCEV(V: Op));
      Sum = InsertBinop(Opcode: Instruction::Sub, LHS: Sum, RHS: W, Flags: SCEV::FlagAnyWrap,
                        /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple add.
      Value *W = expand(S: Op);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Val: Sum))
        std::swap(a&: Sum, b&: W);
      Sum = InsertBinop(Opcode: Instruction::Add, LHS: Sum, RHS: W, Flags: S->getNoWrapFlags(),
                        /*IsSafeToHoist*/ true);
      ++I;
    }
  }

  return Sum;
}
597
/// Expand a SCEVMulExpr: sort operands by relevant loop, expand repeated
/// factors via binary exponentiation, and use negate/shift instead of
/// multiply where profitable.
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = S->getType();

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (const SCEV *Op : reverse(C: S->operands()))
    OpsAndLoops.push_back(Elt: std::make_pair(x: getRelevantLoop(S: Op), y&: Op));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  llvm::stable_sort(Range&: OpsAndLoops, C: LoopCompare(SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = nullptr;
  auto I = OpsAndLoops.begin();

  // Expand the calculation of X pow N in the following manner:
  // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
  // Advances I past the run of equal operands it consumes.
  const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
    auto E = I;
    // Calculate how many times the same operand from the same loop is included
    // into this power.
    uint64_t Exponent = 0;
    // Cap the exponent at UINT64_MAX / 2 so that `BinExp <<= 1` in the loop
    // below can grow past Exponent without overflowing uint64_t.
    const uint64_t MaxExponent = UINT64_MAX >> 1;
    while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
      ++Exponent;
      ++E;
    }
    assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");

    // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
    // that are needed into the result.
    Value *P = expand(S: I->second);
    Value *Result = nullptr;
    if (Exponent & 1)
      Result = P;
    for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
      // P = X pow BinExp (repeated squaring).
      P = InsertBinop(Opcode: Instruction::Mul, LHS: P, RHS: P, Flags: SCEV::FlagAnyWrap,
                      /*IsSafeToHoist*/ true);
      if (Exponent & BinExp)
        Result = Result ? InsertBinop(Opcode: Instruction::Mul, LHS: Result, RHS: P,
                                      Flags: SCEV::FlagAnyWrap,
                                      /*IsSafeToHoist*/ true)
                        : P;
    }

    I = E;
    assert(Result && "Nothing was expanded?");
    return Result;
  };

  while (I != OpsAndLoops.end()) {
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = ExpandOpBinPowN();
    } else if (I->second->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertBinop(Opcode: Instruction::Sub, LHS: Constant::getNullValue(Ty), RHS: Prod,
                         Flags: SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple mul.
      Value *W = ExpandOpBinPowN();
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Val: Prod)) std::swap(a&: Prod, b&: W);
      const APInt *RHS;
      if (match(V: W, P: m_Power2(V&: RHS))) {
        // Canonicalize Prod*(1<<C) to Prod<<C.
        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
        auto NWFlags = S->getNoWrapFlags();
        // clear nsw flag if shl will produce poison value.
        if (RHS->logBase2() == RHS->getBitWidth() - 1)
          NWFlags = ScalarEvolution::clearFlags(Flags: NWFlags, OffFlags: SCEV::FlagNSW);
        Prod = InsertBinop(Opcode: Instruction::Shl, LHS: Prod,
                           RHS: ConstantInt::get(Ty, V: RHS->logBase2()), Flags: NWFlags,
                           /*IsSafeToHoist*/ true);
      } else {
        Prod = InsertBinop(Opcode: Instruction::Mul, LHS: Prod, RHS: W, Flags: S->getNoWrapFlags(),
                           /*IsSafeToHoist*/ true);
      }
    }
  }

  return Prod;
}
689
/// Expand a SCEVUDivExpr. A constant power-of-two divisor becomes a logical
/// shift right. In SafeUDivMode the divisor is frozen and/or clamped to at
/// least 1 so the emitted udiv cannot divide by zero or poison.
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Value *LHS = expand(S: S->getLHS());
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Val: S->getRHS())) {
    const APInt &RHS = SC->getAPInt();
    if (RHS.isPowerOf2())
      return InsertBinop(Opcode: Instruction::LShr, LHS,
                         RHS: ConstantInt::get(Ty: SC->getType(), V: RHS.logBase2()),
                         Flags: SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
  }

  const SCEV *RHSExpr = S->getRHS();
  Value *RHS = expand(S: RHSExpr);
  if (SafeUDivMode) {
    bool GuaranteedNotPoison =
        ScalarEvolution::isGuaranteedNotToBePoison(Op: RHSExpr);
    if (!GuaranteedNotPoison)
      RHS = Builder.CreateFreeze(V: RHS);

    // We need an umax if either RHSExpr is not known to be zero, or if it is
    // not guaranteed to be non-poison. In the later case, the frozen poison may
    // be 0.
    if (!SE.isKnownNonZero(S: RHSExpr) || !GuaranteedNotPoison)
      RHS = Builder.CreateIntrinsic(RetTy: RHS->getType(), ID: Intrinsic::umax,
                                    Args: {RHS, ConstantInt::get(Ty: RHS->getType(), V: 1)});
  }
  // Hoisting is only safe when the divisor is provably non-zero.
  return InsertBinop(Opcode: Instruction::UDiv, LHS, RHS, Flags: SCEV::FlagAnyWrap,
                     /*IsSafeToHoist*/ SE.isKnownNonZero(S: S->getRHS()));
}
718
719/// Determine if this is a well-behaved chain of instructions leading back to
720/// the PHI. If so, it may be reused by expanded expressions.
721bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
722 const Loop *L) {
723 if (IncV->getNumOperands() == 0 || isa<PHINode>(Val: IncV) ||
724 (isa<CastInst>(Val: IncV) && !isa<BitCastInst>(Val: IncV)))
725 return false;
726 // If any of the operands don't dominate the insert position, bail.
727 // Addrec operands are always loop-invariant, so this can only happen
728 // if there are instructions which haven't been hoisted.
729 if (L == IVIncInsertLoop) {
730 for (Use &Op : llvm::drop_begin(RangeOrContainer: IncV->operands()))
731 if (Instruction *OInst = dyn_cast<Instruction>(Val&: Op))
732 if (!SE.DT.dominates(Def: OInst, User: IVIncInsertPos))
733 return false;
734 }
735 // Advance to the next instruction.
736 IncV = dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
737 if (!IncV)
738 return false;
739
740 if (IncV->mayHaveSideEffects())
741 return false;
742
743 if (IncV == PN)
744 return true;
745
746 return isNormalAddRecExprPHI(PN, IncV, L);
747}
748
/// getIVIncOperand returns an induction variable increment's induction
/// variable operand.
///
/// If allowScale is set, any type of GEP is allowed as long as the nonIV
/// operands dominate InsertPos.
///
/// If allowScale is not set, ensure that a GEP increment conforms to one of the
/// simple patterns generated by getAddRecExprPHILiterally and
/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                           Instruction *InsertPos,
                                           bool allowScale) {
  // The increment itself cannot serve as its own IV operand.
  if (IncV == InsertPos)
    return nullptr;

  switch (IncV->getOpcode()) {
  default:
    return nullptr;
  // Check for a simple Add/Sub or GEP of a loop invariant step.
  case Instruction::Add:
  case Instruction::Sub: {
    // The step (operand 1) must be a constant or dominate InsertPos.
    Instruction *OInst = dyn_cast<Instruction>(Val: IncV->getOperand(i: 1));
    if (!OInst || SE.DT.dominates(Def: OInst, User: InsertPos))
      return dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
    return nullptr;
  }
  case Instruction::BitCast:
    // Look through bitcasts to the underlying IV operand.
    return dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
  case Instruction::GetElementPtr:
    // Validate every index operand; the pointer (operand 0) is the IV.
    for (Use &U : llvm::drop_begin(RangeOrContainer: IncV->operands())) {
      if (isa<Constant>(Val: U))
        continue;
      if (Instruction *OInst = dyn_cast<Instruction>(Val&: U)) {
        if (!SE.DT.dominates(Def: OInst, User: InsertPos))
          return nullptr;
      }
      if (allowScale) {
        // allow any kind of GEP as long as it can be hoisted.
        continue;
      }
      // GEPs produced by SCEVExpander use i8 element type.
      if (!cast<GEPOperator>(Val: IncV)->getSourceElementType()->isIntegerTy(Bitwidth: 8))
        return nullptr;
      break;
    }
    return dyn_cast<Instruction>(Val: IncV->getOperand(i: 0));
  }
}
797
798/// If the insert point of the current builder or any of the builders on the
799/// stack of saved builders has 'I' as its insert point, update it to point to
800/// the instruction after 'I'. This is intended to be used when the instruction
801/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
802/// different block, the inconsistent insert point (with a mismatched
803/// Instruction and Block) can lead to an instruction being inserted in a block
804/// other than its parent.
805void SCEVExpander::fixupInsertPoints(Instruction *I) {
806 BasicBlock::iterator It(*I);
807 BasicBlock::iterator NewInsertPt = std::next(x: It);
808 if (Builder.GetInsertPoint() == It)
809 Builder.SetInsertPoint(&*NewInsertPt);
810 for (auto *InsertPtGuard : InsertPointGuards)
811 if (InsertPtGuard->GetInsertPoint() == It)
812 InsertPtGuard->SetInsertPoint(NewInsertPt);
813}
814
/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
///
/// If \p RecomputePoisonFlags is set, drop poison-generating flags on each
/// moved instruction and re-apply only the no-wrap flags SCEV can prove in
/// the new context.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
                              bool RecomputePoisonFlags) {
  auto FixupPoisonFlags = [this](Instruction *I) {
    // Drop flags that are potentially inferred from old context and infer flags
    // in new context.
    rememberFlags(I);
    I->dropPoisonGeneratingFlags();
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
      if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
        auto *BO = cast<BinaryOperator>(Val: I);
        BO->setHasNoUnsignedWrap(
            ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
        BO->setHasNoSignedWrap(
            ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
      }
  };

  // Already dominating InsertPos: nothing needs to move.
  if (SE.DT.dominates(Def: IncV, User: InsertPos)) {
    if (RecomputePoisonFlags)
      FixupPoisonFlags(IncV);
    return true;
  }

  // InsertPos must itself dominate IncV so that IncV's new position satisfies
  // its existing users.
  if (isa<PHINode>(Val: InsertPos) ||
      !SE.DT.dominates(A: InsertPos->getParent(), B: IncV->getParent()))
    return false;

  if (!SE.LI.movementPreservesLCSSAForm(Inst: IncV, NewLoc: InsertPos))
    return false;

  // Check that the chain of IV operands leading back to Phi can be hoisted.
  SmallVector<Instruction*, 4> IVIncs;
  for(;;) {
    Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
    if (!Oper)
      return false;
    // IncV is safe to hoist.
    IVIncs.push_back(Elt: IncV);
    IncV = Oper;
    if (SE.DT.dominates(Def: IncV, User: InsertPos))
      break;
  }
  // Move the collected chain in reverse (operands first) so each moved
  // instruction's operands already dominate its new position.
  for (Instruction *I : llvm::reverse(C&: IVIncs)) {
    fixupInsertPoints(I);
    I->moveBefore(InsertPos: InsertPos->getIterator());
    if (RecomputePoisonFlags)
      FixupPoisonFlags(I);
  }
  return true;
}
870
871bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
872 PHINode *WidePhi,
873 Instruction *OrigInc,
874 Instruction *WideInc) {
875 return match(V: OrigInc, P: m_c_BinOp(L: m_Specific(V: OrigPhi), R: m_Value())) &&
876 match(V: WideInc, P: m_c_BinOp(L: m_Specific(V: WidePhi), R: m_Value())) &&
877 OrigInc->getOpcode() == WideInc->getOpcode();
878}
879
880/// Determine if this cyclic phi is in a form that would have been generated by
881/// LSR. We don't care if the phi was actually expanded in this pass, as long
882/// as it is in a low-cost form, for example, no implied multiplication. This
883/// should match any patterns generated by getAddRecExprPHILiterally and
884/// expandAddtoGEP.
885bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
886 const Loop *L) {
887 for(Instruction *IVOper = IncV;
888 (IVOper = getIVIncOperand(IncV: IVOper, InsertPos: L->getLoopPreheader()->getTerminator(),
889 /*allowScale=*/false));) {
890 if (IVOper == PN)
891 return true;
892 }
893 return false;
894}
895
896/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
897/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
898/// need to materialize IV increments elsewhere to handle difficult situations.
899Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
900 bool useSubtract) {
901 Value *IncV;
902 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
903 if (PN->getType()->isPointerTy()) {
904 // TODO: Change name to IVName.iv.next.
905 IncV = Builder.CreatePtrAdd(Ptr: PN, Offset: StepV, Name: "scevgep");
906 } else {
907 IncV = useSubtract ?
908 Builder.CreateSub(LHS: PN, RHS: StepV, Name: Twine(IVName) + ".iv.next") :
909 Builder.CreateAdd(LHS: PN, RHS: StepV, Name: Twine(IVName) + ".iv.next");
910 }
911 return IncV;
912}
913
914/// Check whether we can cheaply express the requested SCEV in terms of
915/// the available PHI SCEV by truncation and/or inversion of the step.
916static bool canBeCheaplyTransformed(ScalarEvolution &SE,
917 const SCEVAddRecExpr *Phi,
918 const SCEVAddRecExpr *Requested,
919 bool &InvertStep) {
920 // We can't transform to match a pointer PHI.
921 Type *PhiTy = Phi->getType();
922 Type *RequestedTy = Requested->getType();
923 if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
924 return false;
925
926 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
927 return false;
928
929 // Try truncate it if necessary.
930 Phi = dyn_cast<SCEVAddRecExpr>(Val: SE.getTruncateOrNoop(V: Phi, Ty: RequestedTy));
931 if (!Phi)
932 return false;
933
934 // Check whether truncation will help.
935 if (Phi == Requested) {
936 InvertStep = false;
937 return true;
938 }
939
940 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
941 if (SE.getMinusSCEV(LHS: Requested->getStart(), RHS: Requested) == Phi) {
942 InvertStep = true;
943 return true;
944 }
945
946 return false;
947}
948
949static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
950 if (!isa<IntegerType>(Val: AR->getType()))
951 return false;
952
953 unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
954 Type *WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth * 2);
955 const SCEV *Step = AR->getStepRecurrence(SE);
956 const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getSignExtendExpr(Op: Step, Ty: WideTy),
957 RHS: SE.getSignExtendExpr(Op: AR, Ty: WideTy));
958 const SCEV *ExtendAfterOp =
959 SE.getSignExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
960 return ExtendAfterOp == OpAfterExtend;
961}
962
963static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
964 if (!isa<IntegerType>(Val: AR->getType()))
965 return false;
966
967 unsigned BitWidth = cast<IntegerType>(Val: AR->getType())->getBitWidth();
968 Type *WideTy = IntegerType::get(C&: AR->getType()->getContext(), NumBits: BitWidth * 2);
969 const SCEV *Step = AR->getStepRecurrence(SE);
970 const SCEV *OpAfterExtend = SE.getAddExpr(LHS: SE.getZeroExtendExpr(Op: Step, Ty: WideTy),
971 RHS: SE.getZeroExtendExpr(Op: AR, Ty: WideTy));
972 const SCEV *ExtendAfterOp =
973 SE.getZeroExtendExpr(Op: SE.getAddExpr(LHS: AR, RHS: Step), Ty: WideTy);
974 return ExtendAfterOp == OpAfterExtend;
975}
976
/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI. On success, \p TruncTy and \p InvertStep
/// describe how a reused phi must be adapted (truncated and/or subtracted
/// from the start value) to produce the requested recurrence; both are reset
/// when an exact match is found or a fresh phi is created.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L, Type *&TruncTy,
                                        bool &InvertStep) {
  assert((!IVIncInsertLoop || IVIncInsertPos) &&
         "Uninitialized insert position");

  // Reuse a previously-inserted PHI, if present.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    PHINode *AddRecPhiMatch = nullptr;
    Instruction *IncV = nullptr;
    TruncTy = nullptr;
    InvertStep = false;

    // Only try partially matching scevs that need truncation and/or
    // step-inversion if we know this loop is outside the current loop.
    bool TryNonMatchingSCEV =
        IVIncInsertLoop &&
        SE.DT.properlyDominates(A: LatchBlock, B: IVIncInsertLoop->getHeader());

    for (PHINode &PN : L->getHeader()->phis()) {
      if (!SE.isSCEVable(Ty: PN.getType()))
        continue;

      // We should not look for an incomplete PHI. Getting SCEV for an
      // incomplete PHI has no meaning at all.
      if (!PN.isComplete()) {
        SCEV_DEBUG_WITH_TYPE(
            DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
        continue;
      }

      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: &PN));
      if (!PhiSCEV)
        continue;

      bool IsMatchingSCEV = PhiSCEV == Normalized;
      // We only handle truncation and inversion of phi recurrences for the
      // expanded expression if the expanded expression's loop dominates the
      // loop we insert to. Check now, so we can bail out early.
      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
        continue;

      // TODO: this possibly can be reworked to avoid this cast at all.
      Instruction *TempIncV =
          dyn_cast<Instruction>(Val: PN.getIncomingValueForBlock(BB: LatchBlock));
      if (!TempIncV)
        continue;

      // Check whether we can reuse this PHI node.
      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
          continue;
      } else {
        if (!isNormalAddRecExprPHI(PN: &PN, IncV: TempIncV, L))
          continue;
      }

      // Stop if we have found an exact match SCEV.
      if (IsMatchingSCEV) {
        IncV = TempIncV;
        TruncTy = nullptr;
        InvertStep = false;
        AddRecPhiMatch = &PN;
        break;
      }

      // Try whether the phi can be translated into the requested form
      // (truncated and/or offset by a constant).
      if ((!TruncTy || InvertStep) &&
          canBeCheaplyTransformed(SE, Phi: PhiSCEV, Requested: Normalized, InvertStep)) {
        // Record the phi node. But don't stop; we might find an exact match
        // later.
        AddRecPhiMatch = &PN;
        IncV = TempIncV;
        TruncTy = Normalized->getType();
      }
    }

    if (AddRecPhiMatch) {
      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(V: AddRecPhiMatch);
      // Remember the increment.
      rememberInstruction(I: IncV);
      // Those values were not actually inserted but re-used.
      ReusedValues.insert(Ptr: AddRecPhiMatch);
      ReusedValues.insert(Ptr: IncV);
      return AddRecPhiMatch;
    }
  }

  // No reusable phi found; build a fresh one.
  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Another AddRec may need to be recursively expanded below. For example, if
  // this AddRec is quadratic, the StepV may itself be an AddRec in this
  // loop. Remove this loop from the PostIncLoops set before expanding such
  // AddRecs. Otherwise, we cannot find a valid position for the step
  // (i.e. StepV can never dominate its loop header). Ideally, we could do
  // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  // so it's not worth implementing SmallPtrSet::swap.
  PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  PostIncLoops.clear();

  // Expand code for the start value into the loop preheader.
  assert(L->getLoopPreheader() &&
         "Can't expand add recurrences without a loop preheader!");
  Value *StartV =
      expand(S: Normalized->getStart(), I: L->getLoopPreheader()->getTerminator());

  // StartV must have been inserted into L's preheader to dominate the new
  // phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
                                 L->getHeader()));

  // Expand code for the step value. Do this before creating the PHI so that PHI
  // reuse code doesn't see an incomplete PHI.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  Type *ExpandTy = Normalized->getType();
  // If the stride is negative, insert a sub instead of an add for the increment
  // (unless it's a constant, because subtracts of constants are canonicalized
  // to adds).
  bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  if (useSubtract)
    Step = SE.getNegativeSCEV(V: Step);
  // Expand the step somewhere that dominates the loop header.
  Value *StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());

  // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
  // we actually do emit an addition. It does not apply if we emit a
  // subtraction.
  bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, AR: Normalized);
  bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, AR: Normalized);

  // Create the PHI.
  BasicBlock *Header = L->getHeader();
  Builder.SetInsertPoint(TheBB: Header, IP: Header->begin());
  PHINode *PN =
      Builder.CreatePHI(Ty: ExpandTy, NumReservedValues: pred_size(BB: Header), Name: Twine(IVName) + ".iv");

  // Create the step instructions and populate the PHI.
  for (BasicBlock *Pred : predecessors(BB: Header)) {
    // Add a start value.
    if (!L->contains(BB: Pred)) {
      PN->addIncoming(V: StartV, BB: Pred);
      continue;
    }

    // Create a step value and add it to the PHI.
    // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
    // instructions at IVIncInsertPos.
    Instruction *InsertPos = L == IVIncInsertLoop ?
      IVIncInsertPos : Pred->getTerminator();
    Builder.SetInsertPoint(InsertPos);
    Value *IncV = expandIVInc(PN, StepV, L, useSubtract);

    if (isa<OverflowingBinaryOperator>(Val: IncV)) {
      if (IncrementIsNUW)
        cast<BinaryOperator>(Val: IncV)->setHasNoUnsignedWrap();
      if (IncrementIsNSW)
        cast<BinaryOperator>(Val: IncV)->setHasNoSignedWrap();
    }
    PN->addIncoming(V: IncV, BB: Pred);
  }

  // After expanding subexpressions, restore the PostIncLoops set so the caller
  // can ensure that IVIncrement dominates the current uses.
  PostIncLoops = SavedPostIncLoops;

  // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
  // effective when we are able to use an IV inserted here, so record it.
  InsertedValues.insert(V: PN);
  InsertedIVs.push_back(Elt: PN);
  return PN;
}
1158
/// Expand \p S as an explicit phi-plus-increment in its loop (rather than
/// rewriting it in terms of a canonical IV), reusing an existing phi of the
/// loop header where possible and accommodating post-increment uses.
Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  const Loop *L = S->getLoop();

  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
  if (PostIncLoops.count(Ptr: L)) {
    PostIncLoopSet Loops;
    Loops.insert(Ptr: L);
    Normalized = cast<SCEVAddRecExpr>(
        Val: normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
  }

  [[maybe_unused]] const SCEV *Start = Normalized->getStart();
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  assert(SE.properlyDominates(Start, L->getHeader()) &&
         "Start does not properly dominate loop header");
  assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");

  // In some cases, we decide to reuse an existing phi node but need to truncate
  // it and/or invert the step.
  Type *TruncTy = nullptr;
  bool InvertStep = false;
  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);

  // Accommodate post-inc mode, if necessary.
  Value *Result;
  if (!PostIncLoops.count(Ptr: L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    Result = PN->getIncomingValueForBlock(BB: LatchBlock);

    // We might be introducing a new use of the post-inc IV that is not poison
    // safe, in which case we should drop poison generating flags. Only keep
    // those flags for which SCEV has proven that they always hold.
    if (isa<OverflowingBinaryOperator>(Val: Result)) {
      auto *I = cast<Instruction>(Val: Result);
      if (!S->hasNoUnsignedWrap())
        I->setHasNoUnsignedWrap(false);
      if (!S->hasNoSignedWrap())
        I->setHasNoSignedWrap(false);
    }

    // For an expansion to use the postinc form, the client must call
    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    // or dominated by IVIncInsertPos.
    if (isa<Instruction>(Val: Result) &&
        !SE.DT.dominates(Def: cast<Instruction>(Val: Result),
                         User: &*Builder.GetInsertPoint())) {
      // The induction variable's postinc expansion does not dominate this use.
      // IVUsers tries to prevent this case, so it is rare. However, it can
      // happen when an IVUser outside the loop is not dominated by the latch
      // block. Adjusting IVIncInsertPos before expansion begins cannot handle
      // all cases. Consider a phi outside whose operand is replaced during
      // expansion with the value of the postinc user. Without fundamentally
      // changing the way postinc users are tracked, the only remedy is
      // inserting an extra IV increment. StepV might fold into PostLoopOffset,
      // but hopefully expandCodeFor handles that.
      bool useSubtract =
          !S->getType()->isPointerTy() && Step->isNonConstantNegative();
      if (useSubtract)
        Step = SE.getNegativeSCEV(V: Step);
      Value *StepV;
      {
        // Expand the step somewhere that dominates the loop header.
        SCEVInsertPointGuard Guard(Builder, this);
        StepV = expand(S: Step, I: L->getHeader()->getFirstInsertionPt());
      }
      Result = expandIVInc(PN, StepV, L, useSubtract);
    }
  }

  // We have decided to reuse an induction variable of a dominating loop. Apply
  // truncation and/or inversion of the step.
  if (TruncTy) {
    // Truncate the result.
    if (TruncTy != Result->getType())
      Result = Builder.CreateTrunc(V: Result, DestTy: TruncTy);

    // Invert the result: {R,+,-Step} == R - {0,+,Step}.
    if (InvertStep)
      Result = Builder.CreateSub(LHS: expand(S: Normalized->getStart()), RHS: Result);
  }

  return Result;
}
1248
/// If S's loop has a single exit block dominating the insert point, try to
/// express S as (LCSSA phi + simple offset): for each phi in the exit block
/// whose SCEV is an addrec differing from S by only a cheap-to-expand
/// difference, expand that difference and add it to the phi. Returns the
/// reused value, or nullptr if no suitable phi exists.
Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) {
  Type *STy = S->getType();
  const Loop *L = S->getLoop();
  BasicBlock *EB = L->getExitBlock();
  if (!EB || !EB->getSinglePredecessor() ||
      !SE.DT.dominates(A: EB, B: Builder.GetInsertBlock()))
    return nullptr;

  // Helper to check if the diff between S and ExitSCEV is simple enough to
  // allow reusing the LCSSA phi.
  auto CanReuse = [&](const SCEV *ExitSCEV) -> const SCEV * {
    if (isa<SCEVCouldNotCompute>(Val: ExitSCEV))
      return nullptr;
    const SCEV *Diff = SE.getMinusSCEV(LHS: S, RHS: ExitSCEV);
    // Peel a constant offset, a multiply by -1, and a ptrtoaddr/ptrtoint off
    // the difference; what remains must be a constant or unknown for the
    // difference to count as "simple".
    const SCEV *Op = Diff;
    match(S: Op, P: m_scev_Add(Op0: m_SCEVConstant(), Op1: m_SCEV(V&: Op)));
    match(S: Op, P: m_scev_Mul(Op0: m_scev_AllOnes(), Op1: m_SCEV(V&: Op)));
    match(S: Op, P: m_scev_PtrToAddr(Op0: m_SCEV(V&: Op))) ||
        match(S: Op, P: m_scev_PtrToInt(Op0: m_SCEV(V&: Op)));
    if (!isa<SCEVConstant, SCEVUnknown>(Val: Op))
      return nullptr;
    return Diff;
  };

  for (auto &PN : EB->phis()) {
    if (!SE.isSCEVable(Ty: PN.getType()))
      continue;
    auto *ExitSCEV = SE.getSCEV(V: &PN);
    if (!isa<SCEVAddRecExpr>(Val: ExitSCEV))
      continue;
    Type *PhiTy = PN.getType();
    const SCEV *Diff = nullptr;
    if (STy->isIntegerTy() && PhiTy->isPointerTy() &&
        DL.getAddressType(PtrTy: PhiTy) == STy) {
      // Prefer ptrtoaddr over ptrtoint.
      const SCEV *AddrSCEV = SE.getPtrToAddrExpr(Op: ExitSCEV);
      Diff = CanReuse(AddrSCEV);
      if (!Diff) {
        const SCEV *IntSCEV = SE.getPtrToIntExpr(Op: ExitSCEV, Ty: STy);
        Diff = CanReuse(IntSCEV);
      }
    } else if (STy == PhiTy) {
      Diff = CanReuse(ExitSCEV);
    }
    if (!Diff)
      continue;

    assert(Diff->getType()->isIntegerTy() &&
           "difference must be of integer type");
    Value *DiffV = expand(S: Diff);
    Value *BaseV = fixupLCSSAFormFor(V: &PN);
    if (PhiTy->isPointerTy()) {
      if (STy->isPointerTy())
        return Builder.CreatePtrAdd(Ptr: BaseV, Offset: DiffV);
      BaseV = Builder.CreatePtrToAddr(V: BaseV);
    }
    return Builder.CreateAdd(LHS: BaseV, RHS: DiffV);
  }

  return nullptr;
}
1310
/// Expand an addrec expression, either literally (non-canonical mode or
/// nested addrecs) or by rewriting it as a function of a canonical induction
/// variable which is created if the loop doesn't already have one.
Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  // In canonical mode we compute the addrec as an expression of a canonical IV
  // using evaluateAtIteration and expand the resulting SCEV expression. This
  // way we avoid introducing new IVs to carry on the computation of the addrec
  // throughout the loop.
  //
  // For nested addrecs evaluateAtIteration might need a canonical IV of a
  // type wider than the addrec itself. Emitting a canonical IV of the
  // proper type might produce non-legal types, for example expanding an i64
  // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
  // back to non-canonical mode for nested addrecs.
  if (!CanonicalMode || (S->getNumOperands() > 2))
    return expandAddRecExprLiterally(S);

  Type *Ty = SE.getEffectiveSCEVType(Ty: S->getType());
  const Loop *L = S->getLoop();

  // First check for an existing canonical IV in a suitable type.
  PHINode *CanonicalIV = nullptr;
  if (PHINode *PN = L->getCanonicalInductionVariable())
    if (SE.getTypeSizeInBits(Ty: PN->getType()) >= SE.getTypeSizeInBits(Ty))
      CanonicalIV = PN;

  // Rewrite an AddRec in terms of the canonical induction variable, if
  // its type is more narrow.
  if (CanonicalIV &&
      SE.getTypeSizeInBits(Ty: CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
      !S->getType()->isPointerTy()) {
    // Any-extend every operand to the canonical IV's width, expand the wide
    // addrec, then truncate the result back down.
    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
      NewOps[i] = SE.getAnyExtendExpr(Op: S->getOperand(i), Ty: CanonicalIV->getType());
    Value *V = expand(S: SE.getAddRecExpr(Operands&: NewOps, L: S->getLoop(),
                                       Flags: S->getNoWrapFlags(Mask: SCEV::FlagNW)));
    BasicBlock::iterator NewInsertPt =
        findInsertPointAfter(I: cast<Instruction>(Val: V), MustDominate: &*Builder.GetInsertPoint());
    V = expand(S: SE.getTruncateExpr(Op: SE.getUnknown(V), Ty), I: NewInsertPt);
    return V;
  }

  // If S is expanded outside the defining loop, check if there is a
  // matching LCSSA phi node for it.
  if (Value *V = tryToReuseLCSSAPhi(S))
    return V;

  // {X,+,F} --> X + {0,+,F}
  if (!S->getStart()->isZero()) {
    if (isa<PointerType>(Val: S->getType())) {
      // Pointer addrecs are expanded as (pointer base) GEP (integer offset).
      Value *StartV = expand(S: SE.getPointerBase(V: S));
      return expandAddToGEP(Offset: SE.removePointerBase(S), V: StartV,
                            Flags: S->getNoWrapFlags(Mask: SCEV::FlagNUW));
    }

    SmallVector<const SCEV *, 4> NewOps(S->operands());
    NewOps[0] = SE.getConstant(Ty, V: 0);
    const SCEV *Rest = SE.getAddRecExpr(Operands&: NewOps, L,
                                        Flags: S->getNoWrapFlags(Mask: SCEV::FlagNW));

    // Just do a normal add. Pre-expand the operands to suppress folding.
    //
    // The LHS and RHS values are factored out of the expand call to make the
    // output independent of the argument evaluation order.
    const SCEV *AddExprLHS = SE.getUnknown(V: expand(S: S->getStart()));
    const SCEV *AddExprRHS = SE.getUnknown(V: expand(S: Rest));
    return expand(S: SE.getAddExpr(LHS: AddExprLHS, RHS: AddExprRHS));
  }

  // If we don't yet have a canonical IV, create one.
  if (!CanonicalIV) {
    // Create and insert the PHI node for the induction variable in the
    // specified loop.
    BasicBlock *Header = L->getHeader();
    pred_iterator HPB = pred_begin(BB: Header), HPE = pred_end(BB: Header);
    CanonicalIV = PHINode::Create(Ty, NumReservedValues: std::distance(first: HPB, last: HPE), NameStr: "indvar");
    CanonicalIV->insertBefore(InsertPos: Header->begin());
    rememberInstruction(I: CanonicalIV);

    SmallPtrSet<BasicBlock *, 4> PredSeen;
    Constant *One = ConstantInt::get(Ty, V: 1);
    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
      BasicBlock *HP = *HPI;
      if (!PredSeen.insert(Ptr: HP).second) {
        // There must be an incoming value for each predecessor, even the
        // duplicates!
        CanonicalIV->addIncoming(V: CanonicalIV->getIncomingValueForBlock(BB: HP), BB: HP);
        continue;
      }

      if (L->contains(BB: HP)) {
        // Insert a unit add instruction right before the terminator
        // corresponding to the back-edge.
        Instruction *Add = BinaryOperator::CreateAdd(V1: CanonicalIV, V2: One,
                                                     Name: "indvar.next",
                                                     InsertBefore: HP->getTerminator()->getIterator());
        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
        rememberInstruction(I: Add);
        CanonicalIV->addIncoming(V: Add, BB: HP);
      } else {
        // Entry edges start the canonical IV at zero.
        CanonicalIV->addIncoming(V: Constant::getNullValue(Ty), BB: HP);
      }
    }
  }

  // {0,+,1} --> Insert a canonical induction variable into the loop!
  if (S->isAffine() && S->getOperand(i: 1)->isOne()) {
    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
           "IVs with types different from the canonical IV should "
           "already have been handled!");
    return CanonicalIV;
  }

  // {0,+,F} --> {0,+,1} * F

  // If this is a simple linear addrec, emit it now as a special case.
  if (S->isAffine()) // {0,+,F} --> i*F
    return
      expand(S: SE.getTruncateOrNoop(
        V: SE.getMulExpr(LHS: SE.getUnknown(V: CanonicalIV),
                       RHS: SE.getNoopOrAnyExtend(V: S->getOperand(i: 1),
                                             Ty: CanonicalIV->getType())),
        Ty));

  // If this is a chain of recurrences, turn it into a closed form, using the
  // folders, then expandCodeFor the closed form. This allows the folders to
  // simplify the expression without having to build a bunch of special code
  // into this folder.
  const SCEV *IH = SE.getUnknown(V: CanonicalIV); // Get I as a "symbolic" SCEV.

  // Promote S up to the canonical IV type, if the cast is foldable.
  const SCEV *NewS = S;
  const SCEV *Ext = SE.getNoopOrAnyExtend(V: S, Ty: CanonicalIV->getType());
  if (isa<SCEVAddRecExpr>(Val: Ext))
    NewS = Ext;

  const SCEV *V = cast<SCEVAddRecExpr>(Val: NewS)->evaluateAtIteration(It: IH, SE);

  // Truncate the result down to the original type, if needed.
  const SCEV *T = SE.getTruncateOrNoop(V, Ty);
  return expand(S: T);
}
1450
1451Value *SCEVExpander::visitPtrToAddrExpr(const SCEVPtrToAddrExpr *S) {
1452 Value *V = expand(S: S->getOperand());
1453 Type *Ty = S->getType();
1454
1455 // ptrtoaddr and ptrtoint produce the same value, so try to reuse either.
1456 if (!isa<Constant>(Val: V)) {
1457 BasicBlock::iterator BIP = Builder.GetInsertPoint();
1458 for (User *U : V->users()) {
1459 auto *CI = dyn_cast<CastInst>(Val: U);
1460 if (CI && CI->getType() == Ty &&
1461 (CI->getOpcode() == CastInst::PtrToAddr ||
1462 CI->getOpcode() == CastInst::PtrToInt) &&
1463 &*BIP != CI && SE.DT.dominates(Def: CI, User: &*BIP))
1464 return CI;
1465 }
1466 }
1467 return ReuseOrCreateCast(V, Ty, Op: CastInst::PtrToAddr,
1468 IP: GetOptimalInsertionPointForCastOf(V));
1469}
1470
1471Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
1472 Value *V = expand(S: S->getOperand());
1473 return ReuseOrCreateCast(V, Ty: S->getType(), Op: CastInst::PtrToInt,
1474 IP: GetOptimalInsertionPointForCastOf(V));
1475}
1476
1477Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1478 Value *V = expand(S: S->getOperand());
1479 return Builder.CreateTrunc(V, DestTy: S->getType());
1480}
1481
1482Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1483 Value *V = expand(S: S->getOperand());
1484 return Builder.CreateZExt(V, DestTy: S->getType(), Name: "",
1485 IsNonNeg: SE.isKnownNonNegative(S: S->getOperand()));
1486}
1487
1488Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1489 Value *V = expand(S: S->getOperand());
1490 return Builder.CreateSExt(V, DestTy: S->getType());
1491}
1492
/// Expand an n-ary min/max SCEV as a right-to-left chain of two-operand
/// min/max operations: the llvm.{smax,umax,smin,umin} intrinsic for integer
/// types, or icmp+select otherwise. When \p IsSequential, all combined
/// operands except the last one (operand 0) are frozen, and SafeUDivMode is
/// kept set while expanding them.
Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
                                      Intrinsic::ID IntrinID, Twine Name,
                                      bool IsSequential) {
  // Save SafeUDivMode so nested expansions see the sequential context; it is
  // restored before returning.
  bool PrevSafeMode = SafeUDivMode;
  SafeUDivMode |= IsSequential;
  // Start the chain from the last operand and fold the rest in backwards.
  Value *LHS = expand(S: S->getOperand(i: S->getNumOperands() - 1));
  Type *Ty = LHS->getType();
  if (IsSequential)
    LHS = Builder.CreateFreeze(V: LHS);
  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
    // Operand 0 is the last to be expanded and is not frozen; SafeUDivMode
    // reverts to its previous value for it.
    SafeUDivMode = (IsSequential && i != 0) || PrevSafeMode;
    Value *RHS = expand(S: S->getOperand(i));
    if (IsSequential && i != 0)
      RHS = Builder.CreateFreeze(V: RHS);
    Value *Sel;
    if (Ty->isIntegerTy())
      Sel = Builder.CreateIntrinsic(ID: IntrinID, Types: {Ty}, Args: {LHS, RHS},
                                    /*FMFSource=*/nullptr, Name);
    else {
      // Non-integer (e.g. pointer) operands: lower via compare + select.
      Value *ICmp =
          Builder.CreateICmp(P: MinMaxIntrinsic::getPredicate(ID: IntrinID), LHS, RHS);
      Sel = Builder.CreateSelect(C: ICmp, True: LHS, False: RHS, Name);
    }
    LHS = Sel;
  }
  SafeUDivMode = PrevSafeMode;
  return LHS;
}
1521
1522Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1523 return expandMinMaxExpr(S, IntrinID: Intrinsic::smax, Name: "smax");
1524}
1525
1526Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1527 return expandMinMaxExpr(S, IntrinID: Intrinsic::umax, Name: "umax");
1528}
1529
1530Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1531 return expandMinMaxExpr(S, IntrinID: Intrinsic::smin, Name: "smin");
1532}
1533
1534Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1535 return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin");
1536}
1537
1538Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1539 return expandMinMaxExpr(S, IntrinID: Intrinsic::umin, Name: "umin", /*IsSequential*/true);
1540}
1541
1542Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1543 return Builder.CreateVScale(Ty: S->getType());
1544}
1545
1546Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
1547 BasicBlock::iterator IP) {
1548 setInsertPoint(IP);
1549 Value *V = expandCodeFor(SH, Ty);
1550 return V;
1551}
1552
1553Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
1554 // Expand the code for this SCEV.
1555 Value *V = expand(S: SH);
1556
1557 if (Ty && Ty != V->getType()) {
1558 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1559 "non-trivial casts should be done with the SCEVs directly!");
1560 V = InsertNoopCastOfTo(V, Ty);
1561 }
1562 return V;
1563}
1564
/// Search ScalarEvolution's SCEV-to-Value map for an existing instruction
/// that can stand in for expanding \p S at \p InsertPt. On success, returns
/// the reusable value and fills \p DropPoisonGeneratingInsts with any
/// instructions whose poison-generating flags must be dropped to make reuse
/// sound; returns nullptr if no candidate is suitable.
Value *SCEVExpander::FindValueInExprValueMap(
    const SCEV *S, const Instruction *InsertPt,
    SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
  // If the expansion is not in CanonicalMode, and the SCEV contains any
  // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
  if (!CanonicalMode && SE.containsAddRecurrence(S))
    return nullptr;

  // If S is a constant or unknown, it may be worse to reuse an existing Value.
  if (isa<SCEVConstant>(Val: S) || isa<SCEVUnknown>(Val: S))
    return nullptr;

  for (Value *V : SE.getSCEVValues(S)) {
    Instruction *EntInst = dyn_cast<Instruction>(Val: V);
    if (!EntInst)
      continue;

    // Choose a Value from the set which dominates the InsertPt.
    // InsertPt should be inside the Value's parent loop so as not to break
    // the LCSSA form.
    assert(EntInst->getFunction() == InsertPt->getFunction());
    if (S->getType() != V->getType() || !SE.DT.dominates(Def: EntInst, User: InsertPt) ||
        !(SE.LI.getLoopFor(BB: EntInst->getParent()) == nullptr ||
          SE.LI.getLoopFor(BB: EntInst->getParent())->contains(Inst: InsertPt)))
      continue;

    // Make sure reusing the instruction is poison-safe.
    if (SE.canReuseInstruction(S, I: EntInst, DropPoisonGeneratingInsts))
      return V;
    // This candidate failed; discard any flag-drops it tentatively recorded.
    DropPoisonGeneratingInsts.clear();
  }
  return nullptr;
}
1598
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
// the expansion will try to reuse Value from ExprValueMap, and only when it
// fails, expand the SCEV literally.
Value *SCEVExpander::expand(const SCEV *S) {
  // Compute an insertion point for this SCEV object. Hoist the instructions
  // as far out in the loop nest as possible.
  BasicBlock::iterator InsertPt = Builder.GetInsertPoint();

  // We can move insertion point only if there is no div or rem operations
  // otherwise we are risky to move it over the check for zero denominator.
  auto SafeToHoist = [](const SCEV *S) {
    return !SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
      if (const auto *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
        if (const auto *SC = dyn_cast<SCEVConstant>(Val: D->getRHS()))
          // Division by non-zero constants can be hoisted.
          return SC->getValue()->isZero();
        // All other divisions should not be moved as they may be
        // divisions by zero and should be kept within the
        // conditions of the surrounding loops that guard their
        // execution (see PR35406).
        return true;
      }
      return false;
    });
  };
  if (SafeToHoist(S)) {
    // Walk outward through the loop nest, stopping at the outermost level at
    // which S is still loop-invariant (L == nullptr is function level).
    for (Loop *L = SE.LI.getLoopFor(BB: Builder.GetInsertBlock());;
         L = L->getParentLoop()) {
      if (SE.isLoopInvariant(S, L)) {
        // Invariant even outside all loops: nothing further to hoist past.
        if (!L) break;
        if (BasicBlock *Preheader = L->getLoopPreheader()) {
          // Hoist into the preheader, just before its terminator.
          InsertPt = Preheader->getTerminator()->getIterator();
        } else {
          // LSR sets the insertion point for AddRec start/step values to the
          // block start to simplify value reuse, even though it's an invalid
          // position. SCEVExpander must correct for this in all cases.
          InsertPt = L->getHeader()->getFirstInsertionPt();
        }
      } else {
        // If the SCEV is computable at this level, insert it into the header
        // after the PHIs (and after any other instructions that we've inserted
        // there) so that it is guaranteed to dominate any user inside the loop.
        if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(Ptr: L))
          InsertPt = L->getHeader()->getFirstInsertionPt();

        // Step past instructions this expander already emitted at the chosen
        // point so that new code is appended after them.
        while (InsertPt != Builder.GetInsertPoint() &&
               (isInsertedInstruction(I: &*InsertPt))) {
          InsertPt = std::next(x: InsertPt);
        }
        break;
      }
    }
  }

  // Check to see if we already expanded this here.
  auto I = InsertedExpressions.find(Val: std::make_pair(x&: S, y: &*InsertPt));
  if (I != InsertedExpressions.end())
    return I->second;

  // Scoped guard over the builder's insertion point while we expand at
  // InsertPt.
  SCEVInsertPointGuard Guard(Builder, this);
  Builder.SetInsertPoint(TheBB: InsertPt->getParent(), IP: InsertPt);

  // Expand the expression into instructions.
  SmallVector<Instruction *> DropPoisonGeneratingInsts;
  Value *V = FindValueInExprValueMap(S, InsertPt: &*InsertPt, DropPoisonGeneratingInsts);
  if (!V) {
    // No reusable value found: expand literally and patch up LCSSA if needed.
    V = visit(S);
    V = fixupLCSSAFormFor(V);
  } else {
    // Reuse is only poison-safe after stripping the poison-generating flags
    // that FindValueInExprValueMap asked us to drop (original flags are
    // remembered so they can be restored later).
    for (Instruction *I : DropPoisonGeneratingInsts) {
      rememberFlags(I);
      I->dropPoisonGeneratingAnnotations();
      // See if we can re-infer from first principles any of the flags we just
      // dropped.
      if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: I))
        if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
          auto *BO = cast<BinaryOperator>(Val: I);
          BO->setHasNoUnsignedWrap(
              ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNUW) == SCEV::FlagNUW);
          BO->setHasNoSignedWrap(
              ScalarEvolution::maskFlags(Flags: *Flags, Mask: SCEV::FlagNSW) == SCEV::FlagNSW);
        }
      if (auto *NNI = dyn_cast<PossiblyNonNegInst>(Val: I)) {
        // Re-establish the nneg flag if a dominating condition proves the
        // operand is non-negative.
        auto *Src = NNI->getOperand(i_nocapture: 0);
        if (isImpliedByDomCondition(Pred: ICmpInst::ICMP_SGE, LHS: Src,
                                    RHS: Constant::getNullValue(Ty: Src->getType()), ContextI: I,
                                    DL).value_or(u: false))
          NNI->setNonNeg(true);
      }
    }
  }
  // Remember the expanded value for this SCEV at this location.
  //
  // This is independent of PostIncLoops. The mapped value simply materializes
  // the expression at this insertion point. If the mapped value happened to be
  // a postinc expansion, it could be reused by a non-postinc user, but only if
  // its insertion point was already at the head of the loop.
  InsertedExpressions[std::make_pair(x&: S, y: &*InsertPt)] = V;
  return V;
}
1702
1703void SCEVExpander::rememberInstruction(Value *I) {
1704 auto DoInsert = [this](Value *V) {
1705 if (!PostIncLoops.empty())
1706 InsertedPostIncValues.insert(V);
1707 else
1708 InsertedValues.insert(V);
1709 };
1710 DoInsert(I);
1711}
1712
1713void SCEVExpander::rememberFlags(Instruction *I) {
1714 // If we already have flags for the instruction, keep the existing ones.
1715 OrigFlags.try_emplace(Key: I, Args: PoisonFlags(I));
1716}
1717
/// Try to replace the latch increment of the congruent phi \p Phi with the
/// increment of \p OrigPhi so that DeleteDeadPHIs can later remove the whole
/// redundant IV cycle. May swap \p Phi and \p OrigPhi (both are passed by
/// reference) to keep the more canonical pair.
void SCEVExpander::replaceCongruentIVInc(
    PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // The increments are the values each phi receives from the loop latch;
  // without a unique latch there is nothing to do.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock)
    return;

  Instruction *OrigInc =
      dyn_cast<Instruction>(Val: OrigPhi->getIncomingValueForBlock(BB: LatchBlock));
  Instruction *IsomorphicInc =
      dyn_cast<Instruction>(Val: Phi->getIncomingValueForBlock(BB: LatchBlock));
  if (!OrigInc || !IsomorphicInc)
    return;

  // If this phi has the same width but is more canonical, replace the
  // original with it. As part of the "more canonical" determination,
  // respect a prior decision to use an IV chain.
  if (OrigPhi->getType() == Phi->getType()) {
    bool Chained = ChainedPhis.contains(V: Phi);
    if (!(Chained || isExpandedAddRecExprPHI(PN: OrigPhi, IncV: OrigInc, L)) &&
        (Chained || isExpandedAddRecExprPHI(PN: Phi, IncV: IsomorphicInc, L))) {
      std::swap(a&: OrigPhi, b&: Phi);
      std::swap(a&: OrigInc, b&: IsomorphicInc);
    }
  }

  // Replacing the congruent phi is sufficient because acyclic
  // redundancy elimination, CSE/GVN, should handle the
  // rest. However, once SCEV proves that a phi is congruent,
  // it's often the head of an IV user cycle that is isomorphic
  // with the original phi. It's worth eagerly cleaning up the
  // common case of a single IV increment so that DeleteDeadPHIs
  // can remove cycles that had postinc uses.
  // Because we may potentially introduce a new use of OrigIV that didn't
  // exist before at this point, its poison flags need readjustment.

  // Only proceed if the two increments compute the same value modulo a no-op
  // truncate, and the replacement preserves LCSSA form.
  const SCEV *TruncExpr =
      SE.getTruncateOrNoop(V: SE.getSCEV(V: OrigInc), Ty: IsomorphicInc->getType());
  if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(V: IsomorphicInc) ||
      !SE.LI.replacementPreservesLCSSAForm(From: IsomorphicInc, To: OrigInc))
    return;

  // Record now whether *both* increments carry NUW/NSW, since hoistIVInc
  // below is asked to recompute (and so may change) OrigInc's flags.
  bool BothHaveNUW = false;
  bool BothHaveNSW = false;
  auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(Val: OrigInc);
  auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(Val: IsomorphicInc);
  if (OBOIncV && OBOIsomorphic) {
    BothHaveNUW =
        OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
    BothHaveNSW =
        OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
  }

  // Hoist OrigInc so it is available at IsomorphicInc; give up if that fails.
  if (!hoistIVInc(IncV: OrigInc, InsertPos: IsomorphicInc,
                  /*RecomputePoisonFlags*/ true))
    return;

  // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
  // are NUW/NSW, then we can preserve them on the wider increment; the narrower
  // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
  // make IsomorphicInc's uses more poisonous.
  assert(OrigInc->getType()->getScalarSizeInBits() >=
             IsomorphicInc->getType()->getScalarSizeInBits() &&
         "Should only replace an increment with a wider one.");
  if (BothHaveNUW || BothHaveNSW) {
    OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
    OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
  }

  SCEV_DEBUG_WITH_TYPE(DebugType,
                       dbgs() << "INDVARS: Eliminated congruent iv.inc: "
                              << *IsomorphicInc << '\n');
  Value *NewInc = OrigInc;
  if (OrigInc->getType() != IsomorphicInc->getType()) {
    // Types differ: emit a truncating cast of OrigInc right after its
    // definition (or after the phis, if OrigInc is itself a phi).
    BasicBlock::iterator IP;
    if (PHINode *PN = dyn_cast<PHINode>(Val: OrigInc))
      IP = PN->getParent()->getFirstInsertionPt();
    else
      IP = OrigInc->getNextNode()->getIterator();

    IRBuilder<> Builder(IP->getParent(), IP);
    Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
    NewInc =
        Builder.CreateTruncOrBitCast(V: OrigInc, DestTy: IsomorphicInc->getType(), Name: IVName);
  }
  IsomorphicInc->replaceAllUsesWith(V: NewInc);
  DeadInsts.emplace_back(Args&: IsomorphicInc);
}
1805
/// replaceCongruentIVs - Check for congruent phis in this loop header and
/// replace them with their most canonical representative. Return the number of
/// phis eliminated.
///
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned
SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts,
                                  const TargetTransformInfo *TTI) {
  // Find integer phis in order of increasing width.
  SmallVector<PHINode *, 8> Phis(
      llvm::make_pointer_range(Range: L->getHeader()->phis()));

  if (TTI)
    // Use stable_sort to preserve order of equivalent PHIs, so the order
    // of the sorted Phis is the same from run to run on the same loop.
    llvm::stable_sort(Range&: Phis, C: [](Value *LHS, Value *RHS) {
      // Put pointers at the back and make sure pointer < pointer = false.
      if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
        return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
      // Integer phis are ordered by decreasing bit width, so Phis.back()
      // below is the narrowest integer phi.
      return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
             LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
    });

  unsigned NumElim = 0;
  // Maps each SCEV (and, when truncation is free, the truncation of a wide
  // phi's SCEV) to the phi chosen as its canonical representative.
  DenseMap<const SCEV *, PHINode *> ExprToIVMap;
  // Process phis from wide to narrow. Map wide phis to their truncation
  // so narrow phis can reuse them.
  for (PHINode *Phi : Phis) {
    // Returns a replacement value if the phi simplifies away or folds to a
    // SCEV constant; nullptr otherwise.
    auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
      if (Value *V = simplifyInstruction(I: PN, Q: {DL, &SE.TLI, &SE.DT, &SE.AC}))
        return V;
      if (!SE.isSCEVable(Ty: PN->getType()))
        return nullptr;
      auto *Const = dyn_cast<SCEVConstant>(Val: SE.getSCEV(V: PN));
      if (!Const)
        return nullptr;
      return Const->getValue();
    };

    // Fold constant phis. They may be congruent to other constant phis and
    // would confuse the logic below that expects proper IVs.
    if (Value *V = SimplifyPHINode(Phi)) {
      // Only replace when the simplified value has exactly the phi's type.
      if (V->getType() != Phi->getType())
        continue;
      SE.forgetValue(V: Phi);
      Phi->replaceAllUsesWith(V);
      DeadInsts.emplace_back(Args&: Phi);
      ++NumElim;
      SCEV_DEBUG_WITH_TYPE(DebugType,
                           dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
                                  << '\n');
      continue;
    }

    if (!SE.isSCEVable(Ty: Phi->getType()))
      continue;

    // The first phi seen for a given SCEV becomes the canonical one.
    PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(V: Phi)];
    if (!OrigPhiRef) {
      OrigPhiRef = Phi;
      if (Phi->getType()->isIntegerTy() && TTI &&
          TTI->isTruncateFree(Ty1: Phi->getType(), Ty2: Phis.back()->getType())) {
        // Make sure we only rewrite using simple induction variables;
        // otherwise, we can make the trip count of a loop unanalyzable
        // to SCEV.
        const SCEV *PhiExpr = SE.getSCEV(V: Phi);
        if (isa<SCEVAddRecExpr>(Val: PhiExpr)) {
          // This phi can be freely truncated to the narrowest phi type. Map the
          // truncated expression to it so it will be reused for narrow types.
          const SCEV *TruncExpr =
              SE.getTruncateExpr(Op: PhiExpr, Ty: Phis.back()->getType());
          ExprToIVMap[TruncExpr] = Phi;
        }
      }
      continue;
    }

    // Replacing a pointer phi with an integer phi or vice-versa doesn't make
    // sense.
    if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
      continue;

    // Also try to eliminate the redundant latch increment (this may swap the
    // phis to keep the more canonical pair).
    replaceCongruentIVInc(Phi, OrigPhi&: OrigPhiRef, L, DT, DeadInsts);
    SCEV_DEBUG_WITH_TYPE(DebugType,
                         dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
                                << '\n');
    SCEV_DEBUG_WITH_TYPE(
        DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
    ++NumElim;
    Value *NewIV = OrigPhiRef;
    if (OrigPhiRef->getType() != Phi->getType()) {
      // Widths differ: materialize a trunc/bitcast of the canonical IV in the
      // loop header for the narrow phi's users.
      IRBuilder<> Builder(L->getHeader(),
                          L->getHeader()->getFirstInsertionPt());
      Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
      NewIV = Builder.CreateTruncOrBitCast(V: OrigPhiRef, DestTy: Phi->getType(), Name: IVName);
    }
    Phi->replaceAllUsesWith(V: NewIV);
    DeadInsts.emplace_back(Args&: Phi);
  }
  return NumElim;
}
1909
1910bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1911 const Instruction *At,
1912 Loop *L) {
1913 using namespace llvm::PatternMatch;
1914
1915 SmallVector<BasicBlock *, 4> ExitingBlocks;
1916 L->getExitingBlocks(ExitingBlocks);
1917
1918 // Look for suitable value in simple conditions at the loop exits.
1919 for (BasicBlock *BB : ExitingBlocks) {
1920 CmpPredicate Pred;
1921 Instruction *LHS, *RHS;
1922
1923 if (!match(V: BB->getTerminator(),
1924 P: m_Br(C: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Instruction(I&: RHS)),
1925 T: m_BasicBlock(), F: m_BasicBlock())))
1926 continue;
1927
1928 if (SE.getSCEV(V: LHS) == S && SE.DT.dominates(Def: LHS, User: At))
1929 return true;
1930
1931 if (SE.getSCEV(V: RHS) == S && SE.DT.dominates(Def: RHS, User: At))
1932 return true;
1933 }
1934
1935 // Use expand's logic which is used for reusing a previous Value in
1936 // ExprValueMap. Note that we don't currently model the cost of
1937 // needing to drop poison generating flags on the instruction if we
1938 // want to reuse it. We effectively assume that has zero cost.
1939 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1940 return FindValueInExprValueMap(S, InsertPt: At, DropPoisonGeneratingInsts) != nullptr;
1941}
1942
/// Compute the cost of the IR instruction(s) needed to materialize
/// \p WorkItem.S, and append its SCEV operands to \p Worklist tagged with the
/// IR opcode and (clamped) operand index they will occupy, so their own
/// expansion costs can be charged in turn.
template<typename T> static InstructionCost costAndCollectOperands(
  const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
  TargetTransformInfo::TargetCostKind CostKind,
  SmallVectorImpl<SCEVOperand> &Worklist) {

  const T *S = cast<T>(WorkItem.S);
  InstructionCost Cost = 0;
  // Object to help map SCEV operands to expanded IR instructions.
  struct OperationIndices {
    OperationIndices(unsigned Opc, size_t min, size_t max) :
      Opcode(Opc), MinIdx(min), MaxIdx(max) { }
    unsigned Opcode;   // IR opcode that will consume the operand.
    size_t MinIdx;     // Smallest IR operand index the SCEV operand can take.
    size_t MaxIdx;     // Largest IR operand index the SCEV operand can take.
  };

  // Collect the operations of all the instructions that will be needed to
  // expand the SCEVExpr. This is so that when we come to cost the operands,
  // we know what the generated user(s) will be.
  SmallVector<OperationIndices, 2> Operations;

  // Cost of one cast from the operand's type to S's type.
  auto CastCost = [&](unsigned Opcode) -> InstructionCost {
    Operations.emplace_back(Opcode, 0, 0);
    return TTI.getCastInstrCost(Opcode, Dst: S->getType(),
                                Src: S->getOperand(0)->getType(),
                                CCH: TTI::CastContextHint::None, CostKind);
  };

  // Cost of NumRequired chained binary operations of the given opcode.
  auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
                       unsigned MinIdx = 0,
                       unsigned MaxIdx = 1) -> InstructionCost {
    Operations.emplace_back(Opcode, MinIdx, MaxIdx);
    return NumRequired *
           TTI.getArithmeticInstrCost(Opcode, Ty: S->getType(), CostKind);
  };

  // Cost of NumRequired compare/select instructions on S's type.
  auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
                        unsigned MaxIdx) -> InstructionCost {
    Operations.emplace_back(Opcode, MinIdx, MaxIdx);
    Type *OpType = S->getType();
    return NumRequired * TTI.getCmpSelInstrCost(
                             Opcode, ValTy: OpType, CondTy: CmpInst::makeCmpResultType(opnd_type: OpType),
                             VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
  };

  switch (S->getSCEVType()) {
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  case scUnknown:
  case scConstant:
  case scVScale:
    // Leaf expressions require no new instructions.
    return 0;
  case scPtrToAddr:
    Cost = CastCost(Instruction::PtrToAddr);
    break;
  case scPtrToInt:
    Cost = CastCost(Instruction::PtrToInt);
    break;
  case scTruncate:
    Cost = CastCost(Instruction::Trunc);
    break;
  case scZeroExtend:
    Cost = CastCost(Instruction::ZExt);
    break;
  case scSignExtend:
    Cost = CastCost(Instruction::SExt);
    break;
  case scUDivExpr: {
    unsigned Opcode = Instruction::UDiv;
    // Division by a power-of-two constant is costed as a logical shift.
    if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
      if (SC->getAPInt().isPowerOf2())
        Opcode = Instruction::LShr;
    Cost = ArithCost(Opcode, 1);
    break;
  }
  case scAddExpr:
    // N operands chain through N-1 adds.
    Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
    break;
  case scMulExpr:
    // TODO: this is a very pessimistic cost modelling for Mul,
    // because of Bin Pow algorithm actually used by the expander,
    // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
    Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
    break;
  case scSMaxExpr:
  case scUMaxExpr:
  case scSMinExpr:
  case scUMinExpr:
  case scSequentialUMinExpr: {
    // FIXME: should this ask the cost for Intrinsic's?
    // The reduction tree.
    Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
    Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
    switch (S->getSCEVType()) {
    case scSequentialUMinExpr: {
      // The safety net against poison.
      // FIXME: this is broken.
      Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
      Cost += ArithCost(Instruction::Or,
                        S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
      Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
      break;
    }
    default:
      assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
             "Unhandled SCEV expression type?");
      break;
    }
    break;
  }
  case scAddRecExpr: {
    // Addrec expands to a phi and add per recurrence.
    unsigned NumRecurrences = S->getNumOperands() - 1;
    Cost += TTI.getCFInstrCost(Opcode: Instruction::PHI, CostKind) * NumRecurrences;
    Cost +=
        TTI.getArithmeticInstrCost(Opcode: Instruction::Add, Ty: S->getType(), CostKind) *
        NumRecurrences;
    // AR start is used in phi.
    Worklist.emplace_back(Instruction::PHI, 0, S->getOperand(0));
    // Other operands are used in add.
    for (const SCEV *Op : S->operands().drop_front())
      Worklist.emplace_back(Args: Instruction::Add, Args: 1, Args&: Op);
    break;
  }
  }

  // Queue every SCEV operand once per IR operation that will consume it.
  for (auto &CostOp : Operations) {
    for (auto SCEVOp : enumerate(S->operands())) {
      // Clamp the index to account for multiple IR operations being chained.
      size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
      size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
      Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
    }
  }
  return Cost;
}
2079
/// Accumulate into \p Cost the cost of expanding \p WorkItem at \p At, and
/// return true as soon as the running total exceeds \p Budget. Operands whose
/// cost still needs accounting are appended to \p Worklist for the caller to
/// process on subsequent calls.
bool SCEVExpander::isHighCostExpansionHelper(
    const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
    InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
    SmallPtrSetImpl<const SCEV *> &Processed,
    SmallVectorImpl<SCEVOperand> &Worklist) {
  if (Cost > Budget)
    return true; // Already run out of budget, give up.

  const SCEV *S = WorkItem.S;
  // Was the cost of expansion of this expression already accounted for?
  // Constants are deliberately not deduplicated; they are charged per use
  // (see the scConstant case below).
  if (!isa<SCEVConstant>(Val: S) && !Processed.insert(Ptr: S).second)
    return false; // We have already accounted for this expression.

  // If we can find an existing value for this scev available at the point "At"
  // then consider the expression cheap.
  if (hasRelatedExistingExpansion(S, At: &At, L))
    return false; // Consider the expression to be free.

  // Size-optimized functions are costed by code size rather than throughput.
  TargetTransformInfo::TargetCostKind CostKind =
      L->getHeader()->getParent()->hasMinSize()
          ? TargetTransformInfo::TCK_CodeSize
          : TargetTransformInfo::TCK_RecipThroughput;

  switch (S->getSCEVType()) {
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  case scUnknown:
  case scVScale:
    // Assume to be zero-cost.
    return false;
  case scConstant: {
    // Only evaluate the costs of constants when optimizing for size.
    if (CostKind != TargetTransformInfo::TCK_CodeSize)
      return false;
    const APInt &Imm = cast<SCEVConstant>(Val: S)->getAPInt();
    Type *Ty = S->getType();
    Cost += TTI.getIntImmCostInst(
        Opc: WorkItem.ParentOpcode, Idx: WorkItem.OperandIdx, Imm, Ty, CostKind);
    return Cost > Budget;
  }
  case scTruncate:
  case scPtrToAddr:
  case scPtrToInt:
  case scZeroExtend:
  case scSignExtend: {
    Cost +=
        costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
    return false; // Will answer upon next entry into this function.
  }
  case scUDivExpr: {
    // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
    // HowManyLessThans produced to compute a precise expression, rather than a
    // UDiv from the user's code. If we can't find a UDiv in the code with some
    // simple searching, we need to account for its cost.

    // At the beginning of this function we already tried to find existing
    // value for plain 'S'. Now try to lookup 'S + 1' since it is common
    // pattern involving division. This is just a simple search heuristic.
    if (hasRelatedExistingExpansion(
            S: SE.getAddExpr(LHS: S, RHS: SE.getConstant(Ty: S->getType(), V: 1)), At: &At, L))
      return false; // Consider it to be free.

    Cost +=
        costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
    return false; // Will answer upon next entry into this function.
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
  case scSequentialUMinExpr: {
    assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
           "Nary expr should have more than 1 operand.");
    // The simple nary expr will require one less op (or pair of ops)
    // than the number of its terms.
    Cost +=
        costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
    return Cost > Budget;
  }
  case scAddRecExpr: {
    assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
           "Polynomial should be at least linear");
    Cost += costAndCollectOperands<SCEVAddRecExpr>(
        WorkItem, TTI, CostKind, Worklist);
    return Cost > Budget;
  }
  }
  llvm_unreachable("Unknown SCEV kind!");
}
2171
2172Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2173 Instruction *IP) {
2174 assert(IP);
2175 switch (Pred->getKind()) {
2176 case SCEVPredicate::P_Union:
2177 return expandUnionPredicate(Pred: cast<SCEVUnionPredicate>(Val: Pred), Loc: IP);
2178 case SCEVPredicate::P_Compare:
2179 return expandComparePredicate(Pred: cast<SCEVComparePredicate>(Val: Pred), Loc: IP);
2180 case SCEVPredicate::P_Wrap: {
2181 auto *AddRecPred = cast<SCEVWrapPredicate>(Val: Pred);
2182 return expandWrapPredicate(P: AddRecPred, Loc: IP);
2183 }
2184 }
2185 llvm_unreachable("Unknown SCEV predicate type");
2186}
2187
2188Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
2189 Instruction *IP) {
2190 Value *Expr0 = expand(S: Pred->getLHS(), I: IP);
2191 Value *Expr1 = expand(S: Pred->getRHS(), I: IP);
2192
2193 Builder.SetInsertPoint(IP);
2194 auto InvPred = ICmpInst::getInversePredicate(pred: Pred->getPredicate());
2195 auto *I = Builder.CreateICmp(P: InvPred, LHS: Expr0, RHS: Expr1, Name: "ident.check");
2196 return I;
2197}
2198
/// Build a runtime check that is true iff the affine AddRec \p AR would wrap
/// (unsigned if \p Signed is false, signed otherwise) within its loop's
/// symbolic maximum backedge-taken count. Code is emitted before \p Loc.
Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
                                           Instruction *Loc, bool Signed) {
  assert(AR->isAffine() && "Cannot generate RT check for "
                           "non-affine expression");

  // FIXME: It is highly suspicious that we're ignoring the predicates here.
  SmallVector<const SCEVPredicate *, 4> Pred;
  const SCEV *ExitCount =
      SE.getPredicatedSymbolicMaxBackedgeTakenCount(L: AR->getLoop(), Predicates&: Pred);

  assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");

  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *Start = AR->getStart();

  Type *ARTy = AR->getType();
  // Widths of the backedge-taken count and of the AddRec; if the former is
  // wider, an extra truncation check is needed at the end.
  unsigned SrcBits = SE.getTypeSizeInBits(Ty: ExitCount->getType());
  unsigned DstBits = SE.getTypeSizeInBits(Ty: ARTy);

  // The expression {Start,+,Step} has nusw/nssw if
  //   Step < 0, Start - |Step| * Backedge <= Start
  //   Step >= 0, Start + |Step| * Backedge > Start
  // and |Step| * Backedge doesn't unsigned overflow.

  Builder.SetInsertPoint(Loc);
  Value *TripCountVal = expand(S: ExitCount, I: Loc);

  IntegerType *Ty =
      IntegerType::get(C&: Loc->getContext(), NumBits: SE.getTypeSizeInBits(Ty: ARTy));

  Value *StepValue = expand(S: Step, I: Loc);
  Value *NegStepValue = expand(S: SE.getNegativeSCEV(V: Step), I: Loc);
  Value *StartValue = expand(S: Start, I: Loc);

  ConstantInt *Zero =
      ConstantInt::get(Context&: Loc->getContext(), V: APInt::getZero(numBits: DstBits));

  // Re-establish the insertion point; the expansions above may have moved it.
  Builder.SetInsertPoint(Loc);
  // Compute |Step|
  Value *StepCompare = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: StepValue, RHS: Zero);
  Value *AbsStep = Builder.CreateSelect(C: StepCompare, True: NegStepValue, False: StepValue);

  // Compute |Step| * Backedge
  // Compute:
  //   1. Start + |Step| * Backedge < Start
  //   2. Start - |Step| * Backedge > Start
  //
  // And select either 1. or 2. depending on whether step is positive or
  // negative. If Step is known to be positive or negative, only create
  // either 1. or 2.
  auto ComputeEndCheck = [&]() -> Value * {
    // Get the backedge taken count and truncate or extended to the AR type.
    Value *TruncTripCount = Builder.CreateZExtOrTrunc(V: TripCountVal, DestTy: Ty);

    // |Step| * Backedge, with a bit signalling unsigned multiply overflow.
    CallInst *Mul = Builder.CreateIntrinsic(ID: Intrinsic::umul_with_overflow, Types: Ty,
                                            Args: {AbsStep, TruncTripCount},
                                            /*FMFSource=*/nullptr, Name: "mul");
    Value *MulV = Builder.CreateExtractValue(Agg: Mul, Idxs: 0, Name: "mul.result");
    Value *OfMul = Builder.CreateExtractValue(Agg: Mul, Idxs: 1, Name: "mul.overflow");

    Value *Add = nullptr, *Sub = nullptr;
    // Skip whichever direction SCEV already proves impossible.
    bool NeedPosCheck = !SE.isKnownNegative(S: Step);
    bool NeedNegCheck = !SE.isKnownPositive(S: Step);

    if (isa<PointerType>(Val: ARTy)) {
      // For pointer AddRecs, Start - MulV is expressed as a ptradd of the
      // negated offset.
      Value *NegMulV = Builder.CreateNeg(V: MulV);
      if (NeedPosCheck)
        Add = Builder.CreatePtrAdd(Ptr: StartValue, Offset: MulV);
      if (NeedNegCheck)
        Sub = Builder.CreatePtrAdd(Ptr: StartValue, Offset: NegMulV);
    } else {
      if (NeedPosCheck)
        Add = Builder.CreateAdd(LHS: StartValue, RHS: MulV);
      if (NeedNegCheck)
        Sub = Builder.CreateSub(LHS: StartValue, RHS: MulV);
    }

    Value *EndCompareLT = nullptr;
    Value *EndCompareGT = nullptr;
    Value *EndCheck = nullptr;
    if (NeedPosCheck)
      EndCheck = EndCompareLT = Builder.CreateICmp(
          P: Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, LHS: Add, RHS: StartValue);
    if (NeedNegCheck)
      EndCheck = EndCompareGT = Builder.CreateICmp(
          P: Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, LHS: Sub, RHS: StartValue);
    if (NeedPosCheck && NeedNegCheck) {
      // Select the answer based on the sign of Step.
      EndCheck = Builder.CreateSelect(C: StepCompare, True: EndCompareGT, False: EndCompareLT);
    }
    // Wrap also occurs if |Step| * Backedge itself overflowed.
    return Builder.CreateOr(LHS: EndCheck, RHS: OfMul);
  };
  Value *EndCheck = ComputeEndCheck();

  // If the backedge taken count type is larger than the AR type,
  // check that we don't drop any bits by truncating it. If we are
  // dropping bits, then we have overflow (unless the step is zero).
  if (SrcBits > DstBits) {
    auto MaxVal = APInt::getMaxValue(numBits: DstBits).zext(width: SrcBits);
    auto *BackedgeCheck =
        Builder.CreateICmp(P: ICmpInst::ICMP_UGT, LHS: TripCountVal,
                           RHS: ConstantInt::get(Context&: Loc->getContext(), V: MaxVal));
    BackedgeCheck = Builder.CreateAnd(
        LHS: BackedgeCheck, RHS: Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: StepValue, RHS: Zero));

    EndCheck = Builder.CreateOr(LHS: EndCheck, RHS: BackedgeCheck);
  }

  return EndCheck;
}
2309
2310Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2311 Instruction *IP) {
2312 const auto *A = cast<SCEVAddRecExpr>(Val: Pred->getExpr());
2313 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2314
2315 // Add a check for NUSW
2316 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2317 NUSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: false);
2318
2319 // Add a check for NSSW
2320 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2321 NSSWCheck = generateOverflowCheck(AR: A, Loc: IP, Signed: true);
2322
2323 if (NUSWCheck && NSSWCheck)
2324 return Builder.CreateOr(LHS: NUSWCheck, RHS: NSSWCheck);
2325
2326 if (NUSWCheck)
2327 return NUSWCheck;
2328
2329 if (NSSWCheck)
2330 return NSSWCheck;
2331
2332 return ConstantInt::getFalse(Context&: IP->getContext());
2333}
2334
2335Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2336 Instruction *IP) {
2337 // Loop over all checks in this set.
2338 SmallVector<Value *> Checks;
2339 for (const auto *Pred : Union->getPredicates()) {
2340 Checks.push_back(Elt: expandCodeForPredicate(Pred, IP));
2341 Builder.SetInsertPoint(IP);
2342 }
2343
2344 if (Checks.empty())
2345 return ConstantInt::getFalse(Context&: IP->getContext());
2346 return Builder.CreateOr(Ops: Checks);
2347}
2348
/// If \p V is an instruction defined in a loop that does not contain the
/// current insertion point, create any LCSSA phis required for the new use
/// and return the value to use instead; otherwise return \p V unchanged.
Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
  auto *DefI = dyn_cast<Instruction>(Val: V);
  if (!PreserveLCSSA || !DefI)
    return V;

  // Only uses outside every loop containing the definition need LCSSA phis.
  BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
  Loop *DefLoop = SE.LI.getLoopFor(BB: DefI->getParent());
  Loop *UseLoop = SE.LI.getLoopFor(BB: InsertPt->getParent());
  if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(L: UseLoop))
    return V;

  // Create a temporary instruction at the current insertion point, so we
  // can hand it off to the helper to create LCSSA PHIs if required for the
  // new use.
  // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
  // would accept an insertion point and return an LCSSA phi for that
  // insertion point, so there is no need to insert & remove the temporary
  // instruction.
  // Pick a cast destination type different from DefI's type so the temporary
  // bitcast/ptrcast is a real instruction (never a no-op).
  Type *ToTy;
  if (DefI->getType()->isIntegerTy())
    ToTy = PointerType::get(C&: DefI->getContext(), AddressSpace: 0);
  else
    ToTy = Type::getInt32Ty(C&: DefI->getContext());
  Instruction *User =
      CastInst::CreateBitOrPointerCast(S: DefI, Ty: ToTy, Name: "tmp.lcssa.user", InsertBefore: InsertPt);
  // Make sure the temporary user is erased on every exit path.
  llvm::scope_exit RemoveUserOnExit([User]() { User->eraseFromParent(); });

  SmallVector<Instruction *, 1> ToUpdate;
  ToUpdate.push_back(Elt: DefI);
  SmallVector<PHINode *, 16> PHIsToRemove;
  SmallVector<PHINode *, 16> InsertedPHIs;
  formLCSSAForInstructions(Worklist&: ToUpdate, DT: SE.DT, LI: SE.LI, SE: &SE, PHIsToRemove: &PHIsToRemove,
                           InsertedPHIs: &InsertedPHIs);
  // Track the new phis so later expansions can reuse or clean them up; drop
  // bookkeeping for any phis the helper marked for removal that became dead.
  for (PHINode *PN : InsertedPHIs)
    rememberInstruction(I: PN);
  for (PHINode *PN : PHIsToRemove) {
    if (!PN->use_empty())
      continue;
    InsertedValues.erase(V: PN);
    InsertedPostIncValues.erase(V: PN);
    PN->eraseFromParent();
  }

  // Return the temporary user's operand, which at this point is the value the
  // helper rewired the use to (the LCSSA phi, if one was needed).
  return User->getOperand(i: 0);
}
2394
2395namespace {
2396// Search for a SCEV subexpression that is not safe to expand. Any expression
2397// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2398// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2399// instruction, but the important thing is that we prove the denominator is
2400// nonzero before expansion.
2401//
2402// IVUsers already checks that IV-derived expressions are safe. So this check is
2403// only needed when the expression includes some subexpression that is not IV
2404// derived.
2405//
2406// Currently, we only allow division by a value provably non-zero here.
2407//
2408// We cannot generally expand recurrences unless the step dominates the loop
2409// header. The expander handles the special case of affine recurrences by
2410// scaling the recurrence outside the loop, but this technique isn't generally
2411// applicable. Expanding a nested recurrence outside a loop requires computing
2412// binomial coefficients. This could be done, but the recurrence has to be in a
2413// perfectly reduced form, which can't be guaranteed.
2414struct SCEVFindUnsafe {
2415 ScalarEvolution &SE;
2416 bool CanonicalMode;
2417 bool IsUnsafe = false;
2418
2419 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2420 : SE(SE), CanonicalMode(CanonicalMode) {}
2421
2422 bool follow(const SCEV *S) {
2423 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
2424 if (!SE.isKnownNonZero(S: D->getRHS())) {
2425 IsUnsafe = true;
2426 return false;
2427 }
2428 }
2429 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2430 // For non-affine addrecs or in non-canonical mode we need a preheader
2431 // to insert into.
2432 if (!AR->getLoop()->getLoopPreheader() &&
2433 (!CanonicalMode || !AR->isAffine())) {
2434 IsUnsafe = true;
2435 return false;
2436 }
2437 }
2438 return true;
2439 }
2440 bool isDone() const { return IsUnsafe; }
2441};
2442} // namespace
2443
2444bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
2445 SCEVFindUnsafe Search(SE, CanonicalMode);
2446 visitAll(Root: S, Visitor&: Search);
2447 return !Search.IsUnsafe;
2448}
2449
2450bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2451 const Instruction *InsertionPoint) const {
2452 if (!isSafeToExpand(S))
2453 return false;
2454 // We have to prove that the expanded site of S dominates InsertionPoint.
2455 // This is easy when not in the same block, but hard when S is an instruction
2456 // to be expanded somewhere inside the same block as our insertion point.
2457 // What we really need here is something analogous to an OrderedBasicBlock,
2458 // but for the moment, we paper over the problem by handling two common and
2459 // cheap to check cases.
2460 if (SE.properlyDominates(S, BB: InsertionPoint->getParent()))
2461 return true;
2462 if (SE.dominates(S, BB: InsertionPoint->getParent())) {
2463 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2464 return true;
2465 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S))
2466 if (llvm::is_contained(Range: InsertionPoint->operand_values(), Element: U->getValue()))
2467 return true;
2468 }
2469 return false;
2470}
2471
2472void SCEVExpanderCleaner::cleanup() {
2473 // Result is used, nothing to remove.
2474 if (ResultUsed)
2475 return;
2476
2477 // Restore original poison flags.
2478 for (auto [I, Flags] : Expander.OrigFlags)
2479 Flags.apply(I);
2480
2481 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2482#ifndef NDEBUG
2483 SmallPtrSet<Instruction *, 8> InsertedSet(llvm::from_range,
2484 InsertedInstructions);
2485 (void)InsertedSet;
2486#endif
2487 // Remove sets with value handles.
2488 Expander.clear();
2489
2490 // Remove all inserted instructions.
2491 for (Instruction *I : reverse(C&: InsertedInstructions)) {
2492#ifndef NDEBUG
2493 assert(all_of(I->users(),
2494 [&InsertedSet](Value *U) {
2495 return InsertedSet.contains(cast<Instruction>(U));
2496 }) &&
2497 "removed instruction should only be used by instructions inserted "
2498 "during expansion");
2499#endif
2500 assert(!I->getType()->isVoidTy() &&
2501 "inserted instruction should have non-void types");
2502 I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType()));
2503 I->eraseFromParent();
2504 }
2505}
2506