//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
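//
//      As an illustrative sketch of change 2 (the loop and variable names
//      here are made up, not taken from any particular test): a loop such as
//        'x = 0; for (i = 0; i != n; ++i) x = i * 4;'
//      followed by a use of 'x' has that use rewritten to the equivalent of
//      '(n - 1) * 4' computed outside the loop, after which the loop has no
//      remaining users and can be deleted as dead.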
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;
using namespace SCEVPatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool> LoopPredicationTraps(
    "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
    cl::desc("Predicate conditions that trap in loops with only local writes"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool RunUnswitching = false;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);

  bool runUnswitching() const { return RunUnswitching; }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// Ensure we stay within the bounds of fp values that can be represented as
/// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and
/// double precision respectively (on both the negative and positive side).
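///
/// For example (illustrative values): in IEEE-754 single precision, 2^24 and
/// 2^24 + 1 both round to 16777216.0f, so an fp IV stepping through that
/// range could silently repeat or skip values; the precision check below
/// rejects such conversions.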
static bool isRepresentableAsExactInteger(const APFloat &FPVal,
                                          int64_t IntVal) {
  const auto &FltSema = FPVal.getSemantics();
  if (!APFloat::isIEEELikeFP(FltSema))
    return false;
  return isUIntN(APFloat::semanticsPrecision(FltSema), AbsoluteValue(IntVal));
}

/// Represents a floating-point induction variable pattern that may be
/// convertible to integer form.
struct FloatingPointIV {
  APFloat InitValue;
  APFloat IncrValue;
  APFloat ExitValue;
  FCmpInst *Compare;
  BinaryOperator *Add;

  FloatingPointIV(APFloat Init, APFloat Incr, APFloat Exit, FCmpInst *Compare,
                  BinaryOperator *Add)
      : InitValue(std::move(Init)), IncrValue(std::move(Incr)),
        ExitValue(std::move(Exit)), Compare(Compare), Add(Add) {}
};

/// Represents the integer values for a converted IV.
struct IntegerIV {
  int64_t InitValue;
  int64_t IncrValue;
  int64_t ExitValue;
  CmpInst::Predicate NewPred;
};

static CmpInst::Predicate getIntegerPredicate(CmpInst::Predicate FPPred) {
  switch (FPPred) {
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ:
    return CmpInst::ICMP_EQ;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE:
    return CmpInst::ICMP_NE;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT:
    return CmpInst::ICMP_SGT;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE:
    return CmpInst::ICMP_SGE;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT:
    return CmpInst::ICMP_SLT;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE:
    return CmpInst::ICMP_SLE;
  default:
    return CmpInst::BAD_ICMP_PREDICATE;
  }
}

/// Analyze a PN to determine whether it represents a simple floating-point
/// induction variable, with constant fp init, increment, and exit values.
///
/// Returns a FloatingPointIV struct if matched, std::nullopt otherwise.
static std::optional<FloatingPointIV>
maybeFloatingPointRecurrence(Loop *L, PHINode *PN) {
  // Identify the incoming and backedge for the PN.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check the incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
  if (!InitValueVal)
    return std::nullopt;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (!Incr || Incr->getOpcode() != Instruction::FAdd)
    return std::nullopt;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  auto *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  if (!IncValueVal || Incr->getOperand(0) != PN)
    return std::nullopt;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  // TODO: Should relax this, so as to allow any `fpext` that may occur.
  if (!Incr->hasNUses(2))
    return std::nullopt;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  auto It = llvm::find_if(Incr->users(),
                          [](const User *U) { return isa<FCmpInst>(U); });
  if (It == Incr->users().end())
    return std::nullopt;

  FCmpInst *Compare = cast<FCmpInst>(*It);
  if (!Compare->hasOneUse())
    return std::nullopt;

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be
  // out of the loop.
  auto *BI = dyn_cast<BranchInst>(Compare->user_back());
  if (!BI)
    return std::nullopt;

  assert(BI->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(BI->getParent()) ||
      (L->contains(BI->getSuccessor(0)) && L->contains(BI->getSuccessor(1))))
    return std::nullopt;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  auto *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  if (!ExitValueVal)
    return std::nullopt;

  return FloatingPointIV(InitValueVal->getValueAPF(),
                         IncValueVal->getValueAPF(),
                         ExitValueVal->getValueAPF(), Compare, Incr);
}
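
// A sketch of the kind of IR this matcher is looking for (illustrative only,
// not taken from a real test case):
//
//   loop:
//     %iv = phi double [ 1.0, %entry ], [ %iv.next, %loop ]
//     ...
//     %iv.next = fadd double %iv, 1.0
//     %cmp = fcmp olt double %iv.next, 1.0e+04
//     br i1 %cmp, label %loop, label %exit
//
// Here InitValue = 1.0, IncrValue = 1.0, and ExitValue = 1.0e+04.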

/// Ensure that the floating-point IV can be converted to a
/// semantics-preserving signed 32-bit integer IV.
///
/// Returns an IntegerIV struct if possible, std::nullopt otherwise.
static std::optional<IntegerIV>
tryConvertToIntegerIV(const FloatingPointIV &FPIV) {
  // Convert the floating-point predicate to an integer one.
  auto NewPred = getIntegerPredicate(FPIV.Compare->getPredicate());
  if (NewPred == CmpInst::BAD_ICMP_PREDICATE)
    return std::nullopt;

  // Convert the APFloat values to signed integers.
  int64_t InitValue, IncrValue, ExitValue;
  if (!ConvertToSInt(FPIV.InitValue, InitValue) ||
      !ConvertToSInt(FPIV.IncrValue, IncrValue) ||
      !ConvertToSInt(FPIV.ExitValue, ExitValue))
    return std::nullopt;

  // Bail out if the integers cannot be represented exactly.
  if (!isRepresentableAsExactInteger(FPIV.InitValue, InitValue) ||
      !isRepresentableAsExactInteger(FPIV.ExitValue, ExitValue))
    return std::nullopt;

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for
  // this now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncrValue) || !isInt<32>(ExitValue))
    return std::nullopt;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncrValue == 0)
    return std::nullopt;

  // Positive and negative strides have different safety conditions.
  if (IncrValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return std::nullopt;

    uint32_t Range = uint32_t(ExitValue - InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0)
        return std::nullopt; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncrValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return std::nullopt;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncrValue) < ExitValue)
      return std::nullopt;
  } else {
    // If we have a negative stride, we require the init to be greater than
    // the exit value.
    if (InitValue <= ExitValue)
      return std::nullopt;

    uint32_t Range = uint32_t(InitValue - ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0)
        return std::nullopt; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncrValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return std::nullopt;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncrValue) > ExitValue)
      return std::nullopt;
  }

  return IntegerIV{InitValue, IncrValue, ExitValue, NewPred};
}
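
// As a concrete (made-up) example of the Leftover check above: with
// InitValue = 0, IncrValue = 3, ExitValue = 10 and an ICMP_NE predicate,
// the integer IV would visit 0, 3, 6, 9, 12, ... and never equal 10 before
// wrapping around i32, at which point it behaves differently from the
// original fp IV; Leftover != 0 therefore rejects the conversion for
// equality predicates.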

/// Rewrite the floating-point IV as an integer IV.
static void canonicalizeToIntegerIV(Loop *L, PHINode *PN,
                                    const FloatingPointIV &FPIV,
                                    const IntegerIV &IIV,
                                    const TargetLibraryInfo *TLI,
                                    std::unique_ptr<MemorySSAUpdater> &MSSAU) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
  auto *Incr = cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  auto *BI = cast<BranchInst>(FPIV.Compare->user_back());

  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting floating-point IV to integer IV:\n"
                    << "  Init: " << IIV.InitValue << "\n"
                    << "  Incr: " << IIV.IncrValue << "\n"
                    << "  Exit: " << IIV.ExitValue << "\n"
                    << "  Pred: " << CmpInst::getPredicateName(IIV.NewPred)
                    << "\n"
                    << "  Original PN: " << *PN << "\n");

  // Insert the new integer induction variable.
  PHINode *NewPHI =
      PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
  NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, IIV.InitValue),
                      PN->getIncomingBlock(IncomingEdge));
  NewPHI->setDebugLoc(PN->getDebugLoc());

  Instruction *NewAdd = BinaryOperator::CreateAdd(
      NewPHI, ConstantInt::getSigned(Int32Ty, IIV.IncrValue),
      Incr->getName() + ".int", Incr->getIterator());
  NewAdd->setDebugLoc(Incr->getDebugLoc());
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(
      BI->getIterator(), IIV.NewPred, NewAdd,
      ConstantInt::getSigned(Int32Ty, IIV.ExitValue), FPIV.Compare->getName());
  NewCompare->setDebugLoc(FPIV.Compare->getDebugLoc());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using
  // the new comparison.
  NewCompare->takeName(FPIV.Compare);
  FPIV.Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(FPIV.Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                       PN->getParent()->getFirstInsertionPt());
    Conv->setDebugLoc(PN->getDebugLoc());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
}

/// If the loop has a floating induction variable, then insert a corresponding
/// integer induction variable if possible. For example, the following:
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // See if the PN matches a floating-point IV pattern.
  auto FPIV = maybeFloatingPointRecurrence(L, PN);
  if (!FPIV)
    return false;

  // Can we safely convert the floating-point values to integer ones?
  auto IIV = tryConvertToIntegerIV(*FPIV);
  if (!IIV)
    return false;

  // Perform the rewriting.
  canonicalizeToIntegerIV(L, PN, *FPIV, *IIV, TLI, MSSAU);
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis
  // by the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs(llvm::make_pointer_range(Header->phis()));

  bool Changed = false;
  for (WeakTrackingVH &PHI : PHIs)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}
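
// A small illustration (not from a real test) of the situation handled above:
//
//   for (i = start; ...; ++i) {
//     if (loop_invariant_cond)   // condition does not depend on i
//       break;
//     ...
//   }
//   use(i_lcssa);                // PHI in the exit block
//
// Because the guarding condition is loop invariant and its block dominates
// the latch, the break can only be taken on the first iteration, so the exit
// PHI's incoming value can be replaced by 'start' (the header PHI's
// preheader value).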

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width
/// of the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on
  // this later). This takes care of cases where `Cast` is extending a
  // truncation of the narrow induction variable, and thus can end up being
  // narrower than the "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI nodes' users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
// Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = Intrinsic::getDeclarationIfExists(
      L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis(
      llvm::make_pointer_range(L->getHeader()->phis()));

  // Each round of simplification iterates through the SimplifyIVUsers
  // worklist for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero
    // extension, the result becomes final. So for the most predictable
    // results, we delay evaluation of sign/zero extend evaluation until
    // needed, and avoid running other SCEV based analysis prior to
    // simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
                                             Rewriter, &Visitor);

      Changed |= C;
      RunUnswitching |= U;
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if the PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}
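
// For intuition (a made-up example): an exit test of the form
//   %cmp = icmp ne i64 %iv.next, %n   ; %iv is a {0,+,1} counter, %n invariant
// is already canonical and needsLFTR returns false, whereas something like
//   %cmp = icmp slt i64 %derived, 1000   ; derived value, non-EQ/NE predicate
// is not, and LFTR will try to rewrite it against a computed trip count.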

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value *> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth + 1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value *, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode *Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEV *S = SE->getSCEV(Phi);
  if (!match(S, m_scev_AffineAddRec(m_SCEV(), m_scev_One(), m_SpecificLoop(L))))
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that
    // may have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist
    // in the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form.
      // It also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider
      // phi to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of loop
/// L is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // For integer IVs, truncate the IV before computing the limit unless we
  // know a priori that the limit must be a constant when evaluated in the
  // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
  // IV in the loop over a (potentially) expensive expansion of the widened
  // exit count add(zext(add)) expression.
  if (IndVar->getType()->isIntegerTy() &&
      SE->getTypeSizeInBits(AR->getType()) >
          SE->getTypeSizeInBits(ExitCount->getType())) {
    const SCEV *IVInit = AR->getStart();
    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount)) {
      const SCEV *TruncExpr = SE->getTruncateExpr(AR, ExitCount->getType());

      // The following bailout is necessary due to the interaction with
      // the depth limit in SCEV analysis.
      if (!isa<SCEVAddRecExpr>(TruncExpr))
        return nullptr;
      AR = cast<SCEVAddRecExpr>(TruncExpr);
    }
  }

  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
  const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
  assert(SE->isLoopInvariant(IVLimit, L) &&
         "Computed iteration count is not loop invariant!");
  return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
                                ExitingBB->getTerminator());
}
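
// A worked example of the limit computation (illustrative numbers only): for
// an IV {5,+,1} and ExitCount = 100, evaluateAtIteration yields 5 + 100 = 105
// for the pre-increment form, or 106 for the post-increment form ({6,+,1}
// evaluated at the same iteration). linearFunctionTestReplace then compares
// the chosen IV value against this expanded limit with an EQ/NE test.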

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a
/// much broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction *const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  Value *ExitCnt =
      genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  if (!ExitCnt)
    return false;

  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last
  // iteration only) or if LFTR switches to a different IV that was previously
  // dynamically dead (and as such may be arbitrarily poison). We remove any
  // nowrap flags that SCEV didn't infer for the post-inc addrec (even if we
  // use a pre-inc check), because the pre-inc addrec flags may be adopted
  // from the original instruction, while SCEV has to explicitly prove the
  // post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can
    // extend the other side of the comparison instead. We still evaluate the
    // limit in the narrower bitwidth, we just prefer a zext/sext outside the
    // loop to a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
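
// A before/after sketch of the rewrite performed above (made-up IR, not from
// a specific test): an exit branch on
//   %cmp = icmp slt i32 %iv.next, %limit
// becomes a branch on
//   %exitcond = icmp ne i32 %iv.next, %wide.trip.count
// where the RHS is the value expanded by genLoopLimit; the old comparison is
// only detached from the branch and left to later dead-code cleanup.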

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {

    // Skip BB Terminator.
    if (Preheader->getTerminator() == &I)
      continue;

    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I.mayHaveSideEffects() || I.mayReadFromMemory())
      continue;

    // Skip debug or pseudo instructions.
    if (I.isDebugOrPseudoInst())
      continue;

    // Skip eh pad instructions.
    if (I.isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(&I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I.uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    I.moveBefore(ExitBlock->getFirstInsertionPt());
    SE->forgetValue(&I);
    MadeAnyChanges = true;
  }

  return MadeAnyChanges;
}
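
// For instance (illustrative): a preheader computation like
//   %p = getelementptr inbounds i32, ptr %base, i64 %off
// whose only users sit in the single exit block can be moved into that exit
// block, shortening the live range of %p across the loop body.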
1278
1279static void replaceExitCond(BranchInst *BI, Value *NewCond,
1280 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1281 auto *OldCond = BI->getCondition();
1282 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1283 << " with " << *NewCond << "\n");
1284 BI->setCondition(NewCond);
1285 if (OldCond->use_empty())
1286 DeadInsts.emplace_back(Args&: OldCond);
1287}
1288
1289static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1290 bool IsTaken) {
1291 BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1292 bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1293 auto *OldCond = BI->getCondition();
1294 return ConstantInt::get(Ty: OldCond->getType(),
1295 V: IsTaken ? ExitIfTrue : !ExitIfTrue);
1296}
1297
1298static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1299 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1300 BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1301 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1302 replaceExitCond(BI, NewCond, DeadInsts);
1303}
1304
1305static void replaceLoopPHINodesWithPreheaderValues(
1306 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1307 ScalarEvolution &SE) {
1308 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1309 auto *LoopPreheader = L->getLoopPreheader();
1310 auto *LoopHeader = L->getHeader();
1311 SmallVector<Instruction *> Worklist;
1312 for (auto &PN : LoopHeader->phis()) {
1313 auto *PreheaderIncoming = PN.getIncomingValueForBlock(BB: LoopPreheader);
1314 for (User *U : PN.users())
1315 Worklist.push_back(Elt: cast<Instruction>(Val: U));
1316 SE.forgetValue(V: &PN);
1317 PN.replaceAllUsesWith(V: PreheaderIncoming);
1318 DeadInsts.emplace_back(Args: &PN);
1319 }
1320
1321 // Replacing with the preheader value will often allow IV users to simplify
1322 // (especially if the preheader value is a constant).
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}

static Value *
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
                    SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BasicBlock *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader doesn't exist");
  Rewriter.setInsertPoint(Preheader->getTerminator());
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(Preheader->getTerminator());
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}

static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  CmpPredicate Pred = ICmp->getCmpPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = ICmpInst::getInverseCmpPredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically, skipping the last iteration means "subtract 1, do not
    // bother about unsigned wrap". getLoopInvariantExitCondDuringFirstIterations
    // knows how to deal with umin in a smart way, but umin(a, b) - 1 will
    // likely not simplify. So we manually construct umin(a - 1, b - 1).
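    // For example, with MaxIter = umin(%a, %b) we build umin(%a - 1, %b - 1)
    // rather than umin(%a, %b) - 1, since the latter would likely stay
    // unsimplified.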
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (const SCEV *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For a branch that stays in the loop on a TRUE condition, go through AND.
  // For a branch that stays in the loop on a FALSE condition, go through OR.
  // Both give the same logic: "stay in loop iff all conditions are true (false)".
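  // For example, "br i1 (and i1 %c1, i1 %c2), label %loop, label %exit" stays
  // in the loop only while both %c1 and %c2 are true, while the inverted form
  // "br i1 (or i1 %c1, i1 %c2), label %exit, label %loop" stays only while
  // both are false.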
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exactly
  // the same exit count. For all other icmp's, we could use one iteration less,
  // because their value on the last iteration doesn't really matter.
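  // For example, given an exit condition %c1 && %c2 where only %c1 limits the
  // trip count to MaxIter, %c2 need not hold on the last iteration, so it can
  // be checked for one iteration fewer.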
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsOnlyExit*/ false);
      const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip the last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on the last iter, so this one
    //   doesn't really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on the last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part of SimplifyIndVars' reasoning.
  // We need to duplicate it because, given "icmp zext(small-iv), C", IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative would be extending the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice: it queries SCEVs before the trip counts that rely on
  // them, which makes SCEV cache sub-optimal answers. That concern about
  // caching sub-optimal results is why we only query SCEVs of the loop
  // invariant RHS here.
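  // For example, "%w = zext i32 %iv to i64; icmp slt i64 %w, %n" can use ult
  // instead when loop guards show %n lies in [0, UINT32_MAX]: both operands
  // are then non-negative in i64, so the signed and unsigned predicates agree.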
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have a zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when the zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
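    // For example, "icmp ult (zext i32 %iv to i64), %bound" becomes
    // "icmp ult i32 %iv, <trunc of %bound>", with the trunc inserted in the
    // preheader.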
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      // NewRHS is an operation that has been hoisted out of the loop, and
      // therefore should have a dropped location.
      NewRHS->setDebugLoc(DebugLoc::getDropped());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      // The samesign flag cannot be preserved after narrowing the compare.
      ICmp->setSameSign(false);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note: we are leaving SCEV in an unfortunately imprecise state here,
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and guaranteed to execute
  // on every iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
      // If already constant, nothing to do. However, if this is an
      // unconditional exit, we can still replace header phis with their
      // preheader value.
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      return true;
    }

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxBECount))
    return false;

  // Visit our exit blocks in order of dominance. Since all exits must
  // dominate the latch, there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // llvm::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
      return;
    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
      CurrMaxExit = MaxExitCount;
    else
      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
    // If the loop has more than 1 iteration, all further checks will be
    // executed for one iteration less.
    if (CurrMaxExit == MaxBECount)
      SkipLastIter = true;
  };
  SmallPtrSet<const SCEV *, 8> DominatingExactExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
    const SCEV *MaxExitCount = SE->getExitCount(
        L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
    if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that
      // it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
                                                    MaxBECount, SkipLastIter,
                                                    SE, Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxBECount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove that
      // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both the last and the pre-last
      // iterations in the hope that we will be able to prove triviality for
      // at least one of them. We can stop querying MaxBECount for this case
      // once SCEV understands that (MaxBECount - 1) will not overflow here.
      if (OptimizeCond(false))
        Changed = true;
      else if (SkipLastIter && OptimizeCond(true))
        Changed = true;
      UpdateSkipLastIter(MaxExitCount);
      continue;
    }

    UpdateSkipLastIter(ExactExitCount);

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
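    // For example, the "i != 0" exit in "for (i = 0; i != 0 && f(i); ++i)" has
    // an exact count of zero: the branch folds to always exit and the header
    // phis collapse to their preheader (start) values.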
    if (ExactExitCount->isZero()) {
      foldExit(L, ExitingBB, /*IsTaken*/ true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      Changed = true;
      continue;
    }

    assert(ExactExitCount->getType()->isIntegerTy() &&
           MaxBECount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
    ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
    MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
    assert(MaxBECount->getType() == ExactExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
                                     ExactExitCount)) {
      foldExit(L, ExitingBB, /*IsTaken*/ false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
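    // For example, if two exits both have an exact count of 10, the dominated
    // one can never be the first to trigger, so its branch can be folded to
    // stay in the loop.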
    if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
      foldExit(L, ExitingBB, /*IsTaken*/ false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the existing
    // transform (since later exits aren't considered), but potentially more
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
    // nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

static bool crashingBBWithoutEffect(const BasicBlock &BB) {
  return llvm::all_of(BB, [](const Instruction &I) {
    // TODO: for now this is overly restrictive, to make sure nothing in this
    // BB can depend on the loop body.
    // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
    // load does not have a side effect, but we could have
    //   %a = load ptr, ptr %ptr
    //   %b = load i32, ptr %a
    // Now if the loop stored a non-null pointer to %ptr, skipping the loop
    // iterations could leave %a null and turn the second load into a null
    // dereference.
    if (const auto *CB = dyn_cast<CallBase>(&I)) {
      if (CB->onlyAccessesInaccessibleMemory())
        return true;
    }
    return isa<UnreachableInst>(I);
  });
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // neither a) tells us the loop exits on the first iteration (unless *all*
  // exits are predicateable) nor b) tells us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
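  // For example, in a read-only search loop such as
  //   for (i = 0; i != n; i++) { if (a[i] == key) break; }
  // each exit test can be replaced by a loop-invariant comparison of that
  // exit's count against the exact backedge-taken count.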
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivial LCSSA phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // Make sure all exits dominate the latch. This means there is a linear chain
  // of exits. We check this before sorting so we have a total order.
  BasicBlock *Latch = L->getLoopLatch();
  for (BasicBlock *ExitingBB : ExitingBlocks)
    if (!DT->dominates(ExitingBB, Latch))
      return false;

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known to be
  // unequal.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // llvm::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B)
      return false;
    if (DT->properlyDominates(A, B))
      return true;
    if (DT->properlyDominates(B, A))
      return false;
    llvm_unreachable("Should have total dominance order");
  });

  // Make sure our exit blocks are really a total order (i.e. a linear chain of
  // exits before the backedge).
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
           "Not sorted by dominance");

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  bool HasThreadLocalSideEffects = false;
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB) {
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects()) {
        if (!LoopPredicationTraps)
          return false;
        HasThreadLocalSideEffects = true;
        if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
          // Simple stores cannot be observed by other threads.
          // If HasThreadLocalSideEffects is set, we check
          // crashingBBWithoutEffect to make sure that the crashing BB cannot
          // observe them either.
          if (!SI->isSimple())
            return false;
        } else {
          return false;
        }
      }

      // Skip if the loop has tokens referenced outside the loop to avoid
      // changing convergence behavior.
      if (I.getType()->isTokenTy()) {
        for (User *U : I.users()) {
          Instruction *UserInst = dyn_cast<Instruction>(U);
          if (UserInst && !L->contains(UserInst)) {
            return false;
          }
        }
      }
    }

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
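  // For an exit whose count expands to %ec, the branch condition becomes a
  // compare of %ec against the expanded exact BTC (eq or ne, depending on
  // which successor stays in the loop); the compare is loop invariant even
  // though it is still evaluated inside the loop.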
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    if (HasThreadLocalSideEffects) {
      const BasicBlock *Unreachable = nullptr;
      for (const BasicBlock *Succ : BI->successors()) {
        if (isa<UnreachableInst>(Succ->getTerminator()))
          Unreachable = Succ;
      }
      // An exit BB which has one branch back into the loop and another one to
      // a trap can still be optimized, because local side effects cannot
      // be observed in the exit case (the trap). We could be smarter about
      // this, but for now let's pattern match common cases that directly trap.
      if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
        return Changed;
    }
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
    RunUnswitching = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, "indvars");
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    // Some nested loops may share the same folded exit basic block,
    // so we need to notify the topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (IVS.runUnswitching()) {
    AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
    PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
  }

  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}