//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//
25
26#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/SmallPtrSet.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/Statistic.h"
33#include "llvm/ADT/iterator_range.h"
34#include "llvm/Analysis/LoopInfo.h"
35#include "llvm/Analysis/LoopPass.h"
36#include "llvm/Analysis/MemorySSA.h"
37#include "llvm/Analysis/MemorySSAUpdater.h"
38#include "llvm/Analysis/ScalarEvolution.h"
39#include "llvm/Analysis/ScalarEvolutionExpressions.h"
40#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
41#include "llvm/Analysis/TargetLibraryInfo.h"
42#include "llvm/Analysis/TargetTransformInfo.h"
43#include "llvm/Analysis/ValueTracking.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/ConstantRange.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/DerivedTypes.h"
50#include "llvm/IR/Dominators.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
55#include "llvm/IR/Instructions.h"
56#include "llvm/IR/IntrinsicInst.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/PassManager.h"
59#include "llvm/IR/PatternMatch.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
65#include "llvm/Support/Casting.h"
66#include "llvm/Support/CommandLine.h"
67#include "llvm/Support/Debug.h"
68#include "llvm/Support/MathExtras.h"
69#include "llvm/Support/raw_ostream.h"
70#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
71#include "llvm/Transforms/Utils/BasicBlockUtils.h"
72#include "llvm/Transforms/Utils/Local.h"
73#include "llvm/Transforms/Utils/LoopUtils.h"
74#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
75#include "llvm/Transforms/Utils/SimplifyIndVar.h"
76#include <cassert>
77#include <cstdint>
78#include <utility>
79
80using namespace llvm;
81using namespace PatternMatch;
82using namespace SCEVPatternMatch;
83
84#define DEBUG_TYPE "indvars"
85
86STATISTIC(NumWidened , "Number of indvars widened");
87STATISTIC(NumReplaced , "Number of exit values replaced");
88STATISTIC(NumLFTR , "Number of loop exit tests replaced");
89STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
90STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
91
92static cl::opt<ReplaceExitVal> ReplaceExitValue(
93 "replexitval", cl::Hidden, cl::init(Val: OnlyCheapRepl),
94 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
95 cl::values(
96 clEnumValN(NeverRepl, "never", "never replace exit value"),
97 clEnumValN(OnlyCheapRepl, "cheap",
98 "only replace exit value when the cost is cheap"),
99 clEnumValN(
100 UnusedIndVarInLoop, "unusedindvarinloop",
101 "only replace exit value when it is an unused "
102 "induction variable in the loop and has cheap replacement cost"),
103 clEnumValN(NoHardUse, "noharduse",
104 "only replace exit values when loop def likely dead"),
105 clEnumValN(AlwaysRepl, "always",
106 "always replace exit value whenever possible")));
107
108static cl::opt<bool> UsePostIncrementRanges(
109 "indvars-post-increment-ranges", cl::Hidden,
110 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
111 cl::init(Val: true));
112
113static cl::opt<bool>
114DisableLFTR("disable-lftr", cl::Hidden, cl::init(Val: false),
115 cl::desc("Disable Linear Function Test Replace optimization"));
116
117static cl::opt<bool>
118LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(Val: true),
119 cl::desc("Predicate conditions in read only loops"));
120
121static cl::opt<bool> LoopPredicationTraps(
122 "indvars-predicate-loop-traps", cl::Hidden, cl::init(Val: true),
123 cl::desc("Predicate conditions that trap in loops with only local writes"));
124
125static cl::opt<bool>
126AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(Val: true),
127 cl::desc("Allow widening of indvars to eliminate s/zext"));
128
129namespace {
130
131class IndVarSimplify {
132 LoopInfo *LI;
133 ScalarEvolution *SE;
134 DominatorTree *DT;
135 const DataLayout &DL;
136 TargetLibraryInfo *TLI;
137 const TargetTransformInfo *TTI;
138 std::unique_ptr<MemorySSAUpdater> MSSAU;
139
140 SmallVector<WeakTrackingVH, 16> DeadInsts;
141 bool WidenIndVars;
142
143 bool RunUnswitching = false;
144
145 bool handleFloatingPointIV(Loop *L, PHINode *PH);
146 bool rewriteNonIntegerIVs(Loop *L);
147
148 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
149 /// Try to improve our exit conditions by converting condition from signed
150 /// to unsigned or rotating computation out of the loop.
151 /// (See inline comment about why this is duplicated from simplifyAndExtend)
152 bool canonicalizeExitCondition(Loop *L);
153 /// Try to eliminate loop exits based on analyzeable exit counts
154 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
155 /// Try to form loop invariant tests for loop exits by changing how many
156 /// iterations of the loop run when that is unobservable.
157 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
158
159 bool rewriteFirstIterationLoopExitValues(Loop *L);
160
161 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
162 const SCEV *ExitCount,
163 PHINode *IndVar, SCEVExpander &Rewriter);
164
165 bool sinkUnusedInvariants(Loop *L);
166
167public:
168 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
169 const DataLayout &DL, TargetLibraryInfo *TLI,
170 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
171 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
172 WidenIndVars(WidenIndVars) {
173 if (MSSA)
174 MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
175 }
176
177 bool run(Loop *L);
178
179 bool runUnswitching() const { return RunUnswitching; }
180};
181
182} // end anonymous namespace
183
184//===----------------------------------------------------------------------===//
185// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
186//===----------------------------------------------------------------------===//
187
188/// Convert APF to an integer, if possible.
189static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
190 bool isExact = false;
191 // See if we can convert this to an int64_t
192 uint64_t UIntVal;
193 if (APF.convertToInteger(Input: MutableArrayRef(UIntVal), Width: 64, IsSigned: true,
194 RM: APFloat::rmTowardZero, IsExact: &isExact) != APFloat::opOK ||
195 !isExact)
196 return false;
197 IntVal = UIntVal;
198 return true;
199}
200
201/// Ensure we stay within the bounds of fp values that can be represented as
202/// integers without gaps, which are 2^24 and 2^53 for IEEE-754 single and
203/// double precision respectively (both on negative and positive side).
204static bool isRepresentableAsExactInteger(const APFloat &FPVal,
205 int64_t IntVal) {
206 const auto &FltSema = FPVal.getSemantics();
207 if (!APFloat::isIEEELikeFP(FltSema))
208 return false;
209 return isUIntN(N: APFloat::semanticsPrecision(FltSema), x: AbsoluteValue(X: IntVal));
210}
211
212/// Represents a floating-point induction variable pattern that may be
213/// convertible to integer form.
214struct FloatingPointIV {
215 APFloat InitValue;
216 APFloat IncrValue;
217 APFloat ExitValue;
218 FCmpInst *Compare;
219 BinaryOperator *Add;
220
221 FloatingPointIV(APFloat Init, APFloat Incr, APFloat Exit, FCmpInst *Compare,
222 BinaryOperator *Add)
223 : InitValue(std::move(Init)), IncrValue(std::move(Incr)),
224 ExitValue(std::move(Exit)), Compare(Compare), Add(Add) {}
225};
226
227/// Represents the integer values for a converted IV.
228struct IntegerIV {
229 int64_t InitValue;
230 int64_t IncrValue;
231 int64_t ExitValue;
232 CmpInst::Predicate NewPred;
233};
234
235static CmpInst::Predicate getIntegerPredicate(CmpInst::Predicate FPPred) {
236 switch (FPPred) {
237 case CmpInst::FCMP_OEQ:
238 case CmpInst::FCMP_UEQ:
239 return CmpInst::ICMP_EQ;
240 case CmpInst::FCMP_ONE:
241 case CmpInst::FCMP_UNE:
242 return CmpInst::ICMP_NE;
243 case CmpInst::FCMP_OGT:
244 case CmpInst::FCMP_UGT:
245 return CmpInst::ICMP_SGT;
246 case CmpInst::FCMP_OGE:
247 case CmpInst::FCMP_UGE:
248 return CmpInst::ICMP_SGE;
249 case CmpInst::FCMP_OLT:
250 case CmpInst::FCMP_ULT:
251 return CmpInst::ICMP_SLT;
252 case CmpInst::FCMP_OLE:
253 case CmpInst::FCMP_ULE:
254 return CmpInst::ICMP_SLE;
255 default:
256 return CmpInst::BAD_ICMP_PREDICATE;
257 }
258}
259
260/// Analyze a PN to determine whether it represents a simple floating-point
261/// induction variable, with constant fp init, increment, and exit values.
262///
263/// Returns a FloatingPointIV struct if matched, std::nullopt otherwise.
264static std::optional<FloatingPointIV>
265maybeFloatingPointRecurrence(Loop *L, PHINode *PN) {
266 // Identify incoming and backedge for the PN.
267 unsigned IncomingEdge = L->contains(BB: PN->getIncomingBlock(i: 0));
268 unsigned BackEdge = IncomingEdge ^ 1;
269
270 // Check incoming value.
271 auto *InitValueVal = dyn_cast<ConstantFP>(Val: PN->getIncomingValue(i: IncomingEdge));
272 if (!InitValueVal)
273 return std::nullopt;
274
275 // Check IV increment. Reject this PN if increment operation is not
276 // an add or increment value can not be represented by an integer.
277 auto *Incr = dyn_cast<BinaryOperator>(Val: PN->getIncomingValue(i: BackEdge));
278 if (!Incr || Incr->getOpcode() != Instruction::FAdd)
279 return std::nullopt;
280
281 // If this is not an add of the PHI with a constantfp, or if the constant fp
282 // is not an integer, bail out.
283 auto *IncValueVal = dyn_cast<ConstantFP>(Val: Incr->getOperand(i_nocapture: 1));
284 if (!IncValueVal || Incr->getOperand(i_nocapture: 0) != PN)
285 return std::nullopt;
286
287 // Check Incr uses. One user is PN and the other user is an exit condition
288 // used by the conditional terminator.
289 // TODO: Should relax this, so as to allow any `fpext` that may occur.
290 if (!Incr->hasNUses(N: 2))
291 return std::nullopt;
292
293 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
294 // only used by a branch, we can't transform it.
295 auto It = llvm::find_if(Range: Incr->users(),
296 P: [](const User *U) { return isa<FCmpInst>(Val: U); });
297 if (It == Incr->users().end())
298 return std::nullopt;
299
300 FCmpInst *Compare = cast<FCmpInst>(Val: *It);
301 if (!Compare->hasOneUse())
302 return std::nullopt;
303
304 // We need to verify that the branch actually controls the iteration count
305 // of the loop. If not, the new IV can overflow and no one will notice.
306 // The branch block must be in the loop and one of the successors must be out
307 // of the loop.
308 auto *BI = dyn_cast<CondBrInst>(Val: Compare->user_back());
309 if (!BI)
310 return std::nullopt;
311
312 if (!L->contains(BB: BI->getParent()) ||
313 (L->contains(BB: BI->getSuccessor(i: 0)) && L->contains(BB: BI->getSuccessor(i: 1))))
314 return std::nullopt;
315
316 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
317 // transform it.
318 auto *ExitValueVal = dyn_cast<ConstantFP>(Val: Compare->getOperand(i_nocapture: 1));
319 if (!ExitValueVal)
320 return std::nullopt;
321
322 return FloatingPointIV(InitValueVal->getValueAPF(),
323 IncValueVal->getValueAPF(),
324 ExitValueVal->getValueAPF(), Compare, Incr);
325}
326
327/// Ensure that the floating-point IV can be converted to a semantics-preserving
328/// signed 32-bit integer IV.
329///
330/// Returns a IntegerIV struct if possible, std::nullopt otherwise.
331static std::optional<IntegerIV>
332tryConvertToIntegerIV(const FloatingPointIV &FPIV) {
333 // Convert floating-point predicate to integer.
334 auto NewPred = getIntegerPredicate(FPPred: FPIV.Compare->getPredicate());
335 if (NewPred == CmpInst::BAD_ICMP_PREDICATE)
336 return std::nullopt;
337
338 // Convert APFloat values to signed integers.
339 int64_t InitValue, IncrValue, ExitValue;
340 if (!ConvertToSInt(APF: FPIV.InitValue, IntVal&: InitValue) ||
341 !ConvertToSInt(APF: FPIV.IncrValue, IntVal&: IncrValue) ||
342 !ConvertToSInt(APF: FPIV.ExitValue, IntVal&: ExitValue))
343 return std::nullopt;
344
345 // Bail out if integers cannot be represented exactly.
346 if (!isRepresentableAsExactInteger(FPVal: FPIV.InitValue, IntVal: InitValue) ||
347 !isRepresentableAsExactInteger(FPVal: FPIV.ExitValue, IntVal: ExitValue))
348 return std::nullopt;
349
350 // We convert the floating point induction variable to a signed i32 value if
351 // we can. This is only safe if the comparison will not overflow in a way that
352 // won't be trapped by the integer equivalent operations. Check for this now.
353 // TODO: We could use i64 if it is native and the range requires it.
354
355 // The start/stride/exit values must all fit in signed i32.
356 if (!isInt<32>(x: InitValue) || !isInt<32>(x: IncrValue) || !isInt<32>(x: ExitValue))
357 return std::nullopt;
358
359 // If not actually striding (add x, 0.0), avoid touching the code.
360 if (IncrValue == 0)
361 return std::nullopt;
362
363 // Positive and negative strides have different safety conditions.
364 if (IncrValue > 0) {
365 // If we have a positive stride, we require the init to be less than the
366 // exit value.
367 if (InitValue >= ExitValue)
368 return std::nullopt;
369
370 uint32_t Range = uint32_t(ExitValue - InitValue);
371 // Check for infinite loop, either:
372 // while (i <= Exit) or until (i > Exit)
373 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
374 if (++Range == 0)
375 return std::nullopt; // Range overflows.
376 }
377
378 unsigned Leftover = Range % uint32_t(IncrValue);
379
380 // If this is an equality comparison, we require that the strided value
381 // exactly land on the exit value, otherwise the IV condition will wrap
382 // around and do things the fp IV wouldn't.
383 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
384 Leftover != 0)
385 return std::nullopt;
386
387 // If the stride would wrap around the i32 before exiting, we can't
388 // transform the IV.
389 if (Leftover != 0 && int32_t(ExitValue + IncrValue) < ExitValue)
390 return std::nullopt;
391 } else {
392 // If we have a negative stride, we require the init to be greater than the
393 // exit value.
394 if (InitValue <= ExitValue)
395 return std::nullopt;
396
397 uint32_t Range = uint32_t(InitValue - ExitValue);
398 // Check for infinite loop, either:
399 // while (i >= Exit) or until (i < Exit)
400 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
401 if (++Range == 0)
402 return std::nullopt; // Range overflows.
403 }
404
405 unsigned Leftover = Range % uint32_t(-IncrValue);
406
407 // If this is an equality comparison, we require that the strided value
408 // exactly land on the exit value, otherwise the IV condition will wrap
409 // around and do things the fp IV wouldn't.
410 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
411 Leftover != 0)
412 return std::nullopt;
413
414 // If the stride would wrap around the i32 before exiting, we can't
415 // transform the IV.
416 if (Leftover != 0 && int32_t(ExitValue + IncrValue) > ExitValue)
417 return std::nullopt;
418 }
419
420 return IntegerIV{.InitValue: InitValue, .IncrValue: IncrValue, .ExitValue: ExitValue, .NewPred: NewPred};
421}
422
423/// Rewrite the floating-point IV as an integer IV.
424static void canonicalizeToIntegerIV(Loop *L, PHINode *PN,
425 const FloatingPointIV &FPIV,
426 const IntegerIV &IIV,
427 const TargetLibraryInfo *TLI,
428 std::unique_ptr<MemorySSAUpdater> &MSSAU) {
429 unsigned IncomingEdge = L->contains(BB: PN->getIncomingBlock(i: 0));
430 unsigned BackEdge = IncomingEdge ^ 1;
431
432 IntegerType *Int32Ty = Type::getInt32Ty(C&: PN->getContext());
433 auto *Incr = cast<BinaryOperator>(Val: PN->getIncomingValue(i: BackEdge));
434 auto *BI = cast<CondBrInst>(Val: FPIV.Compare->user_back());
435
436 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting floating-point IV to integer IV:\n"
437 << " Init: " << IIV.InitValue << "\n"
438 << " Incr: " << IIV.IncrValue << "\n"
439 << " Exit: " << IIV.ExitValue << "\n"
440 << " Pred: " << CmpInst::getPredicateName(IIV.NewPred)
441 << "\n"
442 << " Original PN: " << *PN << "\n");
443
444 // Insert new integer induction variable.
445 PHINode *NewPHI =
446 PHINode::Create(Ty: Int32Ty, NumReservedValues: 2, NameStr: PN->getName() + ".int", InsertBefore: PN->getIterator());
447 NewPHI->addIncoming(V: ConstantInt::getSigned(Ty: Int32Ty, V: IIV.InitValue),
448 BB: PN->getIncomingBlock(i: IncomingEdge));
449 NewPHI->setDebugLoc(PN->getDebugLoc());
450
451 Instruction *NewAdd = BinaryOperator::CreateAdd(
452 V1: NewPHI, V2: ConstantInt::getSigned(Ty: Int32Ty, V: IIV.IncrValue),
453 Name: Incr->getName() + ".int", InsertBefore: Incr->getIterator());
454 NewAdd->setDebugLoc(Incr->getDebugLoc());
455 NewPHI->addIncoming(V: NewAdd, BB: PN->getIncomingBlock(i: BackEdge));
456
457 ICmpInst *NewCompare = new ICmpInst(
458 BI->getIterator(), IIV.NewPred, NewAdd,
459 ConstantInt::getSigned(Ty: Int32Ty, V: IIV.ExitValue), FPIV.Compare->getName());
460 NewCompare->setDebugLoc(FPIV.Compare->getDebugLoc());
461
462 // In the following deletions, PN may become dead and may be deleted.
463 // Use a WeakTrackingVH to observe whether this happens.
464 WeakTrackingVH WeakPH = PN;
465
466 // Delete the old floating point exit comparison. The branch starts using the
467 // new comparison.
468 NewCompare->takeName(V: FPIV.Compare);
469 FPIV.Compare->replaceAllUsesWith(V: NewCompare);
470 RecursivelyDeleteTriviallyDeadInstructions(V: FPIV.Compare, TLI, MSSAU: MSSAU.get());
471
472 // Delete the old floating point increment.
473 Incr->replaceAllUsesWith(V: PoisonValue::get(T: Incr->getType()));
474 RecursivelyDeleteTriviallyDeadInstructions(V: Incr, TLI, MSSAU: MSSAU.get());
475
476 // If the FP induction variable still has uses, this is because something else
477 // in the loop uses its value. In order to canonicalize the induction
478 // variable, we chose to eliminate the IV and rewrite it in terms of an
479 // int->fp cast.
480 //
481 // We give preference to sitofp over uitofp because it is faster on most
482 // platforms.
483 if (WeakPH) {
484 Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
485 PN->getParent()->getFirstInsertionPt());
486 Conv->setDebugLoc(PN->getDebugLoc());
487 PN->replaceAllUsesWith(V: Conv);
488 RecursivelyDeleteTriviallyDeadInstructions(V: PN, TLI, MSSAU: MSSAU.get());
489 }
490}
491
492/// If the loop has a floating induction variable, then insert corresponding
493/// integer induction variable if possible. For example, the following:
494/// for(double i = 0; i < 10000; ++i)
495/// bar(i)
496/// is converted into
497/// for(int i = 0; i < 10000; ++i)
498/// bar((double)i);
499bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
500 // See if the PN matches a floating-point IV pattern.
501 auto FPIV = maybeFloatingPointRecurrence(L, PN);
502 if (!FPIV)
503 return false;
504
505 // Can we safely convert the floating-point values to integer ones?
506 auto IIV = tryConvertToIntegerIV(FPIV: *FPIV);
507 if (!IIV)
508 return false;
509
510 // Perform the rewriting.
511 canonicalizeToIntegerIV(L, PN, FPIV: *FPIV, IIV: *IIV, TLI, MSSAU);
512 return true;
513}
514
515bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
516 // First step. Check to see if there are any floating-point recurrences.
517 // If there are, change them into integer recurrences, permitting analysis by
518 // the SCEV routines.
519 BasicBlock *Header = L->getHeader();
520
521 SmallVector<WeakTrackingVH, 8> PHIs(llvm::make_pointer_range(Range: Header->phis()));
522
523 bool Changed = false;
524 for (WeakTrackingVH &PHI : PHIs)
525 if (PHINode *PN = dyn_cast_or_null<PHINode>(Val: &*PHI))
526 Changed |= handleFloatingPointIV(L, PN);
527
528 // If the loop previously had floating-point IV, ScalarEvolution
529 // may not have been able to compute a trip count. Now that we've done some
530 // re-writing, the trip count may be computable.
531 if (Changed)
532 SE->forgetLoop(L);
533 return Changed;
534}
535
536//===---------------------------------------------------------------------===//
537// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
538// they will exit at the first iteration.
539//===---------------------------------------------------------------------===//
540
541/// Check to see if this loop has loop invariant conditions which lead to loop
542/// exits. If so, we know that if the exit path is taken, it is at the first
543/// loop iteration. This lets us predict exit values of PHI nodes that live in
544/// loop header.
545bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
546 // Verify the input to the pass is already in LCSSA form.
547 assert(L->isLCSSAForm(*DT));
548
549 SmallVector<BasicBlock *, 8> ExitBlocks;
550 L->getUniqueExitBlocks(ExitBlocks);
551
552 bool MadeAnyChanges = false;
553 for (auto *ExitBB : ExitBlocks) {
554 // If there are no more PHI nodes in this exit block, then no more
555 // values defined inside the loop are used on this path.
556 for (PHINode &PN : ExitBB->phis()) {
557 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
558 IncomingValIdx != E; ++IncomingValIdx) {
559 auto *IncomingBB = PN.getIncomingBlock(i: IncomingValIdx);
560
561 // Can we prove that the exit must run on the first iteration if it
562 // runs at all? (i.e. early exits are fine for our purposes, but
563 // traces which lead to this exit being taken on the 2nd iteration
564 // aren't.) Note that this is about whether the exit branch is
565 // executed, not about whether it is taken.
566 if (!L->getLoopLatch() ||
567 !DT->dominates(A: IncomingBB, B: L->getLoopLatch()))
568 continue;
569
570 // Get condition that leads to the exit path.
571 auto *TermInst = IncomingBB->getTerminator();
572
573 Value *Cond = nullptr;
574 if (auto *BI = dyn_cast<CondBrInst>(Val: TermInst)) {
575 // Must be a conditional branch, otherwise the block
576 // should not be in the loop.
577 Cond = BI->getCondition();
578 } else if (auto *SI = dyn_cast<SwitchInst>(Val: TermInst))
579 Cond = SI->getCondition();
580 else
581 continue;
582
583 if (!L->isLoopInvariant(V: Cond))
584 continue;
585
586 auto *ExitVal = dyn_cast<PHINode>(Val: PN.getIncomingValue(i: IncomingValIdx));
587
588 // Only deal with PHIs in the loop header.
589 if (!ExitVal || ExitVal->getParent() != L->getHeader())
590 continue;
591
592 // If ExitVal is a PHI on the loop header, then we know its
593 // value along this exit because the exit can only be taken
594 // on the first iteration.
595 auto *LoopPreheader = L->getLoopPreheader();
596 assert(LoopPreheader && "Invalid loop");
597 int PreheaderIdx = ExitVal->getBasicBlockIndex(BB: LoopPreheader);
598 if (PreheaderIdx != -1) {
599 assert(ExitVal->getParent() == L->getHeader() &&
600 "ExitVal must be in loop header");
601 MadeAnyChanges = true;
602 PN.setIncomingValue(i: IncomingValIdx,
603 V: ExitVal->getIncomingValue(i: PreheaderIdx));
604 SE->forgetValue(V: &PN);
605 }
606 }
607 }
608 }
609 return MadeAnyChanges;
610}
611
612//===----------------------------------------------------------------------===//
613// IV Widening - Extend the width of an IV to cover its widest uses.
614//===----------------------------------------------------------------------===//
615
616/// Update information about the induction variable that is extended by this
617/// sign or zero extend operation. This is used to determine the final width of
618/// the IV before actually widening it.
619static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
620 ScalarEvolution *SE,
621 const TargetTransformInfo *TTI) {
622 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
623 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
624 return;
625
626 Type *Ty = Cast->getType();
627 uint64_t Width = SE->getTypeSizeInBits(Ty);
628 if (!Cast->getDataLayout().isLegalInteger(Width))
629 return;
630
631 // Check that `Cast` actually extends the induction variable (we rely on this
632 // later). This takes care of cases where `Cast` is extending a truncation of
633 // the narrow induction variable, and thus can end up being narrower than the
634 // "narrow" induction variable.
635 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(Ty: WI.NarrowIV->getType());
636 if (NarrowIVWidth >= Width)
637 return;
638
639 // Cast is either an sext or zext up to this point.
640 // We should not widen an indvar if arithmetics on the wider indvar are more
641 // expensive than those on the narrower indvar. We check only the cost of ADD
642 // because at least an ADD is required to increment the induction variable. We
643 // could compute more comprehensively the cost of all instructions on the
644 // induction variable when necessary.
645 if (TTI &&
646 TTI->getArithmeticInstrCost(Opcode: Instruction::Add, Ty) >
647 TTI->getArithmeticInstrCost(Opcode: Instruction::Add,
648 Ty: Cast->getOperand(i_nocapture: 0)->getType())) {
649 return;
650 }
651
652 if (!WI.WidestNativeType ||
653 Width > SE->getTypeSizeInBits(Ty: WI.WidestNativeType)) {
654 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
655 WI.IsSigned = IsSigned;
656 return;
657 }
658
659 // We extend the IV to satisfy the sign of its user(s), or 'signed'
660 // if there are multiple users with both sign- and zero extensions,
661 // in order not to introduce nondeterministic behaviour based on the
662 // unspecified order of a PHI nodes' users-iterator.
663 WI.IsSigned |= IsSigned;
664}
665
666//===----------------------------------------------------------------------===//
667// Live IV Reduction - Minimize IVs live across the loop.
668//===----------------------------------------------------------------------===//
669
670//===----------------------------------------------------------------------===//
671// Simplification of IV users based on SCEV evaluation.
672//===----------------------------------------------------------------------===//
673
674namespace {
675
676class IndVarSimplifyVisitor : public IVVisitor {
677 ScalarEvolution *SE;
678 const TargetTransformInfo *TTI;
679 PHINode *IVPhi;
680
681public:
682 WideIVInfo WI;
683
684 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
685 const TargetTransformInfo *TTI,
686 const DominatorTree *DTree)
687 : SE(SCEV), TTI(TTI), IVPhi(IV) {
688 DT = DTree;
689 WI.NarrowIV = IVPhi;
690 }
691
692 // Implement the interface used by simplifyUsersOfIV.
693 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
694};
695
696} // end anonymous namespace
697
698/// Iteratively perform simplification on a worklist of IV users. Each
699/// successive simplification may push more users which may themselves be
700/// candidates for simplification.
701///
702/// Sign/Zero extend elimination is interleaved with IV simplification.
703bool IndVarSimplify::simplifyAndExtend(Loop *L,
704 SCEVExpander &Rewriter,
705 LoopInfo *LI) {
706 SmallVector<WideIVInfo, 8> WideIVs;
707
708 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
709 M: L->getBlocks()[0]->getModule(), id: Intrinsic::experimental_guard);
710 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
711
712 SmallVector<PHINode *, 8> LoopPhis(
713 llvm::make_pointer_range(Range: L->getHeader()->phis()));
714
715 // Each round of simplification iterates through the SimplifyIVUsers worklist
716 // for all current phis, then determines whether any IVs can be
717 // widened. Widening adds new phis to LoopPhis, inducing another round of
718 // simplification on the wide IVs.
719 bool Changed = false;
720 while (!LoopPhis.empty()) {
721 // Evaluate as many IV expressions as possible before widening any IVs. This
722 // forces SCEV to set no-wrap flags before evaluating sign/zero
723 // extension. The first time SCEV attempts to normalize sign/zero extension,
724 // the result becomes final. So for the most predictable results, we delay
725 // evaluation of sign/zero extend evaluation until needed, and avoid running
726 // other SCEV based analysis prior to simplifyAndExtend.
727 do {
728 PHINode *CurrIV = LoopPhis.pop_back_val();
729
730 // Information about sign/zero extensions of CurrIV.
731 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
732
733 const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, Dead&: DeadInsts,
734 Rewriter, V: &Visitor);
735
736 Changed |= C;
737 RunUnswitching |= U;
738 if (Visitor.WI.WidestNativeType) {
739 WideIVs.push_back(Elt: Visitor.WI);
740 }
741 } while(!LoopPhis.empty());
742
743 // Continue if we disallowed widening.
744 if (!WidenIndVars)
745 continue;
746
747 for (; !WideIVs.empty(); WideIVs.pop_back()) {
748 unsigned ElimExt;
749 unsigned Widened;
750 if (PHINode *WidePhi = createWideIV(WI: WideIVs.back(), LI, SE, Rewriter,
751 DT, DeadInsts, NumElimExt&: ElimExt, NumWidened&: Widened,
752 HasGuards, UsePostIncrementRanges)) {
753 NumElimExt += ElimExt;
754 NumWidened += Widened;
755 Changed = true;
756 LoopPhis.push_back(Elt: WidePhi);
757 }
758 }
759 }
760 return Changed;
761}
762
763//===----------------------------------------------------------------------===//
764// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
765//===----------------------------------------------------------------------===//
766
767/// Given an Value which is hoped to be part of an add recurance in the given
768/// loop, return the associated Phi node if so. Otherwise, return null. Note
769/// that this is less general than SCEVs AddRec checking.
770static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
771 Instruction *IncI = dyn_cast<Instruction>(Val: IncV);
772 if (!IncI)
773 return nullptr;
774
775 switch (IncI->getOpcode()) {
776 case Instruction::Add:
777 case Instruction::Sub:
778 break;
779 case Instruction::GetElementPtr:
780 // An IV counter must preserve its type.
781 if (IncI->getNumOperands() == 2)
782 break;
783 [[fallthrough]];
784 default:
785 return nullptr;
786 }
787
788 PHINode *Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: 0));
789 if (Phi && Phi->getParent() == L->getHeader()) {
790 if (L->isLoopInvariant(V: IncI->getOperand(i: 1)))
791 return Phi;
792 return nullptr;
793 }
794 if (IncI->getOpcode() == Instruction::GetElementPtr)
795 return nullptr;
796
797 // Allow add/sub to be commuted.
798 Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: 1));
799 if (Phi && Phi->getParent() == L->getHeader()) {
800 if (L->isLoopInvariant(V: IncI->getOperand(i: 0)))
801 return Phi;
802 }
803 return nullptr;
804}
805
806/// Whether the current loop exit test is based on this value. Currently this
807/// is limited to a direct use in the loop condition.
808static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
809 CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
810 ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
811 // TODO: Allow non-icmp loop test.
812 if (!ICmp)
813 return false;
814
815 // TODO: Allow indirect use.
816 return ICmp->getOperand(i_nocapture: 0) == V || ICmp->getOperand(i_nocapture: 1) == V;
817}
818
819/// linearFunctionTestReplace policy. Return true unless we can show that the
820/// current exit test is already sufficiently canonical.
821static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
822 assert(L->getLoopLatch() && "Must be in simplified form");
823
824 // Avoid converting a constant or loop invariant test back to a runtime
825 // test. This is critical for when SCEV's cached ExitCount is less precise
826 // than the current IR (such as after we've proven a particular exit is
827 // actually dead and thus the BE count never reaches our ExitCount.)
828 CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
829 if (L->isLoopInvariant(V: BI->getCondition()))
830 return false;
831
832 // Do LFTR to simplify the exit condition to an ICMP.
833 ICmpInst *Cond = dyn_cast<ICmpInst>(Val: BI->getCondition());
834 if (!Cond)
835 return true;
836
837 // Do LFTR to simplify the exit ICMP to EQ/NE
838 ICmpInst::Predicate Pred = Cond->getPredicate();
839 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
840 return true;
841
842 // Look for a loop invariant RHS
843 Value *LHS = Cond->getOperand(i_nocapture: 0);
844 Value *RHS = Cond->getOperand(i_nocapture: 1);
845 if (!L->isLoopInvariant(V: RHS)) {
846 if (!L->isLoopInvariant(V: LHS))
847 return true;
848 std::swap(a&: LHS, b&: RHS);
849 }
850 // Look for a simple IV counter LHS
851 PHINode *Phi = dyn_cast<PHINode>(Val: LHS);
852 if (!Phi)
853 Phi = getLoopPhiForCounter(IncV: LHS, L);
854
855 if (!Phi)
856 return true;
857
858 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
859 int Idx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
860 if (Idx < 0)
861 return true;
862
863 // Do LFTR if the exit condition's IV is *not* a simple counter.
864 Value *IncV = Phi->getIncomingValue(i: Idx);
865 return Phi != getLoopPhiForCounter(IncV, L);
866}
867
868/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
869/// down to checking that all operands are constant and listing instructions
870/// that may hide undef.
871static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
872 unsigned Depth) {
873 if (isa<Constant>(Val: V))
874 return !isa<UndefValue>(Val: V);
875
876 if (Depth >= 6)
877 return false;
878
879 // Conservatively handle non-constant non-instructions. For example, Arguments
880 // may be undef.
881 Instruction *I = dyn_cast<Instruction>(Val: V);
882 if (!I)
883 return false;
884
885 // Load and return values may be undef.
886 if(I->mayReadFromMemory() || isa<CallInst>(Val: I) || isa<InvokeInst>(Val: I))
887 return false;
888
889 // Optimistically handle other instructions.
890 for (Value *Op : I->operands()) {
891 if (!Visited.insert(Ptr: Op).second)
892 continue;
893 if (!hasConcreteDefImpl(V: Op, Visited, Depth: Depth+1))
894 return false;
895 }
896 return true;
897}
898
899/// Return true if the given value is concrete. We must prove that undef can
900/// never reach it.
901///
902/// TODO: If we decide that this is a good approach to checking for undef, we
903/// may factor it into a common location.
904static bool hasConcreteDef(Value *V) {
905 SmallPtrSet<Value*, 8> Visited;
906 Visited.insert(Ptr: V);
907 return hasConcreteDefImpl(V, Visited, Depth: 0);
908}
909
910/// Return true if the given phi is a "counter" in L. A counter is an
911/// add recurance (of integer or pointer type) with an arbitrary start, and a
912/// step of 1. Note that L must have exactly one latch.
913static bool isLoopCounter(PHINode* Phi, Loop *L,
914 ScalarEvolution *SE) {
915 assert(Phi->getParent() == L->getHeader());
916 assert(L->getLoopLatch());
917
918 if (!SE->isSCEVable(Ty: Phi->getType()))
919 return false;
920
921 const SCEV *S = SE->getSCEV(V: Phi);
922 if (!match(S, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_scev_One(), L: m_SpecificLoop(L))))
923 return false;
924
925 int LatchIdx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
926 Value *IncV = Phi->getIncomingValue(i: LatchIdx);
927 return (getLoopPhiForCounter(IncV, L) == Phi &&
928 isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncV)));
929}
930
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
///
/// Returns null if no suitable counter phi was found.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(Ty: BECount->getType());

  // Current exit condition; used below for the almost-dead-IV heuristic.
  Value *Cond = cast<CondBrInst>(Val: ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(Val: I); ++I) {
    PHINode *Phi = cast<PHINode>(Val&: I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // isLoopCounter guarantees the phi's SCEV is an addrec of L.
    const auto *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(Ty: AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(Width: PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(V: Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(BB: LatchBlock);
      if (!isLoopExitTestBasedOn(V: Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(V: IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Root: Phi, OnPathTo: ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    // Heuristic selection among multiple viable counters. Note the order of
    // checks matters: a live BestPhi may still be displaced below.
    if (BestPhi && !isAlmostDeadIV(IV: BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(IV: Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(Ty: BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
1014
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
///
/// The expansion is inserted before the terminator of \p ExitingBB. Returns
/// null if the limit cannot be computed (see the truncation bailout below).
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IndVar));
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // For integer IVs, truncate the IV before computing the limit unless we
  // know apriori that the limit must be a constant when evaluated in the
  // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
  // IV in the loop over a (potentially) expensive expansion of the widened
  // exit count add(zext(add)) expression.
  if (IndVar->getType()->isIntegerTy() &&
      SE->getTypeSizeInBits(Ty: AR->getType()) >
      SE->getTypeSizeInBits(Ty: ExitCount->getType())) {
    const SCEV *IVInit = AR->getStart();
    // If both start and count are constants, the limit folds to a constant in
    // the wide type, so no truncation is needed.
    if (!isa<SCEVConstant>(Val: IVInit) || !isa<SCEVConstant>(Val: ExitCount)) {
      const SCEV *TruncExpr = SE->getTruncateExpr(Op: AR, Ty: ExitCount->getType());

      // The following bailout is necessary due to the interaction with
      // the depth limit in SCEV analysis.
      if (!isa<SCEVAddRecExpr>(Val: TruncExpr))
        return nullptr;
      AR = cast<SCEVAddRecExpr>(Val: TruncExpr);
    }
  }

  // Evaluate the (possibly post-incremented) recurrence at the exit count and
  // expand that loop-invariant value to IR.
  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(SE&: *SE) : AR;
  const SCEV *IVLimit = ARBase->evaluateAtIteration(It: ExitCount, SE&: *SE);
  assert(SE->isLoopInvariant(IVLimit, L) &&
         "Computed iteration count is not loop invariant!");
  return Rewriter.expandCodeFor(SH: IVLimit, Ty: ARBase->getType(),
                                I: ExitingBB->getTerminator());
}
1053
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// \p ExitCount is the loop-invariant count of backedge-takings after which
/// the exit is taken (see genLoopLimit), and \p IndVar is a unit-stride loop
/// counter phi chosen by FindLoopCounter. Returns true if the exit test was
/// rewritten.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The value the counter takes after each backedge (its latch increment).
  Instruction * const IncVar =
    cast<Instruction>(Val: IndVar->getIncomingValueForBlock(BB: L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(V: IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(Root: IncVar, OnPathTo: ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // Materialize the value the IV holds when the exit is taken.
  Value *ExitCnt =
      genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  if (!ExitCnt)
    return false;

  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(Val: IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  // NE keeps us in the loop when the true successor stays inside it;
  // otherwise EQ does.
  CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BB: BI->getSuccessor(i: 0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(Val: BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(Ty: CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(Ty: ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(V: CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(Op: IV, Ty: ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());

    // If zext(trunc(IV)) == IV, the IV fits in the narrow type unsigned, so
    // zero-extending the limit preserves the comparison.
    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(V: ExitCnt, DestTy: IndVar->getType(),
                                   Name: "wide.trip.count");
    } else {
      // Same reasoning with sign extension.
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(V: ExitCnt, DestTy: IndVar->getType(),
                                     Name: "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      // Hoist the extended limit out of the loop if possible.
      L->makeLoopInvariant(V: ExitCnt, Changed&: Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(V: CmpIndVar, DestTy: ExitCnt->getType(),
                                      Name: "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, LHS: CmpIndVar, RHS: ExitCnt, Name: "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(Args&: OrigCond);

  ++NumLFTR;
  return true;
}
1198
1199//===----------------------------------------------------------------------===//
1200// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1201//===----------------------------------------------------------------------===//
1202
1203/// If there's a single exit block, sink any loop-invariant values that
1204/// were defined in the preheader but not used inside the loop into the
1205/// exit block to reduce register pressure in the loop.
1206bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1207 BasicBlock *ExitBlock = L->getExitBlock();
1208 if (!ExitBlock) return false;
1209
1210 BasicBlock *Preheader = L->getLoopPreheader();
1211 if (!Preheader) return false;
1212
1213 bool MadeAnyChanges = false;
1214 for (Instruction &I : llvm::make_early_inc_range(Range: llvm::reverse(C&: *Preheader))) {
1215
1216 // Skip BB Terminator.
1217 if (Preheader->getTerminator() == &I)
1218 continue;
1219
1220 // New instructions were inserted at the end of the preheader.
1221 if (isa<PHINode>(Val: I))
1222 break;
1223
1224 // Don't move instructions which might have side effects, since the side
1225 // effects need to complete before instructions inside the loop. Also don't
1226 // move instructions which might read memory, since the loop may modify
1227 // memory. Note that it's okay if the instruction might have undefined
1228 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1229 // block.
1230 if (I.mayHaveSideEffects() || I.mayReadFromMemory())
1231 continue;
1232
1233 // Skip debug or pseudo instructions.
1234 if (I.isDebugOrPseudoInst())
1235 continue;
1236
1237 // Skip eh pad instructions.
1238 if (I.isEHPad())
1239 continue;
1240
1241 // Don't sink alloca: we never want to sink static alloca's out of the
1242 // entry block, and correctly sinking dynamic alloca's requires
1243 // checks for stacksave/stackrestore intrinsics.
1244 // FIXME: Refactor this check somehow?
1245 if (isa<AllocaInst>(Val: &I))
1246 continue;
1247
1248 // Determine if there is a use in or before the loop (direct or
1249 // otherwise).
1250 bool UsedInLoop = false;
1251 for (Use &U : I.uses()) {
1252 Instruction *User = cast<Instruction>(Val: U.getUser());
1253 BasicBlock *UseBB = User->getParent();
1254 if (PHINode *P = dyn_cast<PHINode>(Val: User)) {
1255 unsigned i =
1256 PHINode::getIncomingValueNumForOperand(i: U.getOperandNo());
1257 UseBB = P->getIncomingBlock(i);
1258 }
1259 if (UseBB == Preheader || L->contains(BB: UseBB)) {
1260 UsedInLoop = true;
1261 break;
1262 }
1263 }
1264
1265 // If there is, the def must remain in the preheader.
1266 if (UsedInLoop)
1267 continue;
1268
1269 // Otherwise, sink it to the exit block.
1270 I.moveBefore(InsertPos: ExitBlock->getFirstInsertionPt());
1271 SE->forgetValue(V: &I);
1272 MadeAnyChanges = true;
1273 }
1274
1275 return MadeAnyChanges;
1276}
1277
1278static void replaceExitCond(CondBrInst *BI, Value *NewCond,
1279 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1280 auto *OldCond = BI->getCondition();
1281 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1282 << " with " << *NewCond << "\n");
1283 BI->setCondition(NewCond);
1284 if (OldCond->use_empty())
1285 DeadInsts.emplace_back(Args&: OldCond);
1286}
1287
1288static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1289 bool IsTaken) {
1290 CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1291 bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1292 auto *OldCond = BI->getCondition();
1293 return ConstantInt::get(Ty: OldCond->getType(),
1294 V: IsTaken ? ExitIfTrue : !ExitIfTrue);
1295}
1296
1297static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1298 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1299 CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1300 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1301 replaceExitCond(BI, NewCond, DeadInsts);
1302}
1303
1304static void replaceLoopPHINodesWithPreheaderValues(
1305 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1306 ScalarEvolution &SE) {
1307 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1308 auto *LoopPreheader = L->getLoopPreheader();
1309 auto *LoopHeader = L->getHeader();
1310 SmallVector<Instruction *> Worklist;
1311 for (auto &PN : LoopHeader->phis()) {
1312 auto *PreheaderIncoming = PN.getIncomingValueForBlock(BB: LoopPreheader);
1313 for (User *U : PN.users())
1314 Worklist.push_back(Elt: cast<Instruction>(Val: U));
1315 SE.forgetValue(V: &PN);
1316 PN.replaceAllUsesWith(V: PreheaderIncoming);
1317 DeadInsts.emplace_back(Args: &PN);
1318 }
1319
1320 // Replacing with the preheader value will often allow IV users to simplify
1321 // (especially if the preheader value is a constant).
1322 SmallPtrSet<Instruction *, 16> Visited;
1323 while (!Worklist.empty()) {
1324 auto *I = cast<Instruction>(Val: Worklist.pop_back_val());
1325 if (!Visited.insert(Ptr: I).second)
1326 continue;
1327
1328 // Don't simplify instructions outside the loop.
1329 if (!L->contains(Inst: I))
1330 continue;
1331
1332 Value *Res = simplifyInstruction(I, Q: I->getDataLayout());
1333 if (Res && LI->replacementPreservesLCSSAForm(From: I, To: Res)) {
1334 for (User *U : I->users())
1335 Worklist.push_back(Elt: cast<Instruction>(Val: U));
1336 I->replaceAllUsesWith(V: Res);
1337 DeadInsts.emplace_back(Args&: I);
1338 }
1339 }
1340}
1341
1342static Value *
1343createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1344 const ScalarEvolution::LoopInvariantPredicate &LIP,
1345 SCEVExpander &Rewriter) {
1346 ICmpInst::Predicate InvariantPred = LIP.Pred;
1347 BasicBlock *Preheader = L->getLoopPreheader();
1348 assert(Preheader && "Preheader doesn't exist");
1349 Rewriter.setInsertPoint(Preheader->getTerminator());
1350 auto *LHSV = Rewriter.expandCodeFor(SH: LIP.LHS);
1351 auto *RHSV = Rewriter.expandCodeFor(SH: LIP.RHS);
1352 bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1353 if (ExitIfTrue)
1354 InvariantPred = ICmpInst::getInversePredicate(pred: InvariantPred);
1355 IRBuilder<> Builder(Preheader->getTerminator());
1356 CondBrInst *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1357 return Builder.CreateICmp(P: InvariantPred, LHS: LHSV, RHS: RHSV,
1358 Name: BI->getCondition()->getName());
1359}
1360
/// Try to compute a loop-invariant replacement for the exit test \p ICmp of
/// \p ExitingBB, given that the loop runs at most \p MaxIter iterations.
/// \p Inverted means the branch stays in the loop on the false edge, and
/// \p SkipLastIter means the condition's value on the final iteration is
/// irrelevant (so one fewer iteration may be assumed). Returns std::nullopt
/// if no equivalent invariant condition was found, otherwise the replacement
/// value (possibly a constant-folded exit).
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  CmpPredicate Pred = ICmp->getCmpPredicate();
  Value *LHS = ICmp->getOperand(i_nocapture: 0);
  Value *RHS = ICmp->getOperand(i_nocapture: 1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
  if (Inverted)
    Pred = ICmpInst::getInverseCmpPredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(V: LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(V: RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHS: LHSS, RHS: RHSS, CtxI: BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types so MaxIter matches the addrec's type: widen
  // MaxIter with zext, or truncate it when provably small enough to fit.
  if (SE->getTypeSizeInBits(Ty: ARTy) > SE->getTypeSizeInBits(Ty: MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(Op: MaxIter, Ty: ARTy);
  else if (SE->getTypeSizeInBits(Ty: ARTy) < SE->getTypeSizeInBits(Ty: MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(Ty: ARTy);
    const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(Op: MinusOne, Ty: MaxIterTy);
    if (SE->isKnownPredicateAt(Pred: ICmpInst::ICMP_ULE, LHS: MaxIter, RHS: MaxAllowedIter, CtxI: BI))
      MaxIter = SE->getTruncateExpr(Op: MaxIter, Ty: ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<SCEVUse, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(Val: MaxIter)) {
      for (SCEVUse Op : UMin->operands())
        Elements.push_back(Elt: SE->getMinusSCEV(LHS: Op, RHS: SE->getOne(Ty: Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Ops&: Elements);
    } else
      MaxIter = SE->getMinusSCEV(LHS: MaxIter, RHS: SE->getOne(Ty: MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHS: LHSS, RHS: RHSS,
                                                               L, CtxI: BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred: LIP->Pred, LHS: LIP->LHS, RHS: LIP->RHS, CtxI: BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, LIP: *LIP, Rewriter);
}
1418
/// Attempt to replace each leaf icmp of the (possibly AND/OR-composed) exit
/// condition of \p BI with a loop-invariant equivalent, given that the loop
/// runs at most \p MaxIter iterations. Returns true if any condition was
/// replaced; replaced conditions are queued on \p DeadInsts.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, CondBrInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For branch
  // that stays in loop by FALSE condition, go through OR. Both gives the
  // similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BB: BI->getSuccessor(i: 1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(Ptr: OldCond);
  Worklist.push_back(Elt: OldCond);

  // Decompose one level of logical AND/OR, pushing unseen operands onto the
  // worklist; returns false if \p V is not the expected connective.
  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, P: m_LogicalOr(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
        return false;
    } else {
      if (!match(V, P: m_LogicalAnd(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
        return false;
    }
    if (Visited.insert(Ptr: LHS).second)
      Worklist.push_back(Elt: LHS);
    if (Visited.insert(Ptr: RHS).second)
      Worklist.push_back(Elt: RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Val: Curr))
          LeafConditions.push_back(Elt: ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBlock: ExitingBB,
                       Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ExitCond: ICmp, ExitIfTrue: Inverted,
                                             /*ControlsExit*/ ControlsOnlyExit: false);
      const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(Val: ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(Ty1: ExitMax->getType(), Ty2: MaxIter->getType());
      const SCEV *WideExitMax = SE->getNoopOrZeroExtend(V: ExitMax, Ty: WiderType);
      const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(V: MaxIter, Ty: WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(Ptr: ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one doesn't
    // really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(Ptr: OldCond);
    }
    if (auto Replaced =
            createReplacement(ICmp: OldCond, L, ExitingBB, MaxIter, Inverted,
                              SkipLastIter: OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(Val: NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(V: NewCond);
      DeadInsts.push_back(Elt: OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(Ptr: OldCond);
    }
  }
  return Changed;
}
1522
/// Canonicalize the exit conditions of \p L in two phases: (1) rewrite signed
/// comparisons of the form icmp s-pred zext(X), RHS into their unsigned
/// equivalents when SCEV proves RHS fits in the zero-extended range, and then
/// (2) "rotate" the zext out of the loop by narrowing the compare to the inner
/// type and truncating the loop-invariant operand in the preheader.  Returns
/// true if any condition was changed.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  // Phase 1: turn signed compares of a zext into unsigned compares.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<CondBrInst>(Val: ExitingBB->getTerminator());
    if (!BI)
      continue;

    auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(i_nocapture: 0);
    auto *RHS = ICmp->getOperand(i_nocapture: 1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(V: RHS)) {
      if (!L->isLoopInvariant(V: LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(a&: LHS, b&: RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))) || !ICmp->isSigned())
      continue;

    // If every value RHS can take is within the zero-extension of the inner
    // type's full range, the sign bit of the wide compare is never set by
    // either operand, so signed and unsigned predicates agree.
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
    auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
    FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
    if (FullCR.contains(CR: RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<CondBrInst>(Val: ExitingBB->getTerminator());
    if (!BI)
      continue;

    auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(i_nocapture: 0);
    auto *RHS = ICmp->getOperand(i_nocapture: 1);
    if (L->isLoopInvariant(V: LHS) == L->isLoopInvariant(V: RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(V: LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(a&: LHS, b&: RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have an zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, S: RHS, Ty: LHSOp->getType(), Name: "",
          InsertBefore: L->getLoopPreheader()->getTerminator()->getIterator());
      // NewRHS is an operation that has been hoisted out of the loop, and
      // therefore should have a dropped location.
      NewRHS->setDebugLoc(DebugLoc::getDropped());
      ICmp->setOperand(i_nocapture: Swapped ? 1 : 0, Val_nocapture: LHSOp);
      ICmp->setOperand(i_nocapture: Swapped ? 0 : 1, Val_nocapture: NewRHS);
      // Samesign flag cannot be preserved after narrowing the compare.
      ICmp->setSameSign(false);
      if (LHS->use_empty())
        DeadInsts.push_back(Elt: LHS);
    };

    // The rotation is only sound if truncating RHS loses no information,
    // i.e. RHS's unsigned range fits in the inner bit width.
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
    auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
    FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
    if (FullCR.contains(CR: RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1656
1657bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1658 SmallVector<BasicBlock*, 16> ExitingBlocks;
1659 L->getExitingBlocks(ExitingBlocks);
1660
1661 // Remove all exits which aren't both rewriteable and execute on every
1662 // iteration.
1663 llvm::erase_if(C&: ExitingBlocks, P: [&](BasicBlock *ExitingBB) {
1664 // If our exitting block exits multiple loops, we can only rewrite the
1665 // innermost one. Otherwise, we're changing how many times the innermost
1666 // loop runs before it exits.
1667 if (LI->getLoopFor(BB: ExitingBB) != L)
1668 return true;
1669
1670 // Can't rewrite non-branch yet.
1671 CondBrInst *BI = dyn_cast<CondBrInst>(Val: ExitingBB->getTerminator());
1672 if (!BI)
1673 return true;
1674
1675 // Likewise, the loop latch must be dominated by the exiting BB.
1676 if (!DT->dominates(A: ExitingBB, B: L->getLoopLatch()))
1677 return true;
1678
1679 if (auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
1680 // If already constant, nothing to do. However, if this is an
1681 // unconditional exit, we can still replace header phis with their
1682 // preheader value.
1683 if (!L->contains(BB: BI->getSuccessor(i: CI->isNullValue())))
1684 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1685 return true;
1686 }
1687
1688 return false;
1689 });
1690
1691 if (ExitingBlocks.empty())
1692 return false;
1693
1694 // Get a symbolic upper bound on the loop backedge taken count.
1695 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1696 if (isa<SCEVCouldNotCompute>(Val: MaxBECount))
1697 return false;
1698
1699 // Visit our exit blocks in order of dominance. We know from the fact that
1700 // all exits must dominate the latch, so there is a total dominance order
1701 // between them.
1702 llvm::sort(C&: ExitingBlocks, Comp: [&](BasicBlock *A, BasicBlock *B) {
1703 // std::sort sorts in ascending order, so we want the inverse of
1704 // the normal dominance relation.
1705 if (A == B) return false;
1706 if (DT->properlyDominates(A, B))
1707 return true;
1708 else {
1709 assert(DT->properlyDominates(B, A) &&
1710 "expected total dominance order!");
1711 return false;
1712 }
1713 });
1714#ifdef ASSERT
1715 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1716 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1717 }
1718#endif
1719
1720 bool Changed = false;
1721 bool SkipLastIter = false;
1722 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1723 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1724 if (SkipLastIter || isa<SCEVCouldNotCompute>(Val: MaxExitCount))
1725 return;
1726 if (isa<SCEVCouldNotCompute>(Val: CurrMaxExit))
1727 CurrMaxExit = MaxExitCount;
1728 else
1729 CurrMaxExit = SE->getUMinFromMismatchedTypes(LHS: CurrMaxExit, RHS: MaxExitCount);
1730 // If the loop has more than 1 iteration, all further checks will be
1731 // executed 1 iteration less.
1732 if (CurrMaxExit == MaxBECount)
1733 SkipLastIter = true;
1734 };
1735 SmallPtrSet<const SCEV *, 8> DominatingExactExitCounts;
1736 for (BasicBlock *ExitingBB : ExitingBlocks) {
1737 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1738 const SCEV *MaxExitCount = SE->getExitCount(
1739 L, ExitingBlock: ExitingBB, Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum);
1740 if (isa<SCEVCouldNotCompute>(Val: ExactExitCount)) {
1741 // Okay, we do not know the exit count here. Can we at least prove that it
1742 // will remain the same within iteration space?
1743 auto *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
1744 auto OptimizeCond = [&](bool SkipLastIter) {
1745 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1746 MaxIter: MaxBECount, SkipLastIter,
1747 SE, Rewriter, DeadInsts);
1748 };
1749
1750 // TODO: We might have proved that we can skip the last iteration for
1751 // this check. In this case, we only want to check the condition on the
1752 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1753 // corner case:
1754 //
1755 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1756 //
1757 // If we could not prove that len != 0, then we also could not prove that
1758 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1759 // OptimizeCond will likely not prove anything for it, even if it could
1760 // prove the same fact for len.
1761 //
1762 // As a temporary solution, we query both last and pre-last iterations in
1763 // hope that we will be able to prove triviality for at least one of
1764 // them. We can stop querying MaxBECount for this case once SCEV
1765 // understands that (MaxBECount - 1) will not overflow here.
1766 if (OptimizeCond(false))
1767 Changed = true;
1768 else if (SkipLastIter && OptimizeCond(true))
1769 Changed = true;
1770 UpdateSkipLastIter(MaxExitCount);
1771 continue;
1772 }
1773
1774 UpdateSkipLastIter(ExactExitCount);
1775
1776 // If we know we'd exit on the first iteration, rewrite the exit to
1777 // reflect this. This does not imply the loop must exit through this
1778 // exit; there may be an earlier one taken on the first iteration.
1779 // We know that the backedge can't be taken, so we replace all
1780 // the header PHIs with values coming from the preheader.
1781 if (ExactExitCount->isZero()) {
1782 foldExit(L, ExitingBB, IsTaken: true, DeadInsts);
1783 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1784 Changed = true;
1785 continue;
1786 }
1787
1788 assert(ExactExitCount->getType()->isIntegerTy() &&
1789 MaxBECount->getType()->isIntegerTy() &&
1790 "Exit counts must be integers");
1791
1792 Type *WiderType =
1793 SE->getWiderType(Ty1: MaxBECount->getType(), Ty2: ExactExitCount->getType());
1794 ExactExitCount = SE->getNoopOrZeroExtend(V: ExactExitCount, Ty: WiderType);
1795 MaxBECount = SE->getNoopOrZeroExtend(V: MaxBECount, Ty: WiderType);
1796 assert(MaxBECount->getType() == ExactExitCount->getType());
1797
1798 // Can we prove that some other exit must be taken strictly before this
1799 // one?
1800 if (SE->isLoopEntryGuardedByCond(L, Pred: CmpInst::ICMP_ULT, LHS: MaxBECount,
1801 RHS: ExactExitCount)) {
1802 foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1803 Changed = true;
1804 continue;
1805 }
1806
1807 // As we run, keep track of which exit counts we've encountered. If we
1808 // find a duplicate, we've found an exit which would have exited on the
1809 // exiting iteration, but (from the visit order) strictly follows another
1810 // which does the same and is thus dead.
1811 if (!DominatingExactExitCounts.insert(Ptr: ExactExitCount).second) {
1812 foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1813 Changed = true;
1814 continue;
1815 }
1816
1817 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1818 // here. If we kept track of the min of dominanting exits so far, we could
1819 // discharge exits with EC >= MDEC. This is less powerful than the existing
1820 // transform (since later exits aren't considered), but potentially more
1821 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1822 // or a >u b. Such a case is not currently known.
1823 }
1824 return Changed;
1825}
1826
1827static bool crashingBBWithoutEffect(const BasicBlock &BB) {
1828 return llvm::all_of(Range: BB, P: [](const Instruction &I) {
1829 // TODO: for now this is overly restrictive, to make sure nothing in this
1830 // BB can depend on the loop body.
1831 // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
1832 // load does not have a side effect, but we could have
1833 // %a = load ptr, ptr %ptr
1834 // %b = load i32, ptr %a
1835 // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
1836 // dereference by skipping over loop iterations.
1837 if (const auto *CB = dyn_cast<CallBase>(Val: &I)) {
1838 if (CB->onlyAccessesInaccessibleMemory())
1839 return true;
1840 }
1841 return isa<UnreachableInst>(Val: I);
1842 });
1843}
1844
/// If \p L is read-only (or has only unobservable thread-local side effects
/// ending in a trap) and its exact backedge-taken count is computable, rewrite
/// each predicatable exit condition into a loop-invariant comparison of that
/// exit's count against the BTC, so the exits become unswitchable.  Returns
/// true if any branch condition was rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(Val: ExactBTC) || !Rewriter.isSafeToExpand(S: ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true if the given exit cannot be rewritten to a loop-invariant
  // condition (wrong loop, non-branch terminator, already constant, exit
  // block with phis, or unexpandable exit count).
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(BB: ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    CondBrInst *BI = dyn_cast<CondBrInst>(Val: ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(Val: BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(i: L->contains(BB: BI->getSuccessor(i: 0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
    if (isa<SCEVCouldNotCompute>(Val: ExitCount) ||
        !Rewriter.isSafeToExpand(S: ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // Make sure all exits dominate the latch. This means there is a linear chain
  // of exits. We check this before sorting so we have a total order.
  BasicBlock *Latch = L->getLoopLatch();
  for (BasicBlock *ExitingBB : ExitingBlocks)
    if (!DT->dominates(A: ExitingBB, B: Latch))
      return false;

  // If we have any exits which can't be predicated themselves, than we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(C&: ExitingBlocks, Comp: [&](BasicBlock *A, BasicBlock *B) {
    // llvm::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B)
      return false;
    if (DT->properlyDominates(A, B))
      return true;
    if (DT->properlyDominates(A: B, B: A))
      return false;
    llvm_unreachable("Should have total dominance order");
  });

  // Make sure our exit blocks are really a total order (i.e. a linear chain of
  // exits before the backedge).
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
           "Not sorted by dominance");

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(N: i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  bool HasThreadLocalSideEffects = false;
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB) {
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects()) {
        if (!LoopPredicationTraps)
          return false;
        HasThreadLocalSideEffects = true;
        if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I)) {
          // Simple stores cannot be observed by other threads.
          // If HasThreadLocalSideEffects is set, we check
          // crashingBBWithoutEffect to make sure that the crashing BB cannot
          // observe them either.
          if (!SI->isSimple())
            return false;
        } else {
          return false;
        }
      }

      // Skip if the loop has tokens referenced outside the loop to avoid
      // changing convergence behavior.
      if (I.getType()->isTokenTy()) {
        for (User *U : I.users()) {
          Instruction *UserInst = dyn_cast<Instruction>(Val: U);
          if (UserInst && !L->contains(Inst: UserInst)) {
            return false;
          }
        }
      }
    }

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);

    auto *BI = cast<CondBrInst>(Val: ExitingBB->getTerminator());
    if (HasThreadLocalSideEffects) {
      const BasicBlock *Unreachable = nullptr;
      for (const BasicBlock *Succ : BI->successors()) {
        if (isa<UnreachableInst>(Val: Succ->getTerminator()))
          Unreachable = Succ;
      }
      // Exit BB which have one branch back into the loop and another one to
      // a trap can still be optimized, because local side effects cannot
      // be observed in the exit case (the trap). We could be smarter about
      // this, but for now lets pattern match common cases that directly trap.
      if (Unreachable == nullptr || !crashingBBWithoutEffect(BB: *Unreachable))
        return Changed;
    }
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit's count equals the BTC, so it is taken exactly when the
      // loop finishes: fold to a constant that keeps us in the loop.
      NewCond = L->contains(BB: BI->getSuccessor(i: 0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(SH: ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(SH: ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(Ty1: ECV->getType(), Ty2: RHS->getType());
        ECV = B.CreateZExt(V: ECV, DestTy: WiderTy);
        RHS = B.CreateZExt(V: RHS, DestTy: WiderTy);
      }
      auto Pred = L->contains(BB: BI->getSuccessor(i: 0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(P: Pred, LHS: ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(Args&: OldCond);
    Changed = true;
    RunUnswitching = true;
  }

  return Changed;
}
2048
2049//===----------------------------------------------------------------------===//
2050// IndVarSimplify driver. Manage several subpasses of IV simplification.
2051//===----------------------------------------------------------------------===//
2052
/// Driver for all IndVarSimplify subpasses on loop \p L: float-to-int IV
/// rewriting, IV simplification/widening, exit-value rewriting, congruent-IV
/// elimination, exit-condition canonicalization, exit optimization and
/// predication, LFTR, and final dead-code cleanup.  Returns true if the IR
/// changed.  The phase ordering below is deliberate — see inline comments.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, "indvars");
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<CondBrInst>(Val: ExitingBB->getTerminator()))
        continue;

      // If our exitting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(BB: ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
      if (isa<SCEVCouldNotCompute>(Val: ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, BECount: ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(Exprs: ExitCount, L, Budget: SCEVCheapExpansionBudget,
                                       TTI, At: PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(S: ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(Val: V))
      Changed |= RecursivelyDeleteDeadPHINode(PN: PHI, TLI, MSSAU: MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(Val: V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(V: Inst, TLI, MSSAU: MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(BB: L->getHeader(), TLI, MSSAU: MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2215
2216PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2217 LoopStandardAnalysisResults &AR,
2218 LPMUpdater &) {
2219 Function *F = L.getHeader()->getParent();
2220 const DataLayout &DL = F->getDataLayout();
2221
2222 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2223 WidenIndVars && AllowIVWidening);
2224 if (!IVS.run(L: &L))
2225 return PreservedAnalyses::all();
2226
2227 auto PA = getLoopPassPreservedAnalyses();
2228 PA.preserveSet<CFGAnalyses>();
2229 if (IVS.runUnswitching()) {
2230 AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(IR&: L, ExtraArgs&: AR);
2231 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2232 }
2233
2234 if (AR.MSSA)
2235 PA.preserve<MemorySSAAnalysis>();
2236 return PA;
2237}
2238