1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This transformation analyzes and transforms the induction variables (and
10// computations derived from them) into simpler forms suitable for subsequent
11// analysis and transformation.
12//
13// If the trip count of a loop is computable, this pass also makes the following
14// changes:
15// 1. The exit condition for the loop is canonicalized to compare the
16// induction value against the exit value. This turns loops like:
17// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18// 2. Any use outside of the loop of an expression derived from the indvar
19// is changed to compute the derived value outside of the loop, eliminating
20// the dependence on the exit value of the induction variable. If the only
21// purpose of the loop is to compute the exit value of some derived
22// expression, this transformation will make the loop dead.
23//
24//===----------------------------------------------------------------------===//
25
26#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/SmallPtrSet.h"
31#include "llvm/ADT/SmallSet.h"
32#include "llvm/ADT/SmallVector.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/ADT/iterator_range.h"
35#include "llvm/Analysis/LoopInfo.h"
36#include "llvm/Analysis/LoopPass.h"
37#include "llvm/Analysis/MemorySSA.h"
38#include "llvm/Analysis/MemorySSAUpdater.h"
39#include "llvm/Analysis/ScalarEvolution.h"
40#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
42#include "llvm/Analysis/TargetLibraryInfo.h"
43#include "llvm/Analysis/TargetTransformInfo.h"
44#include "llvm/Analysis/ValueTracking.h"
45#include "llvm/IR/BasicBlock.h"
46#include "llvm/IR/Constant.h"
47#include "llvm/IR/ConstantRange.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DerivedTypes.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/IRBuilder.h"
54#include "llvm/IR/InstrTypes.h"
55#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Instructions.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/PassManager.h"
59#include "llvm/IR/PatternMatch.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
65#include "llvm/Support/Casting.h"
66#include "llvm/Support/CommandLine.h"
67#include "llvm/Support/Debug.h"
68#include "llvm/Support/MathExtras.h"
69#include "llvm/Support/raw_ostream.h"
70#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
71#include "llvm/Transforms/Utils/BasicBlockUtils.h"
72#include "llvm/Transforms/Utils/Local.h"
73#include "llvm/Transforms/Utils/LoopUtils.h"
74#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
75#include "llvm/Transforms/Utils/SimplifyIndVar.h"
76#include <cassert>
77#include <cstdint>
78#include <utility>
79
80using namespace llvm;
81using namespace PatternMatch;
82using namespace SCEVPatternMatch;
83
84#define DEBUG_TYPE "indvars"
85
86STATISTIC(NumWidened , "Number of indvars widened");
87STATISTIC(NumReplaced , "Number of exit values replaced");
88STATISTIC(NumLFTR , "Number of loop exit tests replaced");
89STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
90STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
91
92static cl::opt<ReplaceExitVal> ReplaceExitValue(
93 "replexitval", cl::Hidden, cl::init(Val: OnlyCheapRepl),
94 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
95 cl::values(
96 clEnumValN(NeverRepl, "never", "never replace exit value"),
97 clEnumValN(OnlyCheapRepl, "cheap",
98 "only replace exit value when the cost is cheap"),
99 clEnumValN(
100 UnusedIndVarInLoop, "unusedindvarinloop",
101 "only replace exit value when it is an unused "
102 "induction variable in the loop and has cheap replacement cost"),
103 clEnumValN(NoHardUse, "noharduse",
104 "only replace exit values when loop def likely dead"),
105 clEnumValN(AlwaysRepl, "always",
106 "always replace exit value whenever possible")));
107
108static cl::opt<bool> UsePostIncrementRanges(
109 "indvars-post-increment-ranges", cl::Hidden,
110 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
111 cl::init(Val: true));
112
113static cl::opt<bool>
114DisableLFTR("disable-lftr", cl::Hidden, cl::init(Val: false),
115 cl::desc("Disable Linear Function Test Replace optimization"));
116
117static cl::opt<bool>
118LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(Val: true),
119 cl::desc("Predicate conditions in read only loops"));
120
121static cl::opt<bool>
122AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(Val: true),
123 cl::desc("Allow widening of indvars to eliminate s/zext"));
124
125namespace {
126
127class IndVarSimplify {
128 LoopInfo *LI;
129 ScalarEvolution *SE;
130 DominatorTree *DT;
131 const DataLayout &DL;
132 TargetLibraryInfo *TLI;
133 const TargetTransformInfo *TTI;
134 std::unique_ptr<MemorySSAUpdater> MSSAU;
135
136 SmallVector<WeakTrackingVH, 16> DeadInsts;
137 bool WidenIndVars;
138
139 bool RunUnswitching = false;
140
141 bool handleFloatingPointIV(Loop *L, PHINode *PH);
142 bool rewriteNonIntegerIVs(Loop *L);
143
144 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
145 /// Try to improve our exit conditions by converting condition from signed
146 /// to unsigned or rotating computation out of the loop.
147 /// (See inline comment about why this is duplicated from simplifyAndExtend)
148 bool canonicalizeExitCondition(Loop *L);
149 /// Try to eliminate loop exits based on analyzeable exit counts
150 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
151 /// Try to form loop invariant tests for loop exits by changing how many
152 /// iterations of the loop run when that is unobservable.
153 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
154
155 bool rewriteFirstIterationLoopExitValues(Loop *L);
156
157 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
158 const SCEV *ExitCount,
159 PHINode *IndVar, SCEVExpander &Rewriter);
160
161 bool sinkUnusedInvariants(Loop *L);
162
163public:
164 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
165 const DataLayout &DL, TargetLibraryInfo *TLI,
166 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
167 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
168 WidenIndVars(WidenIndVars) {
169 if (MSSA)
170 MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
171 }
172
173 bool run(Loop *L);
174
175 bool runUnswitching() const { return RunUnswitching; }
176};
177
178} // end anonymous namespace
179
180//===----------------------------------------------------------------------===//
181// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
182//===----------------------------------------------------------------------===//
183
184/// Convert APF to an integer, if possible.
185static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
186 bool isExact = false;
187 // See if we can convert this to an int64_t
188 uint64_t UIntVal;
189 if (APF.convertToInteger(Input: MutableArrayRef(UIntVal), Width: 64, IsSigned: true,
190 RM: APFloat::rmTowardZero, IsExact: &isExact) != APFloat::opOK ||
191 !isExact)
192 return false;
193 IntVal = UIntVal;
194 return true;
195}
196
197/// If the loop has floating induction variable then insert corresponding
198/// integer induction variable if possible.
199/// For example,
200/// for(double i = 0; i < 10000; ++i)
201/// bar(i)
202/// is converted into
203/// for(int i = 0; i < 10000; ++i)
204/// bar((double)i);
205bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
206 unsigned IncomingEdge = L->contains(BB: PN->getIncomingBlock(i: 0));
207 unsigned BackEdge = IncomingEdge^1;
208
209 // Check incoming value.
210 auto *InitValueVal = dyn_cast<ConstantFP>(Val: PN->getIncomingValue(i: IncomingEdge));
211
212 int64_t InitValue;
213 if (!InitValueVal || !ConvertToSInt(APF: InitValueVal->getValueAPF(), IntVal&: InitValue))
214 return false;
215
216 // Check IV increment. Reject this PN if increment operation is not
217 // an add or increment value can not be represented by an integer.
218 auto *Incr = dyn_cast<BinaryOperator>(Val: PN->getIncomingValue(i: BackEdge));
219 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
220
221 // If this is not an add of the PHI with a constantfp, or if the constant fp
222 // is not an integer, bail out.
223 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Val: Incr->getOperand(i_nocapture: 1));
224 int64_t IncValue;
225 if (IncValueVal == nullptr || Incr->getOperand(i_nocapture: 0) != PN ||
226 !ConvertToSInt(APF: IncValueVal->getValueAPF(), IntVal&: IncValue))
227 return false;
228
229 // Check Incr uses. One user is PN and the other user is an exit condition
230 // used by the conditional terminator.
231 Value::user_iterator IncrUse = Incr->user_begin();
232 Instruction *U1 = cast<Instruction>(Val: *IncrUse++);
233 if (IncrUse == Incr->user_end()) return false;
234 Instruction *U2 = cast<Instruction>(Val: *IncrUse++);
235 if (IncrUse != Incr->user_end()) return false;
236
237 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
238 // only used by a branch, we can't transform it.
239 FCmpInst *Compare = dyn_cast<FCmpInst>(Val: U1);
240 if (!Compare)
241 Compare = dyn_cast<FCmpInst>(Val: U2);
242 if (!Compare || !Compare->hasOneUse() ||
243 !isa<BranchInst>(Val: Compare->user_back()))
244 return false;
245
246 BranchInst *TheBr = cast<BranchInst>(Val: Compare->user_back());
247
248 // We need to verify that the branch actually controls the iteration count
249 // of the loop. If not, the new IV can overflow and no one will notice.
250 // The branch block must be in the loop and one of the successors must be out
251 // of the loop.
252 assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
253 if (!L->contains(BB: TheBr->getParent()) ||
254 (L->contains(BB: TheBr->getSuccessor(i: 0)) &&
255 L->contains(BB: TheBr->getSuccessor(i: 1))))
256 return false;
257
258 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
259 // transform it.
260 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Val: Compare->getOperand(i_nocapture: 1));
261 int64_t ExitValue;
262 if (ExitValueVal == nullptr ||
263 !ConvertToSInt(APF: ExitValueVal->getValueAPF(), IntVal&: ExitValue))
264 return false;
265
266 // Find new predicate for integer comparison.
267 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
268 switch (Compare->getPredicate()) {
269 default: return false; // Unknown comparison.
270 case CmpInst::FCMP_OEQ:
271 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
272 case CmpInst::FCMP_ONE:
273 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
274 case CmpInst::FCMP_OGT:
275 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
276 case CmpInst::FCMP_OGE:
277 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
278 case CmpInst::FCMP_OLT:
279 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
280 case CmpInst::FCMP_OLE:
281 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
282 }
283
284 // We convert the floating point induction variable to a signed i32 value if
285 // we can. This is only safe if the comparison will not overflow in a way
286 // that won't be trapped by the integer equivalent operations. Check for this
287 // now.
288 // TODO: We could use i64 if it is native and the range requires it.
289
290 // The start/stride/exit values must all fit in signed i32.
291 if (!isInt<32>(x: InitValue) || !isInt<32>(x: IncValue) || !isInt<32>(x: ExitValue))
292 return false;
293
294 // If not actually striding (add x, 0.0), avoid touching the code.
295 if (IncValue == 0)
296 return false;
297
298 // Positive and negative strides have different safety conditions.
299 if (IncValue > 0) {
300 // If we have a positive stride, we require the init to be less than the
301 // exit value.
302 if (InitValue >= ExitValue)
303 return false;
304
305 uint32_t Range = uint32_t(ExitValue-InitValue);
306 // Check for infinite loop, either:
307 // while (i <= Exit) or until (i > Exit)
308 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
309 if (++Range == 0) return false; // Range overflows.
310 }
311
312 unsigned Leftover = Range % uint32_t(IncValue);
313
314 // If this is an equality comparison, we require that the strided value
315 // exactly land on the exit value, otherwise the IV condition will wrap
316 // around and do things the fp IV wouldn't.
317 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
318 Leftover != 0)
319 return false;
320
321 // If the stride would wrap around the i32 before exiting, we can't
322 // transform the IV.
323 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
324 return false;
325 } else {
326 // If we have a negative stride, we require the init to be greater than the
327 // exit value.
328 if (InitValue <= ExitValue)
329 return false;
330
331 uint32_t Range = uint32_t(InitValue-ExitValue);
332 // Check for infinite loop, either:
333 // while (i >= Exit) or until (i < Exit)
334 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
335 if (++Range == 0) return false; // Range overflows.
336 }
337
338 unsigned Leftover = Range % uint32_t(-IncValue);
339
340 // If this is an equality comparison, we require that the strided value
341 // exactly land on the exit value, otherwise the IV condition will wrap
342 // around and do things the fp IV wouldn't.
343 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
344 Leftover != 0)
345 return false;
346
347 // If the stride would wrap around the i32 before exiting, we can't
348 // transform the IV.
349 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
350 return false;
351 }
352
353 IntegerType *Int32Ty = Type::getInt32Ty(C&: PN->getContext());
354
355 // Insert new integer induction variable.
356 PHINode *NewPHI =
357 PHINode::Create(Ty: Int32Ty, NumReservedValues: 2, NameStr: PN->getName() + ".int", InsertBefore: PN->getIterator());
358 NewPHI->addIncoming(V: ConstantInt::getSigned(Ty: Int32Ty, V: InitValue),
359 BB: PN->getIncomingBlock(i: IncomingEdge));
360 NewPHI->setDebugLoc(PN->getDebugLoc());
361
362 Instruction *NewAdd = BinaryOperator::CreateAdd(
363 V1: NewPHI, V2: ConstantInt::getSigned(Ty: Int32Ty, V: IncValue),
364 Name: Incr->getName() + ".int", InsertBefore: Incr->getIterator());
365 NewAdd->setDebugLoc(Incr->getDebugLoc());
366 NewPHI->addIncoming(V: NewAdd, BB: PN->getIncomingBlock(i: BackEdge));
367
368 ICmpInst *NewCompare = new ICmpInst(
369 TheBr->getIterator(), NewPred, NewAdd,
370 ConstantInt::getSigned(Ty: Int32Ty, V: ExitValue), Compare->getName());
371 NewCompare->setDebugLoc(Compare->getDebugLoc());
372
373 // In the following deletions, PN may become dead and may be deleted.
374 // Use a WeakTrackingVH to observe whether this happens.
375 WeakTrackingVH WeakPH = PN;
376
377 // Delete the old floating point exit comparison. The branch starts using the
378 // new comparison.
379 NewCompare->takeName(V: Compare);
380 Compare->replaceAllUsesWith(V: NewCompare);
381 RecursivelyDeleteTriviallyDeadInstructions(V: Compare, TLI, MSSAU: MSSAU.get());
382
383 // Delete the old floating point increment.
384 Incr->replaceAllUsesWith(V: PoisonValue::get(T: Incr->getType()));
385 RecursivelyDeleteTriviallyDeadInstructions(V: Incr, TLI, MSSAU: MSSAU.get());
386
387 // If the FP induction variable still has uses, this is because something else
388 // in the loop uses its value. In order to canonicalize the induction
389 // variable, we chose to eliminate the IV and rewrite it in terms of an
390 // int->fp cast.
391 //
392 // We give preference to sitofp over uitofp because it is faster on most
393 // platforms.
394 if (WeakPH) {
395 Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
396 PN->getParent()->getFirstInsertionPt());
397 Conv->setDebugLoc(PN->getDebugLoc());
398 PN->replaceAllUsesWith(V: Conv);
399 RecursivelyDeleteTriviallyDeadInstructions(V: PN, TLI, MSSAU: MSSAU.get());
400 }
401 return true;
402}
403
404bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
405 // First step. Check to see if there are any floating-point recurrences.
406 // If there are, change them into integer recurrences, permitting analysis by
407 // the SCEV routines.
408 BasicBlock *Header = L->getHeader();
409
410 SmallVector<WeakTrackingVH, 8> PHIs(llvm::make_pointer_range(Range: Header->phis()));
411
412 bool Changed = false;
413 for (WeakTrackingVH &PHI : PHIs)
414 if (PHINode *PN = dyn_cast_or_null<PHINode>(Val: &*PHI))
415 Changed |= handleFloatingPointIV(L, PN);
416
417 // If the loop previously had floating-point IV, ScalarEvolution
418 // may not have been able to compute a trip count. Now that we've done some
419 // re-writing, the trip count may be computable.
420 if (Changed)
421 SE->forgetLoop(L);
422 return Changed;
423}
424
425//===---------------------------------------------------------------------===//
426// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
427// they will exit at the first iteration.
428//===---------------------------------------------------------------------===//
429
430/// Check to see if this loop has loop invariant conditions which lead to loop
431/// exits. If so, we know that if the exit path is taken, it is at the first
432/// loop iteration. This lets us predict exit values of PHI nodes that live in
433/// loop header.
434bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
435 // Verify the input to the pass is already in LCSSA form.
436 assert(L->isLCSSAForm(*DT));
437
438 SmallVector<BasicBlock *, 8> ExitBlocks;
439 L->getUniqueExitBlocks(ExitBlocks);
440
441 bool MadeAnyChanges = false;
442 for (auto *ExitBB : ExitBlocks) {
443 // If there are no more PHI nodes in this exit block, then no more
444 // values defined inside the loop are used on this path.
445 for (PHINode &PN : ExitBB->phis()) {
446 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
447 IncomingValIdx != E; ++IncomingValIdx) {
448 auto *IncomingBB = PN.getIncomingBlock(i: IncomingValIdx);
449
450 // Can we prove that the exit must run on the first iteration if it
451 // runs at all? (i.e. early exits are fine for our purposes, but
452 // traces which lead to this exit being taken on the 2nd iteration
453 // aren't.) Note that this is about whether the exit branch is
454 // executed, not about whether it is taken.
455 if (!L->getLoopLatch() ||
456 !DT->dominates(A: IncomingBB, B: L->getLoopLatch()))
457 continue;
458
459 // Get condition that leads to the exit path.
460 auto *TermInst = IncomingBB->getTerminator();
461
462 Value *Cond = nullptr;
463 if (auto *BI = dyn_cast<BranchInst>(Val: TermInst)) {
464 // Must be a conditional branch, otherwise the block
465 // should not be in the loop.
466 Cond = BI->getCondition();
467 } else if (auto *SI = dyn_cast<SwitchInst>(Val: TermInst))
468 Cond = SI->getCondition();
469 else
470 continue;
471
472 if (!L->isLoopInvariant(V: Cond))
473 continue;
474
475 auto *ExitVal = dyn_cast<PHINode>(Val: PN.getIncomingValue(i: IncomingValIdx));
476
477 // Only deal with PHIs in the loop header.
478 if (!ExitVal || ExitVal->getParent() != L->getHeader())
479 continue;
480
481 // If ExitVal is a PHI on the loop header, then we know its
482 // value along this exit because the exit can only be taken
483 // on the first iteration.
484 auto *LoopPreheader = L->getLoopPreheader();
485 assert(LoopPreheader && "Invalid loop");
486 int PreheaderIdx = ExitVal->getBasicBlockIndex(BB: LoopPreheader);
487 if (PreheaderIdx != -1) {
488 assert(ExitVal->getParent() == L->getHeader() &&
489 "ExitVal must be in loop header");
490 MadeAnyChanges = true;
491 PN.setIncomingValue(i: IncomingValIdx,
492 V: ExitVal->getIncomingValue(i: PreheaderIdx));
493 SE->forgetValue(V: &PN);
494 }
495 }
496 }
497 }
498 return MadeAnyChanges;
499}
500
501//===----------------------------------------------------------------------===//
502// IV Widening - Extend the width of an IV to cover its widest uses.
503//===----------------------------------------------------------------------===//
504
505/// Update information about the induction variable that is extended by this
506/// sign or zero extend operation. This is used to determine the final width of
507/// the IV before actually widening it.
508static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
509 ScalarEvolution *SE,
510 const TargetTransformInfo *TTI) {
511 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
512 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
513 return;
514
515 Type *Ty = Cast->getType();
516 uint64_t Width = SE->getTypeSizeInBits(Ty);
517 if (!Cast->getDataLayout().isLegalInteger(Width))
518 return;
519
520 // Check that `Cast` actually extends the induction variable (we rely on this
521 // later). This takes care of cases where `Cast` is extending a truncation of
522 // the narrow induction variable, and thus can end up being narrower than the
523 // "narrow" induction variable.
524 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(Ty: WI.NarrowIV->getType());
525 if (NarrowIVWidth >= Width)
526 return;
527
528 // Cast is either an sext or zext up to this point.
529 // We should not widen an indvar if arithmetics on the wider indvar are more
530 // expensive than those on the narrower indvar. We check only the cost of ADD
531 // because at least an ADD is required to increment the induction variable. We
532 // could compute more comprehensively the cost of all instructions on the
533 // induction variable when necessary.
534 if (TTI &&
535 TTI->getArithmeticInstrCost(Opcode: Instruction::Add, Ty) >
536 TTI->getArithmeticInstrCost(Opcode: Instruction::Add,
537 Ty: Cast->getOperand(i_nocapture: 0)->getType())) {
538 return;
539 }
540
541 if (!WI.WidestNativeType ||
542 Width > SE->getTypeSizeInBits(Ty: WI.WidestNativeType)) {
543 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
544 WI.IsSigned = IsSigned;
545 return;
546 }
547
548 // We extend the IV to satisfy the sign of its user(s), or 'signed'
549 // if there are multiple users with both sign- and zero extensions,
550 // in order not to introduce nondeterministic behaviour based on the
551 // unspecified order of a PHI nodes' users-iterator.
552 WI.IsSigned |= IsSigned;
553}
554
555//===----------------------------------------------------------------------===//
556// Live IV Reduction - Minimize IVs live across the loop.
557//===----------------------------------------------------------------------===//
558
559//===----------------------------------------------------------------------===//
560// Simplification of IV users based on SCEV evaluation.
561//===----------------------------------------------------------------------===//
562
563namespace {
564
565class IndVarSimplifyVisitor : public IVVisitor {
566 ScalarEvolution *SE;
567 const TargetTransformInfo *TTI;
568 PHINode *IVPhi;
569
570public:
571 WideIVInfo WI;
572
573 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
574 const TargetTransformInfo *TTI,
575 const DominatorTree *DTree)
576 : SE(SCEV), TTI(TTI), IVPhi(IV) {
577 DT = DTree;
578 WI.NarrowIV = IVPhi;
579 }
580
581 // Implement the interface used by simplifyUsersOfIV.
582 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
583};
584
585} // end anonymous namespace
586
587/// Iteratively perform simplification on a worklist of IV users. Each
588/// successive simplification may push more users which may themselves be
589/// candidates for simplification.
590///
591/// Sign/Zero extend elimination is interleaved with IV simplification.
592bool IndVarSimplify::simplifyAndExtend(Loop *L,
593 SCEVExpander &Rewriter,
594 LoopInfo *LI) {
595 SmallVector<WideIVInfo, 8> WideIVs;
596
597 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
598 M: L->getBlocks()[0]->getModule(), id: Intrinsic::experimental_guard);
599 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
600
601 SmallVector<PHINode *, 8> LoopPhis(
602 llvm::make_pointer_range(Range: L->getHeader()->phis()));
603
604 // Each round of simplification iterates through the SimplifyIVUsers worklist
605 // for all current phis, then determines whether any IVs can be
606 // widened. Widening adds new phis to LoopPhis, inducing another round of
607 // simplification on the wide IVs.
608 bool Changed = false;
609 while (!LoopPhis.empty()) {
610 // Evaluate as many IV expressions as possible before widening any IVs. This
611 // forces SCEV to set no-wrap flags before evaluating sign/zero
612 // extension. The first time SCEV attempts to normalize sign/zero extension,
613 // the result becomes final. So for the most predictable results, we delay
614 // evaluation of sign/zero extend evaluation until needed, and avoid running
615 // other SCEV based analysis prior to simplifyAndExtend.
616 do {
617 PHINode *CurrIV = LoopPhis.pop_back_val();
618
619 // Information about sign/zero extensions of CurrIV.
620 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
621
622 const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, Dead&: DeadInsts,
623 Rewriter, V: &Visitor);
624
625 Changed |= C;
626 RunUnswitching |= U;
627 if (Visitor.WI.WidestNativeType) {
628 WideIVs.push_back(Elt: Visitor.WI);
629 }
630 } while(!LoopPhis.empty());
631
632 // Continue if we disallowed widening.
633 if (!WidenIndVars)
634 continue;
635
636 for (; !WideIVs.empty(); WideIVs.pop_back()) {
637 unsigned ElimExt;
638 unsigned Widened;
639 if (PHINode *WidePhi = createWideIV(WI: WideIVs.back(), LI, SE, Rewriter,
640 DT, DeadInsts, NumElimExt&: ElimExt, NumWidened&: Widened,
641 HasGuards, UsePostIncrementRanges)) {
642 NumElimExt += ElimExt;
643 NumWidened += Widened;
644 Changed = true;
645 LoopPhis.push_back(Elt: WidePhi);
646 }
647 }
648 }
649 return Changed;
650}
651
652//===----------------------------------------------------------------------===//
653// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
654//===----------------------------------------------------------------------===//
655
656/// Given an Value which is hoped to be part of an add recurance in the given
657/// loop, return the associated Phi node if so. Otherwise, return null. Note
658/// that this is less general than SCEVs AddRec checking.
659static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
660 Instruction *IncI = dyn_cast<Instruction>(Val: IncV);
661 if (!IncI)
662 return nullptr;
663
664 switch (IncI->getOpcode()) {
665 case Instruction::Add:
666 case Instruction::Sub:
667 break;
668 case Instruction::GetElementPtr:
669 // An IV counter must preserve its type.
670 if (IncI->getNumOperands() == 2)
671 break;
672 [[fallthrough]];
673 default:
674 return nullptr;
675 }
676
677 PHINode *Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: 0));
678 if (Phi && Phi->getParent() == L->getHeader()) {
679 if (L->isLoopInvariant(V: IncI->getOperand(i: 1)))
680 return Phi;
681 return nullptr;
682 }
683 if (IncI->getOpcode() == Instruction::GetElementPtr)
684 return nullptr;
685
686 // Allow add/sub to be commuted.
687 Phi = dyn_cast<PHINode>(Val: IncI->getOperand(i: 1));
688 if (Phi && Phi->getParent() == L->getHeader()) {
689 if (L->isLoopInvariant(V: IncI->getOperand(i: 0)))
690 return Phi;
691 }
692 return nullptr;
693}
694
695/// Whether the current loop exit test is based on this value. Currently this
696/// is limited to a direct use in the loop condition.
697static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
698 BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
699 ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
700 // TODO: Allow non-icmp loop test.
701 if (!ICmp)
702 return false;
703
704 // TODO: Allow indirect use.
705 return ICmp->getOperand(i_nocapture: 0) == V || ICmp->getOperand(i_nocapture: 1) == V;
706}
707
708/// linearFunctionTestReplace policy. Return true unless we can show that the
709/// current exit test is already sufficiently canonical.
710static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
711 assert(L->getLoopLatch() && "Must be in simplified form");
712
713 // Avoid converting a constant or loop invariant test back to a runtime
714 // test. This is critical for when SCEV's cached ExitCount is less precise
715 // than the current IR (such as after we've proven a particular exit is
716 // actually dead and thus the BE count never reaches our ExitCount.)
717 BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
718 if (L->isLoopInvariant(V: BI->getCondition()))
719 return false;
720
721 // Do LFTR to simplify the exit condition to an ICMP.
722 ICmpInst *Cond = dyn_cast<ICmpInst>(Val: BI->getCondition());
723 if (!Cond)
724 return true;
725
726 // Do LFTR to simplify the exit ICMP to EQ/NE
727 ICmpInst::Predicate Pred = Cond->getPredicate();
728 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
729 return true;
730
731 // Look for a loop invariant RHS
732 Value *LHS = Cond->getOperand(i_nocapture: 0);
733 Value *RHS = Cond->getOperand(i_nocapture: 1);
734 if (!L->isLoopInvariant(V: RHS)) {
735 if (!L->isLoopInvariant(V: LHS))
736 return true;
737 std::swap(a&: LHS, b&: RHS);
738 }
739 // Look for a simple IV counter LHS
740 PHINode *Phi = dyn_cast<PHINode>(Val: LHS);
741 if (!Phi)
742 Phi = getLoopPhiForCounter(IncV: LHS, L);
743
744 if (!Phi)
745 return true;
746
747 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
748 int Idx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
749 if (Idx < 0)
750 return true;
751
752 // Do LFTR if the exit condition's IV is *not* a simple counter.
753 Value *IncV = Phi->getIncomingValue(i: Idx);
754 return Phi != getLoopPhiForCounter(IncV, L);
755}
756
757/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
758/// down to checking that all operands are constant and listing instructions
759/// that may hide undef.
760static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
761 unsigned Depth) {
762 if (isa<Constant>(Val: V))
763 return !isa<UndefValue>(Val: V);
764
765 if (Depth >= 6)
766 return false;
767
768 // Conservatively handle non-constant non-instructions. For example, Arguments
769 // may be undef.
770 Instruction *I = dyn_cast<Instruction>(Val: V);
771 if (!I)
772 return false;
773
774 // Load and return values may be undef.
775 if(I->mayReadFromMemory() || isa<CallInst>(Val: I) || isa<InvokeInst>(Val: I))
776 return false;
777
778 // Optimistically handle other instructions.
779 for (Value *Op : I->operands()) {
780 if (!Visited.insert(Ptr: Op).second)
781 continue;
782 if (!hasConcreteDefImpl(V: Op, Visited, Depth: Depth+1))
783 return false;
784 }
785 return true;
786}
787
788/// Return true if the given value is concrete. We must prove that undef can
789/// never reach it.
790///
791/// TODO: If we decide that this is a good approach to checking for undef, we
792/// may factor it into a common location.
793static bool hasConcreteDef(Value *V) {
794 SmallPtrSet<Value*, 8> Visited;
795 Visited.insert(Ptr: V);
796 return hasConcreteDefImpl(V, Visited, Depth: 0);
797}
798
799/// Return true if the given phi is a "counter" in L. A counter is an
800/// add recurance (of integer or pointer type) with an arbitrary start, and a
801/// step of 1. Note that L must have exactly one latch.
802static bool isLoopCounter(PHINode* Phi, Loop *L,
803 ScalarEvolution *SE) {
804 assert(Phi->getParent() == L->getHeader());
805 assert(L->getLoopLatch());
806
807 if (!SE->isSCEVable(Ty: Phi->getType()))
808 return false;
809
810 const SCEV *S = SE->getSCEV(V: Phi);
811 if (!match(S, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_scev_One(), L: m_SpecificLoop(L))))
812 return false;
813
814 int LatchIdx = Phi->getBasicBlockIndex(BB: L->getLoopLatch());
815 Value *IncV = Phi->getIncomingValue(i: LatchIdx);
816 return (getLoopPhiForCounter(IncV, L) == Phi &&
817 isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncV)));
818}
819
820/// Search the loop header for a loop counter (anadd rec w/step of one)
821/// suitable for use by LFTR. If multiple counters are available, select the
822/// "best" one based profitable heuristics.
823///
824/// BECount may be an i8* pointer type. The pointer difference is already
825/// valid count without scaling the address stride, so it remains a pointer
826/// expression as far as SCEV is concerned.
827static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
828 const SCEV *BECount,
829 ScalarEvolution *SE, DominatorTree *DT) {
830 uint64_t BCWidth = SE->getTypeSizeInBits(Ty: BECount->getType());
831
832 Value *Cond = cast<BranchInst>(Val: ExitingBB->getTerminator())->getCondition();
833
834 // Loop over all of the PHI nodes, looking for a simple counter.
835 PHINode *BestPhi = nullptr;
836 const SCEV *BestInit = nullptr;
837 BasicBlock *LatchBlock = L->getLoopLatch();
838 assert(LatchBlock && "Must be in simplified form");
839 const DataLayout &DL = L->getHeader()->getDataLayout();
840
841 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(Val: I); ++I) {
842 PHINode *Phi = cast<PHINode>(Val&: I);
843 if (!isLoopCounter(Phi, L, SE))
844 continue;
845
846 const auto *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: Phi));
847
848 // AR may be a pointer type, while BECount is an integer type.
849 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
850 // AR may not be a narrower type, or we may never exit.
851 uint64_t PhiWidth = SE->getTypeSizeInBits(Ty: AR->getType());
852 if (PhiWidth < BCWidth || !DL.isLegalInteger(Width: PhiWidth))
853 continue;
854
855 // Avoid reusing a potentially undef value to compute other values that may
856 // have originally had a concrete definition.
857 if (!hasConcreteDef(V: Phi)) {
858 // We explicitly allow unknown phis as long as they are already used by
859 // the loop exit test. This is legal since performing LFTR could not
860 // increase the number of undef users.
861 Value *IncPhi = Phi->getIncomingValueForBlock(BB: LatchBlock);
862 if (!isLoopExitTestBasedOn(V: Phi, ExitingBB) &&
863 !isLoopExitTestBasedOn(V: IncPhi, ExitingBB))
864 continue;
865 }
866
867 // Avoid introducing undefined behavior due to poison which didn't exist in
868 // the original program. (Annoyingly, the rules for poison and undef
869 // propagation are distinct, so this does NOT cover the undef case above.)
870 // We have to ensure that we don't introduce UB by introducing a use on an
871 // iteration where said IV produces poison. Our strategy here differs for
872 // pointers and integer IVs. For integers, we strip and reinfer as needed,
873 // see code in linearFunctionTestReplace. For pointers, we restrict
874 // transforms as there is no good way to reinfer inbounds once lost.
875 if (!Phi->getType()->isIntegerTy() &&
876 !mustExecuteUBIfPoisonOnPathTo(Root: Phi, OnPathTo: ExitingBB->getTerminator(), DT))
877 continue;
878
879 const SCEV *Init = AR->getStart();
880
881 if (BestPhi && !isAlmostDeadIV(IV: BestPhi, LatchBlock, Cond)) {
882 // Don't force a live loop counter if another IV can be used.
883 if (isAlmostDeadIV(IV: Phi, LatchBlock, Cond))
884 continue;
885
886 // Prefer to count-from-zero. This is a more "canonical" counter form. It
887 // also prefers integer to pointer IVs.
888 if (BestInit->isZero() != Init->isZero()) {
889 if (BestInit->isZero())
890 continue;
891 }
892 // If two IVs both count from zero or both count from nonzero then the
893 // narrower is likely a dead phi that has been widened. Use the wider phi
894 // to allow the other to be eliminated.
895 else if (PhiWidth <= SE->getTypeSizeInBits(Ty: BestPhi->getType()))
896 continue;
897 }
898 BestPhi = Phi;
899 BestInit = Init;
900 }
901 return BestPhi;
902}
903
904/// Insert an IR expression which computes the value held by the IV IndVar
905/// (which must be an loop counter w/unit stride) after the backedge of loop L
906/// is taken ExitCount times.
907static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
908 const SCEV *ExitCount, bool UsePostInc, Loop *L,
909 SCEVExpander &Rewriter, ScalarEvolution *SE) {
910 assert(isLoopCounter(IndVar, L, SE));
911 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
912 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IndVar));
913 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
914
915 // For integer IVs, truncate the IV before computing the limit unless we
916 // know apriori that the limit must be a constant when evaluated in the
917 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
918 // IV in the loop over a (potentially) expensive expansion of the widened
919 // exit count add(zext(add)) expression.
920 if (IndVar->getType()->isIntegerTy() &&
921 SE->getTypeSizeInBits(Ty: AR->getType()) >
922 SE->getTypeSizeInBits(Ty: ExitCount->getType())) {
923 const SCEV *IVInit = AR->getStart();
924 if (!isa<SCEVConstant>(Val: IVInit) || !isa<SCEVConstant>(Val: ExitCount))
925 AR = cast<SCEVAddRecExpr>(Val: SE->getTruncateExpr(Op: AR, Ty: ExitCount->getType()));
926 }
927
928 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(SE&: *SE) : AR;
929 const SCEV *IVLimit = ARBase->evaluateAtIteration(It: ExitCount, SE&: *SE);
930 assert(SE->isLoopInvariant(IVLimit, L) &&
931 "Computed iteration count is not loop invariant!");
932 return Rewriter.expandCodeFor(SH: IVLimit, Ty: ARBase->getType(),
933 I: ExitingBB->getTerminator());
934}
935
936/// This method rewrites the exit condition of the loop to be a canonical !=
937/// comparison against the incremented loop induction variable. This pass is
938/// able to rewrite the exit tests of any loop where the SCEV analysis can
939/// determine a loop-invariant trip count of the loop, which is actually a much
940/// broader range than just linear tests.
941bool IndVarSimplify::
942linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
943 const SCEV *ExitCount,
944 PHINode *IndVar, SCEVExpander &Rewriter) {
945 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
946 assert(isLoopCounter(IndVar, L, SE));
947 Instruction * const IncVar =
948 cast<Instruction>(Val: IndVar->getIncomingValueForBlock(BB: L->getLoopLatch()));
949
950 // Initialize CmpIndVar to the preincremented IV.
951 Value *CmpIndVar = IndVar;
952 bool UsePostInc = false;
953
954 // If the exiting block is the same as the backedge block, we prefer to
955 // compare against the post-incremented value, otherwise we must compare
956 // against the preincremented value.
957 if (ExitingBB == L->getLoopLatch()) {
958 // For pointer IVs, we chose to not strip inbounds which requires us not
959 // to add a potentially UB introducing use. We need to either a) show
960 // the loop test we're modifying is already in post-inc form, or b) show
961 // that adding a use must not introduce UB.
962 bool SafeToPostInc =
963 IndVar->getType()->isIntegerTy() ||
964 isLoopExitTestBasedOn(V: IncVar, ExitingBB) ||
965 mustExecuteUBIfPoisonOnPathTo(Root: IncVar, OnPathTo: ExitingBB->getTerminator(), DT);
966 if (SafeToPostInc) {
967 UsePostInc = true;
968 CmpIndVar = IncVar;
969 }
970 }
971
972 // It may be necessary to drop nowrap flags on the incrementing instruction
973 // if either LFTR moves from a pre-inc check to a post-inc check (in which
974 // case the increment might have previously been poison on the last iteration
975 // only) or if LFTR switches to a different IV that was previously dynamically
976 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
977 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
978 // check), because the pre-inc addrec flags may be adopted from the original
979 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
980 // TODO: This handling is inaccurate for one case: If we switch to a
981 // dynamically dead IV that wraps on the first loop iteration only, which is
982 // not covered by the post-inc addrec. (If the new IV was not dynamically
983 // dead, it could not be poison on the first iteration in the first place.)
984 if (auto *BO = dyn_cast<BinaryOperator>(Val: IncVar)) {
985 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(Val: SE->getSCEV(V: IncVar));
986 if (BO->hasNoUnsignedWrap())
987 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
988 if (BO->hasNoSignedWrap())
989 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
990 }
991
992 Value *ExitCnt = genLoopLimit(
993 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
994 assert(ExitCnt->getType()->isPointerTy() ==
995 IndVar->getType()->isPointerTy() &&
996 "genLoopLimit missed a cast");
997
998 // Insert a new icmp_ne or icmp_eq instruction before the branch.
999 BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1000 ICmpInst::Predicate P;
1001 if (L->contains(BB: BI->getSuccessor(i: 0)))
1002 P = ICmpInst::ICMP_NE;
1003 else
1004 P = ICmpInst::ICMP_EQ;
1005
1006 IRBuilder<> Builder(BI);
1007
1008 // The new loop exit condition should reuse the debug location of the
1009 // original loop exit condition.
1010 if (auto *Cond = dyn_cast<Instruction>(Val: BI->getCondition()))
1011 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1012
1013 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1014 // avoid the expensive expansion of the limit expression in the wider type,
1015 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1016 // since we know (from the exit count bitwidth), that we can't self-wrap in
1017 // the narrower type.
1018 unsigned CmpIndVarSize = SE->getTypeSizeInBits(Ty: CmpIndVar->getType());
1019 unsigned ExitCntSize = SE->getTypeSizeInBits(Ty: ExitCnt->getType());
1020 if (CmpIndVarSize > ExitCntSize) {
1021 assert(!CmpIndVar->getType()->isPointerTy() &&
1022 !ExitCnt->getType()->isPointerTy());
1023
1024 // Before resorting to actually inserting the truncate, use the same
1025 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1026 // the other side of the comparison instead. We still evaluate the limit
1027 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1028 // a truncate within in.
1029 bool Extended = false;
1030 const SCEV *IV = SE->getSCEV(V: CmpIndVar);
1031 const SCEV *TruncatedIV = SE->getTruncateExpr(Op: IV, Ty: ExitCnt->getType());
1032 const SCEV *ZExtTrunc =
1033 SE->getZeroExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());
1034
1035 if (ZExtTrunc == IV) {
1036 Extended = true;
1037 ExitCnt = Builder.CreateZExt(V: ExitCnt, DestTy: IndVar->getType(),
1038 Name: "wide.trip.count");
1039 } else {
1040 const SCEV *SExtTrunc =
1041 SE->getSignExtendExpr(Op: TruncatedIV, Ty: CmpIndVar->getType());
1042 if (SExtTrunc == IV) {
1043 Extended = true;
1044 ExitCnt = Builder.CreateSExt(V: ExitCnt, DestTy: IndVar->getType(),
1045 Name: "wide.trip.count");
1046 }
1047 }
1048
1049 if (Extended) {
1050 bool Discard;
1051 L->makeLoopInvariant(V: ExitCnt, Changed&: Discard);
1052 } else
1053 CmpIndVar = Builder.CreateTrunc(V: CmpIndVar, DestTy: ExitCnt->getType(),
1054 Name: "lftr.wideiv");
1055 }
1056 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1057 << " LHS:" << *CmpIndVar << '\n'
1058 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1059 << "\n"
1060 << " RHS:\t" << *ExitCnt << "\n"
1061 << "ExitCount:\t" << *ExitCount << "\n"
1062 << " was: " << *BI->getCondition() << "\n");
1063
1064 Value *Cond = Builder.CreateICmp(P, LHS: CmpIndVar, RHS: ExitCnt, Name: "exitcond");
1065 Value *OrigCond = BI->getCondition();
1066 // It's tempting to use replaceAllUsesWith here to fully replace the old
1067 // comparison, but that's not immediately safe, since users of the old
1068 // comparison may not be dominated by the new comparison. Instead, just
1069 // update the branch to use the new comparison; in the common case this
1070 // will make old comparison dead.
1071 BI->setCondition(Cond);
1072 DeadInsts.emplace_back(Args&: OrigCond);
1073
1074 ++NumLFTR;
1075 return true;
1076}
1077
1078//===----------------------------------------------------------------------===//
1079// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1080//===----------------------------------------------------------------------===//
1081
1082/// If there's a single exit block, sink any loop-invariant values that
1083/// were defined in the preheader but not used inside the loop into the
1084/// exit block to reduce register pressure in the loop.
1085bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1086 BasicBlock *ExitBlock = L->getExitBlock();
1087 if (!ExitBlock) return false;
1088
1089 BasicBlock *Preheader = L->getLoopPreheader();
1090 if (!Preheader) return false;
1091
1092 bool MadeAnyChanges = false;
1093 for (Instruction &I : llvm::make_early_inc_range(Range: llvm::reverse(C&: *Preheader))) {
1094
1095 // Skip BB Terminator.
1096 if (Preheader->getTerminator() == &I)
1097 continue;
1098
1099 // New instructions were inserted at the end of the preheader.
1100 if (isa<PHINode>(Val: I))
1101 break;
1102
1103 // Don't move instructions which might have side effects, since the side
1104 // effects need to complete before instructions inside the loop. Also don't
1105 // move instructions which might read memory, since the loop may modify
1106 // memory. Note that it's okay if the instruction might have undefined
1107 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1108 // block.
1109 if (I.mayHaveSideEffects() || I.mayReadFromMemory())
1110 continue;
1111
1112 // Skip debug or pseudo instructions.
1113 if (I.isDebugOrPseudoInst())
1114 continue;
1115
1116 // Skip eh pad instructions.
1117 if (I.isEHPad())
1118 continue;
1119
1120 // Don't sink alloca: we never want to sink static alloca's out of the
1121 // entry block, and correctly sinking dynamic alloca's requires
1122 // checks for stacksave/stackrestore intrinsics.
1123 // FIXME: Refactor this check somehow?
1124 if (isa<AllocaInst>(Val: &I))
1125 continue;
1126
1127 // Determine if there is a use in or before the loop (direct or
1128 // otherwise).
1129 bool UsedInLoop = false;
1130 for (Use &U : I.uses()) {
1131 Instruction *User = cast<Instruction>(Val: U.getUser());
1132 BasicBlock *UseBB = User->getParent();
1133 if (PHINode *P = dyn_cast<PHINode>(Val: User)) {
1134 unsigned i =
1135 PHINode::getIncomingValueNumForOperand(i: U.getOperandNo());
1136 UseBB = P->getIncomingBlock(i);
1137 }
1138 if (UseBB == Preheader || L->contains(BB: UseBB)) {
1139 UsedInLoop = true;
1140 break;
1141 }
1142 }
1143
1144 // If there is, the def must remain in the preheader.
1145 if (UsedInLoop)
1146 continue;
1147
1148 // Otherwise, sink it to the exit block.
1149 I.moveBefore(InsertPos: ExitBlock->getFirstInsertionPt());
1150 SE->forgetValue(V: &I);
1151 MadeAnyChanges = true;
1152 }
1153
1154 return MadeAnyChanges;
1155}
1156
1157static void replaceExitCond(BranchInst *BI, Value *NewCond,
1158 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1159 auto *OldCond = BI->getCondition();
1160 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1161 << " with " << *NewCond << "\n");
1162 BI->setCondition(NewCond);
1163 if (OldCond->use_empty())
1164 DeadInsts.emplace_back(Args&: OldCond);
1165}
1166
1167static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1168 bool IsTaken) {
1169 BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1170 bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1171 auto *OldCond = BI->getCondition();
1172 return ConstantInt::get(Ty: OldCond->getType(),
1173 V: IsTaken ? ExitIfTrue : !ExitIfTrue);
1174}
1175
1176static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1177 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1178 BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1179 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1180 replaceExitCond(BI, NewCond, DeadInsts);
1181}
1182
1183static void replaceLoopPHINodesWithPreheaderValues(
1184 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1185 ScalarEvolution &SE) {
1186 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1187 auto *LoopPreheader = L->getLoopPreheader();
1188 auto *LoopHeader = L->getHeader();
1189 SmallVector<Instruction *> Worklist;
1190 for (auto &PN : LoopHeader->phis()) {
1191 auto *PreheaderIncoming = PN.getIncomingValueForBlock(BB: LoopPreheader);
1192 for (User *U : PN.users())
1193 Worklist.push_back(Elt: cast<Instruction>(Val: U));
1194 SE.forgetValue(V: &PN);
1195 PN.replaceAllUsesWith(V: PreheaderIncoming);
1196 DeadInsts.emplace_back(Args: &PN);
1197 }
1198
1199 // Replacing with the preheader value will often allow IV users to simplify
1200 // (especially if the preheader value is a constant).
1201 SmallPtrSet<Instruction *, 16> Visited;
1202 while (!Worklist.empty()) {
1203 auto *I = cast<Instruction>(Val: Worklist.pop_back_val());
1204 if (!Visited.insert(Ptr: I).second)
1205 continue;
1206
1207 // Don't simplify instructions outside the loop.
1208 if (!L->contains(Inst: I))
1209 continue;
1210
1211 Value *Res = simplifyInstruction(I, Q: I->getDataLayout());
1212 if (Res && LI->replacementPreservesLCSSAForm(From: I, To: Res)) {
1213 for (User *U : I->users())
1214 Worklist.push_back(Elt: cast<Instruction>(Val: U));
1215 I->replaceAllUsesWith(V: Res);
1216 DeadInsts.emplace_back(Args&: I);
1217 }
1218 }
1219}
1220
1221static Value *
1222createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1223 const ScalarEvolution::LoopInvariantPredicate &LIP,
1224 SCEVExpander &Rewriter) {
1225 ICmpInst::Predicate InvariantPred = LIP.Pred;
1226 BasicBlock *Preheader = L->getLoopPreheader();
1227 assert(Preheader && "Preheader doesn't exist");
1228 Rewriter.setInsertPoint(Preheader->getTerminator());
1229 auto *LHSV = Rewriter.expandCodeFor(SH: LIP.LHS);
1230 auto *RHSV = Rewriter.expandCodeFor(SH: LIP.RHS);
1231 bool ExitIfTrue = !L->contains(BB: *succ_begin(BB: ExitingBB));
1232 if (ExitIfTrue)
1233 InvariantPred = ICmpInst::getInversePredicate(pred: InvariantPred);
1234 IRBuilder<> Builder(Preheader->getTerminator());
1235 BranchInst *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1236 return Builder.CreateICmp(P: InvariantPred, LHS: LHSV, RHS: RHSV,
1237 Name: BI->getCondition()->getName());
1238}
1239
1240static std::optional<Value *>
1241createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1242 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1243 ScalarEvolution *SE, SCEVExpander &Rewriter) {
1244 CmpPredicate Pred = ICmp->getCmpPredicate();
1245 Value *LHS = ICmp->getOperand(i_nocapture: 0);
1246 Value *RHS = ICmp->getOperand(i_nocapture: 1);
1247
1248 // 'LHS pred RHS' should now mean that we stay in loop.
1249 auto *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1250 if (Inverted)
1251 Pred = ICmpInst::getInverseCmpPredicate(Pred);
1252
1253 const SCEV *LHSS = SE->getSCEVAtScope(V: LHS, L);
1254 const SCEV *RHSS = SE->getSCEVAtScope(V: RHS, L);
1255 // Can we prove it to be trivially true or false?
1256 if (auto EV = SE->evaluatePredicateAt(Pred, LHS: LHSS, RHS: RHSS, CtxI: BI))
1257 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1258
1259 auto *ARTy = LHSS->getType();
1260 auto *MaxIterTy = MaxIter->getType();
1261 // If possible, adjust types.
1262 if (SE->getTypeSizeInBits(Ty: ARTy) > SE->getTypeSizeInBits(Ty: MaxIterTy))
1263 MaxIter = SE->getZeroExtendExpr(Op: MaxIter, Ty: ARTy);
1264 else if (SE->getTypeSizeInBits(Ty: ARTy) < SE->getTypeSizeInBits(Ty: MaxIterTy)) {
1265 const SCEV *MinusOne = SE->getMinusOne(Ty: ARTy);
1266 const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(Op: MinusOne, Ty: MaxIterTy);
1267 if (SE->isKnownPredicateAt(Pred: ICmpInst::ICMP_ULE, LHS: MaxIter, RHS: MaxAllowedIter, CtxI: BI))
1268 MaxIter = SE->getTruncateExpr(Op: MaxIter, Ty: ARTy);
1269 }
1270
1271 if (SkipLastIter) {
1272 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1273 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1274 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1275 // So we manually construct umin(a - 1, b - 1).
1276 SmallVector<const SCEV *, 4> Elements;
1277 if (auto *UMin = dyn_cast<SCEVUMinExpr>(Val: MaxIter)) {
1278 for (const SCEV *Op : UMin->operands())
1279 Elements.push_back(Elt: SE->getMinusSCEV(LHS: Op, RHS: SE->getOne(Ty: Op->getType())));
1280 MaxIter = SE->getUMinFromMismatchedTypes(Ops&: Elements);
1281 } else
1282 MaxIter = SE->getMinusSCEV(LHS: MaxIter, RHS: SE->getOne(Ty: MaxIter->getType()));
1283 }
1284
1285 // Check if there is a loop-invariant predicate equivalent to our check.
1286 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHS: LHSS, RHS: RHSS,
1287 L, CtxI: BI, MaxIter);
1288 if (!LIP)
1289 return std::nullopt;
1290
1291 // Can we prove it to be trivially true?
1292 if (SE->isKnownPredicateAt(Pred: LIP->Pred, LHS: LIP->LHS, RHS: LIP->RHS, CtxI: BI))
1293 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1294 else
1295 return createInvariantCond(L, ExitingBB, LIP: *LIP, Rewriter);
1296}
1297
1298static bool optimizeLoopExitWithUnknownExitCount(
1299 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1300 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1301 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1302 assert(
1303 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1304 "Not a loop exit!");
1305
1306 // For branch that stays in loop by TRUE condition, go through AND. For branch
1307 // that stays in loop by FALSE condition, go through OR. Both gives the
1308 // similar logic: "stay in loop iff all conditions are true(false)".
1309 bool Inverted = L->contains(BB: BI->getSuccessor(i: 1));
1310 SmallVector<ICmpInst *, 4> LeafConditions;
1311 SmallVector<Value *, 4> Worklist;
1312 SmallPtrSet<Value *, 4> Visited;
1313 Value *OldCond = BI->getCondition();
1314 Visited.insert(Ptr: OldCond);
1315 Worklist.push_back(Elt: OldCond);
1316
1317 auto GoThrough = [&](Value *V) {
1318 Value *LHS = nullptr, *RHS = nullptr;
1319 if (Inverted) {
1320 if (!match(V, P: m_LogicalOr(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
1321 return false;
1322 } else {
1323 if (!match(V, P: m_LogicalAnd(L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
1324 return false;
1325 }
1326 if (Visited.insert(Ptr: LHS).second)
1327 Worklist.push_back(Elt: LHS);
1328 if (Visited.insert(Ptr: RHS).second)
1329 Worklist.push_back(Elt: RHS);
1330 return true;
1331 };
1332
1333 do {
1334 Value *Curr = Worklist.pop_back_val();
1335 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1336 // those with one use, to avoid instruction duplication.
1337 if (Curr->hasOneUse())
1338 if (!GoThrough(Curr))
1339 if (auto *ICmp = dyn_cast<ICmpInst>(Val: Curr))
1340 LeafConditions.push_back(Elt: ICmp);
1341 } while (!Worklist.empty());
1342
1343 // If the current basic block has the same exit count as the whole loop, and
1344 // it consists of multiple icmp's, try to collect all icmp's that give exact
1345 // same exit count. For all other icmp's, we could use one less iteration,
1346 // because their value on the last iteration doesn't really matter.
1347 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1348 if (!SkipLastIter && LeafConditions.size() > 1 &&
1349 SE->getExitCount(L, ExitingBlock: ExitingBB,
1350 Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1351 MaxIter)
1352 for (auto *ICmp : LeafConditions) {
1353 auto EL = SE->computeExitLimitFromCond(L, ExitCond: ICmp, ExitIfTrue: Inverted,
1354 /*ControlsExit*/ ControlsOnlyExit: false);
1355 const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
1356 if (isa<SCEVCouldNotCompute>(Val: ExitMax))
1357 continue;
1358 // They could be of different types (specifically this happens after
1359 // IV widening).
1360 auto *WiderType =
1361 SE->getWiderType(Ty1: ExitMax->getType(), Ty2: MaxIter->getType());
1362 const SCEV *WideExitMax = SE->getNoopOrZeroExtend(V: ExitMax, Ty: WiderType);
1363 const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(V: MaxIter, Ty: WiderType);
1364 if (WideExitMax == WideMaxIter)
1365 ICmpsFailingOnLastIter.insert(Ptr: ICmp);
1366 }
1367
1368 bool Changed = false;
1369 for (auto *OldCond : LeafConditions) {
1370 // Skip last iteration for this icmp under one of two conditions:
1371 // - We do it for all conditions;
1372 // - There is another ICmp that would fail on last iter, so this one doesn't
1373 // really matter.
1374 bool OptimisticSkipLastIter = SkipLastIter;
1375 if (!OptimisticSkipLastIter) {
1376 if (ICmpsFailingOnLastIter.size() > 1)
1377 OptimisticSkipLastIter = true;
1378 else if (ICmpsFailingOnLastIter.size() == 1)
1379 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(Ptr: OldCond);
1380 }
1381 if (auto Replaced =
1382 createReplacement(ICmp: OldCond, L, ExitingBB, MaxIter, Inverted,
1383 SkipLastIter: OptimisticSkipLastIter, SE, Rewriter)) {
1384 Changed = true;
1385 auto *NewCond = *Replaced;
1386 if (auto *NCI = dyn_cast<Instruction>(Val: NewCond)) {
1387 NCI->setName(OldCond->getName() + ".first_iter");
1388 }
1389 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1390 << " with " << *NewCond << "\n");
1391 assert(OldCond->hasOneUse() && "Must be!");
1392 OldCond->replaceAllUsesWith(V: NewCond);
1393 DeadInsts.push_back(Elt: OldCond);
1394 // Make sure we no longer consider this condition as failing on last
1395 // iteration.
1396 ICmpsFailingOnLastIter.erase(Ptr: OldCond);
1397 }
1398 }
1399 return Changed;
1400}
1401
1402bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1403 // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1404 // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1405 // never reaches the icmp since the zext doesn't fold to an AddRec unless
1406 // it already has flags. The alternative to this would be to extending the
1407 // set of "interesting" IV users to include the icmp, but doing that
1408 // regresses results in practice by querying SCEVs before trip counts which
1409 // rely on them which results in SCEV caching sub-optimal answers. The
1410 // concern about caching sub-optimal results is why we only query SCEVs of
1411 // the loop invariant RHS here.
1412 SmallVector<BasicBlock*, 16> ExitingBlocks;
1413 L->getExitingBlocks(ExitingBlocks);
1414 bool Changed = false;
1415 for (auto *ExitingBB : ExitingBlocks) {
1416 auto *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1417 if (!BI)
1418 continue;
1419 assert(BI->isConditional() && "exit branch must be conditional");
1420
1421 auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
1422 if (!ICmp || !ICmp->hasOneUse())
1423 continue;
1424
1425 auto *LHS = ICmp->getOperand(i_nocapture: 0);
1426 auto *RHS = ICmp->getOperand(i_nocapture: 1);
1427 // For the range reasoning, avoid computing SCEVs in the loop to avoid
1428 // poisoning cache with sub-optimal results. For the must-execute case,
1429 // this is a neccessary precondition for correctness.
1430 if (!L->isLoopInvariant(V: RHS)) {
1431 if (!L->isLoopInvariant(V: LHS))
1432 continue;
1433 // Same logic applies for the inverse case
1434 std::swap(a&: LHS, b&: RHS);
1435 }
1436
1437 // Match (icmp signed-cond zext, RHS)
1438 Value *LHSOp = nullptr;
1439 if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))) || !ICmp->isSigned())
1440 continue;
1441
1442 const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
1443 const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
1444 auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
1445 FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
1446 auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
1447 if (FullCR.contains(CR: RHSCR)) {
1448 // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1449 // replace the signed condition with the unsigned version.
1450 ICmp->setPredicate(ICmp->getUnsignedPredicate());
1451 Changed = true;
1452 // Note: No SCEV invalidation needed. We've changed the predicate, but
1453 // have not changed exit counts, or the values produced by the compare.
1454 continue;
1455 }
1456 }
1457
1458 // Now that we've canonicalized the condition to match the extend,
1459 // see if we can rotate the extend out of the loop.
1460 for (auto *ExitingBB : ExitingBlocks) {
1461 auto *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1462 if (!BI)
1463 continue;
1464 assert(BI->isConditional() && "exit branch must be conditional");
1465
1466 auto *ICmp = dyn_cast<ICmpInst>(Val: BI->getCondition());
1467 if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1468 continue;
1469
1470 bool Swapped = false;
1471 auto *LHS = ICmp->getOperand(i_nocapture: 0);
1472 auto *RHS = ICmp->getOperand(i_nocapture: 1);
1473 if (L->isLoopInvariant(V: LHS) == L->isLoopInvariant(V: RHS))
1474 // Nothing to rotate
1475 continue;
1476 if (L->isLoopInvariant(V: LHS)) {
1477 // Same logic applies for the inverse case until we actually pick
1478 // which operand of the compare to update.
1479 Swapped = true;
1480 std::swap(a&: LHS, b&: RHS);
1481 }
1482 assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1483
1484 // Match (icmp unsigned-cond zext, RHS)
1485 // TODO: Extend to handle corresponding sext/signed-cmp case
1486 // TODO: Extend to other invertible functions
1487 Value *LHSOp = nullptr;
1488 if (!match(V: LHS, P: m_ZExt(Op: m_Value(V&: LHSOp))))
1489 continue;
1490
1491 // In general, we only rotate if we can do so without increasing the number
1492 // of instructions. The exception is when we have an zext(add-rec). The
1493 // reason for allowing this exception is that we know we need to get rid
1494 // of the zext for SCEV to be able to compute a trip count for said loops;
1495 // we consider the new trip count valuable enough to increase instruction
1496 // count by one.
1497 if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(Val: SE->getSCEV(V: LHSOp)))
1498 continue;
1499
1500 // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1501 // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1502 // when zext is loop varying and RHS is loop invariant. This converts
1503 // loop varying work to loop-invariant work.
1504 auto doRotateTransform = [&]() {
1505 assert(ICmp->isUnsigned() && "must have proven unsigned already");
1506 auto *NewRHS = CastInst::Create(
1507 Instruction::Trunc, S: RHS, Ty: LHSOp->getType(), Name: "",
1508 InsertBefore: L->getLoopPreheader()->getTerminator()->getIterator());
1509 // NewRHS is an operation that has been hoisted out of the loop, and
1510 // therefore should have a dropped location.
1511 NewRHS->setDebugLoc(DebugLoc::getDropped());
1512 ICmp->setOperand(i_nocapture: Swapped ? 1 : 0, Val_nocapture: LHSOp);
1513 ICmp->setOperand(i_nocapture: Swapped ? 0 : 1, Val_nocapture: NewRHS);
1514 // Samesign flag cannot be preserved after narrowing the compare.
1515 ICmp->setSameSign(false);
1516 if (LHS->use_empty())
1517 DeadInsts.push_back(Elt: LHS);
1518 };
1519
1520 const unsigned InnerBitWidth = DL.getTypeSizeInBits(Ty: LHSOp->getType());
1521 const unsigned OuterBitWidth = DL.getTypeSizeInBits(Ty: RHS->getType());
1522 auto FullCR = ConstantRange::getFull(BitWidth: InnerBitWidth);
1523 FullCR = FullCR.zeroExtend(BitWidth: OuterBitWidth);
1524 auto RHSCR = SE->getUnsignedRange(S: SE->applyLoopGuards(Expr: SE->getSCEV(V: RHS), L));
1525 if (FullCR.contains(CR: RHSCR)) {
1526 doRotateTransform();
1527 Changed = true;
1528 // Note, we are leaving SCEV in an unfortunately imprecise case here
1529 // as rotation tends to reveal information about trip counts not
1530 // previously visible.
1531 continue;
1532 }
1533 }
1534
1535 return Changed;
1536}
1537
1538bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1539 SmallVector<BasicBlock*, 16> ExitingBlocks;
1540 L->getExitingBlocks(ExitingBlocks);
1541
1542 // Remove all exits which aren't both rewriteable and execute on every
1543 // iteration.
1544 llvm::erase_if(C&: ExitingBlocks, P: [&](BasicBlock *ExitingBB) {
1545 // If our exitting block exits multiple loops, we can only rewrite the
1546 // innermost one. Otherwise, we're changing how many times the innermost
1547 // loop runs before it exits.
1548 if (LI->getLoopFor(BB: ExitingBB) != L)
1549 return true;
1550
1551 // Can't rewrite non-branch yet.
1552 BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1553 if (!BI)
1554 return true;
1555
1556 // Likewise, the loop latch must be dominated by the exiting BB.
1557 if (!DT->dominates(A: ExitingBB, B: L->getLoopLatch()))
1558 return true;
1559
1560 if (auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
1561 // If already constant, nothing to do. However, if this is an
1562 // unconditional exit, we can still replace header phis with their
1563 // preheader value.
1564 if (!L->contains(BB: BI->getSuccessor(i: CI->isNullValue())))
1565 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1566 return true;
1567 }
1568
1569 return false;
1570 });
1571
1572 if (ExitingBlocks.empty())
1573 return false;
1574
1575 // Get a symbolic upper bound on the loop backedge taken count.
1576 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1577 if (isa<SCEVCouldNotCompute>(Val: MaxBECount))
1578 return false;
1579
1580 // Visit our exit blocks in order of dominance. We know from the fact that
1581 // all exits must dominate the latch, so there is a total dominance order
1582 // between them.
1583 llvm::sort(C&: ExitingBlocks, Comp: [&](BasicBlock *A, BasicBlock *B) {
1584 // std::sort sorts in ascending order, so we want the inverse of
1585 // the normal dominance relation.
1586 if (A == B) return false;
1587 if (DT->properlyDominates(A, B))
1588 return true;
1589 else {
1590 assert(DT->properlyDominates(B, A) &&
1591 "expected total dominance order!");
1592 return false;
1593 }
1594 });
1595#ifdef ASSERT
1596 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1597 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1598 }
1599#endif
1600
1601 bool Changed = false;
1602 bool SkipLastIter = false;
1603 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1604 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1605 if (SkipLastIter || isa<SCEVCouldNotCompute>(Val: MaxExitCount))
1606 return;
1607 if (isa<SCEVCouldNotCompute>(Val: CurrMaxExit))
1608 CurrMaxExit = MaxExitCount;
1609 else
1610 CurrMaxExit = SE->getUMinFromMismatchedTypes(LHS: CurrMaxExit, RHS: MaxExitCount);
1611 // If the loop has more than 1 iteration, all further checks will be
1612 // executed 1 iteration less.
1613 if (CurrMaxExit == MaxBECount)
1614 SkipLastIter = true;
1615 };
1616 SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1617 for (BasicBlock *ExitingBB : ExitingBlocks) {
1618 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1619 const SCEV *MaxExitCount = SE->getExitCount(
1620 L, ExitingBlock: ExitingBB, Kind: ScalarEvolution::ExitCountKind::SymbolicMaximum);
1621 if (isa<SCEVCouldNotCompute>(Val: ExactExitCount)) {
1622 // Okay, we do not know the exit count here. Can we at least prove that it
1623 // will remain the same within iteration space?
1624 auto *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1625 auto OptimizeCond = [&](bool SkipLastIter) {
1626 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1627 MaxIter: MaxBECount, SkipLastIter,
1628 SE, Rewriter, DeadInsts);
1629 };
1630
1631 // TODO: We might have proved that we can skip the last iteration for
1632 // this check. In this case, we only want to check the condition on the
1633 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1634 // corner case:
1635 //
1636 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1637 //
1638 // If we could not prove that len != 0, then we also could not prove that
1639 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1640 // OptimizeCond will likely not prove anything for it, even if it could
1641 // prove the same fact for len.
1642 //
1643 // As a temporary solution, we query both last and pre-last iterations in
1644 // hope that we will be able to prove triviality for at least one of
1645 // them. We can stop querying MaxBECount for this case once SCEV
1646 // understands that (MaxBECount - 1) will not overflow here.
1647 if (OptimizeCond(false))
1648 Changed = true;
1649 else if (SkipLastIter && OptimizeCond(true))
1650 Changed = true;
1651 UpdateSkipLastIter(MaxExitCount);
1652 continue;
1653 }
1654
1655 UpdateSkipLastIter(ExactExitCount);
1656
1657 // If we know we'd exit on the first iteration, rewrite the exit to
1658 // reflect this. This does not imply the loop must exit through this
1659 // exit; there may be an earlier one taken on the first iteration.
1660 // We know that the backedge can't be taken, so we replace all
1661 // the header PHIs with values coming from the preheader.
1662 if (ExactExitCount->isZero()) {
1663 foldExit(L, ExitingBB, IsTaken: true, DeadInsts);
1664 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, SE&: *SE);
1665 Changed = true;
1666 continue;
1667 }
1668
1669 assert(ExactExitCount->getType()->isIntegerTy() &&
1670 MaxBECount->getType()->isIntegerTy() &&
1671 "Exit counts must be integers");
1672
1673 Type *WiderType =
1674 SE->getWiderType(Ty1: MaxBECount->getType(), Ty2: ExactExitCount->getType());
1675 ExactExitCount = SE->getNoopOrZeroExtend(V: ExactExitCount, Ty: WiderType);
1676 MaxBECount = SE->getNoopOrZeroExtend(V: MaxBECount, Ty: WiderType);
1677 assert(MaxBECount->getType() == ExactExitCount->getType());
1678
1679 // Can we prove that some other exit must be taken strictly before this
1680 // one?
1681 if (SE->isLoopEntryGuardedByCond(L, Pred: CmpInst::ICMP_ULT, LHS: MaxBECount,
1682 RHS: ExactExitCount)) {
1683 foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1684 Changed = true;
1685 continue;
1686 }
1687
1688 // As we run, keep track of which exit counts we've encountered. If we
1689 // find a duplicate, we've found an exit which would have exited on the
1690 // exiting iteration, but (from the visit order) strictly follows another
1691 // which does the same and is thus dead.
1692 if (!DominatingExactExitCounts.insert(Ptr: ExactExitCount).second) {
1693 foldExit(L, ExitingBB, IsTaken: false, DeadInsts);
1694 Changed = true;
1695 continue;
1696 }
1697
1698 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1699 // here. If we kept track of the min of dominanting exits so far, we could
1700 // discharge exits with EC >= MDEC. This is less powerful than the existing
1701 // transform (since later exits aren't considered), but potentially more
1702 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1703 // or a >u b. Such a case is not currently known.
1704 }
1705 return Changed;
1706}
1707
1708bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1709 SmallVector<BasicBlock*, 16> ExitingBlocks;
1710 L->getExitingBlocks(ExitingBlocks);
1711
1712 // Finally, see if we can rewrite our exit conditions into a loop invariant
1713 // form. If we have a read-only loop, and we can tell that we must exit down
1714 // a path which does not need any of the values computed within the loop, we
1715 // can rewrite the loop to exit on the first iteration. Note that this
1716 // doesn't either a) tell us the loop exits on the first iteration (unless
1717 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1718 // This transformation looks a lot like a restricted form of dead loop
1719 // elimination, but restricted to read-only loops and without neccesssarily
1720 // needing to kill the loop entirely.
1721 if (!LoopPredication)
1722 return false;
1723
1724 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1725 // through *explicit* control flow. We have to eliminate the possibility of
1726 // implicit exits (see below) before we know it's truly exact.
1727 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1728 if (isa<SCEVCouldNotCompute>(Val: ExactBTC) || !Rewriter.isSafeToExpand(S: ExactBTC))
1729 return false;
1730
1731 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1732 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1733
1734 auto BadExit = [&](BasicBlock *ExitingBB) {
1735 // If our exiting block exits multiple loops, we can only rewrite the
1736 // innermost one. Otherwise, we're changing how many times the innermost
1737 // loop runs before it exits.
1738 if (LI->getLoopFor(BB: ExitingBB) != L)
1739 return true;
1740
1741 // Can't rewrite non-branch yet.
1742 BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBB->getTerminator());
1743 if (!BI)
1744 return true;
1745
1746 // If already constant, nothing to do.
1747 if (isa<Constant>(Val: BI->getCondition()))
1748 return true;
1749
1750 // If the exit block has phis, we need to be able to compute the values
1751 // within the loop which contains them. This assumes trivially lcssa phis
1752 // have already been removed; TODO: generalize
1753 BasicBlock *ExitBlock =
1754 BI->getSuccessor(i: L->contains(BB: BI->getSuccessor(i: 0)) ? 1 : 0);
1755 if (!ExitBlock->phis().empty())
1756 return true;
1757
1758 const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1759 if (isa<SCEVCouldNotCompute>(Val: ExitCount) ||
1760 !Rewriter.isSafeToExpand(S: ExitCount))
1761 return true;
1762
1763 assert(SE->isLoopInvariant(ExitCount, L) &&
1764 "Exit count must be loop invariant");
1765 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1766 return false;
1767 };
1768
1769 // Make sure all exits dominate the latch. This means there is a linear chain
1770 // of exits. We check this before sorting so we have a total order.
1771 BasicBlock *Latch = L->getLoopLatch();
1772 for (BasicBlock *ExitingBB : ExitingBlocks)
1773 if (!DT->dominates(A: ExitingBB, B: Latch))
1774 return false;
1775
1776 // If we have any exits which can't be predicated themselves, than we can't
1777 // predicate any exit which isn't guaranteed to execute before it. Consider
1778 // two exits (a) and (b) which would both exit on the same iteration. If we
1779 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1780 // we could convert a loop from exiting through (a) to one exiting through
1781 // (b). Note that this problem exists only for exits with the same exit
1782 // count, and we could be more aggressive when exit counts are known inequal.
1783 llvm::sort(C&: ExitingBlocks, Comp: [&](BasicBlock *A, BasicBlock *B) {
1784 // llvm::sort sorts in ascending order, so we want the inverse of
1785 // the normal dominance relation.
1786 if (A == B)
1787 return false;
1788 if (DT->properlyDominates(A, B))
1789 return true;
1790 if (DT->properlyDominates(A: B, B: A))
1791 return false;
1792 llvm_unreachable("Should have total dominance order");
1793 });
1794
1795 // Make sure our exit blocks are really a total order (i.e. a linear chain of
1796 // exits before the backedge).
1797 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1798 assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
1799 "Not sorted by dominance");
1800
1801 // Given our sorted total order, we know that exit[j] must be evaluated
1802 // after all exit[i] such j > i.
1803 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1804 if (BadExit(ExitingBlocks[i])) {
1805 ExitingBlocks.resize(N: i);
1806 break;
1807 }
1808
1809 if (ExitingBlocks.empty())
1810 return false;
1811
1812 // At this point, ExitingBlocks consists of only those blocks which are
1813 // predicatable. Given that, we know we have at least one exit we can
1814 // predicate if the loop is doesn't have side effects and doesn't have any
1815 // implicit exits (because then our exact BTC isn't actually exact).
1816 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1817 // suggestions on how to improve this? I can obviously bail out for outer
1818 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1819 // is that enough for *all* side effects?
1820 for (BasicBlock *BB : L->blocks())
1821 for (auto &I : *BB)
1822 // TODO:isGuaranteedToTransfer
1823 if (I.mayHaveSideEffects())
1824 return false;
1825
1826 bool Changed = false;
1827 // Finally, do the actual predication for all predicatable blocks. A couple
1828 // of notes here:
1829 // 1) We don't bother to constant fold dominated exits with identical exit
1830 // counts; that's simply a form of CSE/equality propagation and we leave
1831 // it for dedicated passes.
1832 // 2) We insert the comparison at the branch. Hoisting introduces additional
1833 // legality constraints and we leave that to dedicated logic. We want to
1834 // predicate even if we can't insert a loop invariant expression as
1835 // peeling or unrolling will likely reduce the cost of the otherwise loop
1836 // varying check.
1837 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1838 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1839 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1840 for (BasicBlock *ExitingBB : ExitingBlocks) {
1841 const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1842
1843 auto *BI = cast<BranchInst>(Val: ExitingBB->getTerminator());
1844 Value *NewCond;
1845 if (ExitCount == ExactBTC) {
1846 NewCond = L->contains(BB: BI->getSuccessor(i: 0)) ?
1847 B.getFalse() : B.getTrue();
1848 } else {
1849 Value *ECV = Rewriter.expandCodeFor(SH: ExitCount);
1850 if (!ExactBTCV)
1851 ExactBTCV = Rewriter.expandCodeFor(SH: ExactBTC);
1852 Value *RHS = ExactBTCV;
1853 if (ECV->getType() != RHS->getType()) {
1854 Type *WiderTy = SE->getWiderType(Ty1: ECV->getType(), Ty2: RHS->getType());
1855 ECV = B.CreateZExt(V: ECV, DestTy: WiderTy);
1856 RHS = B.CreateZExt(V: RHS, DestTy: WiderTy);
1857 }
1858 auto Pred = L->contains(BB: BI->getSuccessor(i: 0)) ?
1859 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1860 NewCond = B.CreateICmp(P: Pred, LHS: ECV, RHS);
1861 }
1862 Value *OldCond = BI->getCondition();
1863 BI->setCondition(NewCond);
1864 if (OldCond->use_empty())
1865 DeadInsts.emplace_back(Args&: OldCond);
1866 Changed = true;
1867 RunUnswitching = true;
1868 }
1869
1870 return Changed;
1871}
1872
1873//===----------------------------------------------------------------------===//
1874// IndVarSimplify driver. Manage several subpasses of IV simplification.
1875//===----------------------------------------------------------------------===//
1876
1877bool IndVarSimplify::run(Loop *L) {
1878 // We need (and expect!) the incoming loop to be in LCSSA.
1879 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1880 "LCSSA required to run indvars!");
1881
1882 // If LoopSimplify form is not available, stay out of trouble. Some notes:
1883 // - LSR currently only supports LoopSimplify-form loops. Indvars'
1884 // canonicalization can be a pessimization without LSR to "clean up"
1885 // afterwards.
1886 // - We depend on having a preheader; in particular,
1887 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1888 // and we're in trouble if we can't find the induction variable even when
1889 // we've manually inserted one.
1890 // - LFTR relies on having a single backedge.
1891 if (!L->isLoopSimplifyForm())
1892 return false;
1893
1894 bool Changed = false;
1895 // If there are any floating-point recurrences, attempt to
1896 // transform them to use integer recurrences.
1897 Changed |= rewriteNonIntegerIVs(L);
1898
1899 // Create a rewriter object which we'll use to transform the code with.
1900 SCEVExpander Rewriter(*SE, DL, "indvars");
1901#if LLVM_ENABLE_ABI_BREAKING_CHECKS
1902 Rewriter.setDebugType(DEBUG_TYPE);
1903#endif
1904
1905 // Eliminate redundant IV users.
1906 //
1907 // Simplification works best when run before other consumers of SCEV. We
1908 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1909 // other expressions involving loop IVs have been evaluated. This helps SCEV
1910 // set no-wrap flags before normalizing sign/zero extension.
1911 Rewriter.disableCanonicalMode();
1912 Changed |= simplifyAndExtend(L, Rewriter, LI);
1913
1914 // Check to see if we can compute the final value of any expressions
1915 // that are recurrent in the loop, and substitute the exit values from the
1916 // loop into any instructions outside of the loop that use the final values
1917 // of the current expressions.
1918 if (ReplaceExitValue != NeverRepl) {
1919 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1920 ReplaceExitValue, DeadInsts)) {
1921 NumReplaced += Rewrites;
1922 Changed = true;
1923 }
1924 }
1925
1926 // Eliminate redundant IV cycles.
1927 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1928
1929 // Try to convert exit conditions to unsigned and rotate computation
1930 // out of the loop. Note: Handles invalidation internally if needed.
1931 Changed |= canonicalizeExitCondition(L);
1932
1933 // Try to eliminate loop exits based on analyzeable exit counts
1934 if (optimizeLoopExits(L, Rewriter)) {
1935 Changed = true;
1936 // Given we've changed exit counts, notify SCEV
1937 // Some nested loops may share same folded exit basic block,
1938 // thus we need to notify top most loop.
1939 SE->forgetTopmostLoop(L);
1940 }
1941
1942 // Try to form loop invariant tests for loop exits by changing how many
1943 // iterations of the loop run when that is unobservable.
1944 if (predicateLoopExits(L, Rewriter)) {
1945 Changed = true;
1946 // Given we've changed exit counts, notify SCEV
1947 SE->forgetLoop(L);
1948 }
1949
1950 // If we have a trip count expression, rewrite the loop's exit condition
1951 // using it.
1952 if (!DisableLFTR) {
1953 BasicBlock *PreHeader = L->getLoopPreheader();
1954
1955 SmallVector<BasicBlock*, 16> ExitingBlocks;
1956 L->getExitingBlocks(ExitingBlocks);
1957 for (BasicBlock *ExitingBB : ExitingBlocks) {
1958 // Can't rewrite non-branch yet.
1959 if (!isa<BranchInst>(Val: ExitingBB->getTerminator()))
1960 continue;
1961
1962 // If our exitting block exits multiple loops, we can only rewrite the
1963 // innermost one. Otherwise, we're changing how many times the innermost
1964 // loop runs before it exits.
1965 if (LI->getLoopFor(BB: ExitingBB) != L)
1966 continue;
1967
1968 if (!needsLFTR(L, ExitingBB))
1969 continue;
1970
1971 const SCEV *ExitCount = SE->getExitCount(L, ExitingBlock: ExitingBB);
1972 if (isa<SCEVCouldNotCompute>(Val: ExitCount))
1973 continue;
1974
1975 // This was handled above, but as we form SCEVs, we can sometimes refine
1976 // existing ones; this allows exit counts to be folded to zero which
1977 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
1978 // until stable to handle cases like this better.
1979 if (ExitCount->isZero())
1980 continue;
1981
1982 PHINode *IndVar = FindLoopCounter(L, ExitingBB, BECount: ExitCount, SE, DT);
1983 if (!IndVar)
1984 continue;
1985
1986 // Avoid high cost expansions. Note: This heuristic is questionable in
1987 // that our definition of "high cost" is not exactly principled.
1988 if (Rewriter.isHighCostExpansion(Exprs: ExitCount, L, Budget: SCEVCheapExpansionBudget,
1989 TTI, At: PreHeader->getTerminator()))
1990 continue;
1991
1992 if (!Rewriter.isSafeToExpand(S: ExitCount))
1993 continue;
1994
1995 Changed |= linearFunctionTestReplace(L, ExitingBB,
1996 ExitCount, IndVar,
1997 Rewriter);
1998 }
1999 }
2000 // Clear the rewriter cache, because values that are in the rewriter's cache
2001 // can be deleted in the loop below, causing the AssertingVH in the cache to
2002 // trigger.
2003 Rewriter.clear();
2004
2005 // Now that we're done iterating through lists, clean up any instructions
2006 // which are now dead.
2007 while (!DeadInsts.empty()) {
2008 Value *V = DeadInsts.pop_back_val();
2009
2010 if (PHINode *PHI = dyn_cast_or_null<PHINode>(Val: V))
2011 Changed |= RecursivelyDeleteDeadPHINode(PN: PHI, TLI, MSSAU: MSSAU.get());
2012 else if (Instruction *Inst = dyn_cast_or_null<Instruction>(Val: V))
2013 Changed |=
2014 RecursivelyDeleteTriviallyDeadInstructions(V: Inst, TLI, MSSAU: MSSAU.get());
2015 }
2016
2017 // The Rewriter may not be used from this point on.
2018
2019 // Loop-invariant instructions in the preheader that aren't used in the
2020 // loop may be sunk below the loop to reduce register pressure.
2021 Changed |= sinkUnusedInvariants(L);
2022
2023 // rewriteFirstIterationLoopExitValues does not rely on the computation of
2024 // trip count and therefore can further simplify exit values in addition to
2025 // rewriteLoopExitValues.
2026 Changed |= rewriteFirstIterationLoopExitValues(L);
2027
2028 // Clean up dead instructions.
2029 Changed |= DeleteDeadPHIs(BB: L->getHeader(), TLI, MSSAU: MSSAU.get());
2030
2031 // Check a post-condition.
2032 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2033 "Indvars did not preserve LCSSA!");
2034 if (VerifyMemorySSA && MSSAU)
2035 MSSAU->getMemorySSA()->verifyMemorySSA();
2036
2037 return Changed;
2038}
2039
2040PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2041 LoopStandardAnalysisResults &AR,
2042 LPMUpdater &) {
2043 Function *F = L.getHeader()->getParent();
2044 const DataLayout &DL = F->getDataLayout();
2045
2046 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2047 WidenIndVars && AllowIVWidening);
2048 if (!IVS.run(L: &L))
2049 return PreservedAnalyses::all();
2050
2051 auto PA = getLoopPassPreservedAnalyses();
2052 PA.preserveSet<CFGAnalyses>();
2053 if (IVS.runUnswitching()) {
2054 AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(IR&: L, ExtraArgs&: AR);
2055 PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2056 }
2057
2058 if (AR.MSSA)
2059 PA.preserve<MemorySSAAnalysis>();
2060 return PA;
2061}
2062