1//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs several transformations to transform natural loops into a
10// simpler form, which makes subsequent analyses and transformations simpler and
11// more effective.
12//
13// Loop pre-header insertion guarantees that there is a single, non-critical
14// entry edge from outside of the loop to the loop header. This simplifies a
15// number of analyses and transformations, such as LICM.
16//
17// Loop exit-block insertion guarantees that all exit blocks from the loop
18// (blocks which are outside of the loop that have predecessors inside of the
19// loop) only have predecessors from inside of the loop (and are thus dominated
20// by the loop header). This simplifies transformations such as store-sinking
21// that are built into LICM.
22//
23// This pass also guarantees that loops will have exactly one backedge.
24//
25// Indirectbr instructions introduce several complications. If the loop
26// contains or is entered by an indirectbr instruction, it may not be possible
27// to transform the loop and make these guarantees. Client code should check
28// that these conditions are true before relying on them.
29//
30// Similar complications arise from callbr instructions, particularly in
31// asm-goto where blockaddress expressions are used.
32//
33// Note that the simplifycfg pass will clean up blocks which are split out but
34// end up being unnecessary, so usage of this pass should not pessimize
35// generated code.
36//
37// This pass obviously modifies the CFG, but updates loop information and
38// dominator information.
39//
40//===----------------------------------------------------------------------===//
41
42#include "llvm/Transforms/Utils/LoopSimplify.h"
43#include "llvm/ADT/SetVector.h"
44#include "llvm/ADT/SmallVector.h"
45#include "llvm/ADT/Statistic.h"
46#include "llvm/Analysis/AliasAnalysis.h"
47#include "llvm/Analysis/AssumptionCache.h"
48#include "llvm/Analysis/BasicAliasAnalysis.h"
49#include "llvm/Analysis/BranchProbabilityInfo.h"
50#include "llvm/Analysis/GlobalsModRef.h"
51#include "llvm/Analysis/InstructionSimplify.h"
52#include "llvm/Analysis/LoopInfo.h"
53#include "llvm/Analysis/MemorySSA.h"
54#include "llvm/Analysis/MemorySSAUpdater.h"
55#include "llvm/Analysis/ScalarEvolution.h"
56#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
57#include "llvm/IR/CFG.h"
58#include "llvm/IR/Constants.h"
59#include "llvm/IR/Dominators.h"
60#include "llvm/IR/Function.h"
61#include "llvm/IR/Instructions.h"
62#include "llvm/IR/LLVMContext.h"
63#include "llvm/IR/Module.h"
64#include "llvm/InitializePasses.h"
65#include "llvm/Support/Debug.h"
66#include "llvm/Support/raw_ostream.h"
67#include "llvm/Transforms/Utils.h"
68#include "llvm/Transforms/Utils/BasicBlockUtils.h"
69#include "llvm/Transforms/Utils/Local.h"
70#include "llvm/Transforms/Utils/LoopUtils.h"
71using namespace llvm;
72
73#define DEBUG_TYPE "loop-simplify"
74
75STATISTIC(NumNested , "Number of nested loops split out");
76
77// If the block isn't already, move the new block to right after some 'outside
78// block' block. This prevents the preheader from being placed inside the loop
79// body, e.g. when the loop hasn't been rotated.
80static void placeSplitBlockCarefully(BasicBlock *NewBB,
81 SmallVectorImpl<BasicBlock *> &SplitPreds,
82 Loop *L) {
83 // Check to see if NewBB is already well placed.
84 Function::iterator BBI = --NewBB->getIterator();
85 if (llvm::is_contained(Range&: SplitPreds, Element: &*BBI))
86 return;
87
88 // If it isn't already after an outside block, move it after one. This is
89 // always good as it makes the uncond branch from the outside block into a
90 // fall-through.
91
92 // Figure out *which* outside block to put this after. Prefer an outside
93 // block that neighbors a BB actually in the loop.
94 BasicBlock *FoundBB = nullptr;
95 for (BasicBlock *Pred : SplitPreds) {
96 Function::iterator BBI = Pred->getIterator();
97 if (++BBI != NewBB->getParent()->end() && L->contains(BB: &*BBI)) {
98 FoundBB = Pred;
99 break;
100 }
101 }
102
103 // If our heuristic for a *good* bb to place this after doesn't find
104 // anything, just pick something. It's likely better than leaving it within
105 // the loop.
106 if (!FoundBB)
107 FoundBB = SplitPreds[0];
108 NewBB->moveAfter(MovePos: FoundBB);
109}
110
111/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
112/// preheader, this method is called to insert one. This method has two phases:
113/// preheader insertion and analysis updating.
114///
115BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
116 LoopInfo *LI, MemorySSAUpdater *MSSAU,
117 bool PreserveLCSSA) {
118 BasicBlock *Header = L->getHeader();
119
120 // Compute the set of predecessors of the loop that are not in the loop.
121 SmallVector<BasicBlock*, 8> OutsideBlocks;
122 for (BasicBlock *P : predecessors(BB: Header)) {
123 if (!L->contains(BB: P)) { // Coming in from outside the loop?
124 // If the loop is branched to from an indirect terminator, we won't
125 // be able to fully transform the loop, because it prohibits
126 // edge splitting.
127 if (isa<IndirectBrInst>(Val: P->getTerminator()))
128 return nullptr;
129
130 // Keep track of it.
131 OutsideBlocks.push_back(Elt: P);
132 }
133 }
134
135 // Split out the loop pre-header.
136 BasicBlock *PreheaderBB;
137 PreheaderBB = SplitBlockPredecessors(BB: Header, Preds: OutsideBlocks, Suffix: ".preheader", DT,
138 LI, MSSAU, PreserveLCSSA);
139 if (!PreheaderBB)
140 return nullptr;
141
142 LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
143 << PreheaderBB->getName() << "\n");
144
145 // Make sure that NewBB is put someplace intelligent, which doesn't mess up
146 // code layout too horribly.
147 placeSplitBlockCarefully(NewBB: PreheaderBB, SplitPreds&: OutsideBlocks, L);
148
149 return PreheaderBB;
150}
151
152/// Add the specified block, and all of its predecessors, to the specified set,
153/// if it's not already in there. Stop predecessor traversal when we reach
154/// StopBlock.
155static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
156 SmallPtrSetImpl<BasicBlock *> &Blocks) {
157 SmallVector<BasicBlock *, 8> Worklist;
158 Worklist.push_back(Elt: InputBB);
159 do {
160 BasicBlock *BB = Worklist.pop_back_val();
161 if (Blocks.insert(Ptr: BB).second && BB != StopBlock)
162 // If BB is not already processed and it is not a stop block then
163 // insert its predecessor in the work list
164 append_range(C&: Worklist, R: predecessors(BB));
165 } while (!Worklist.empty());
166}
167
168/// The first part of loop-nestification is to find a PHI node that tells
169/// us how to partition the loops.
170static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
171 AssumptionCache *AC) {
172 const DataLayout &DL = L->getHeader()->getDataLayout();
173 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(Val: I); ) {
174 PHINode *PN = cast<PHINode>(Val&: I);
175 ++I;
176 if (Value *V = simplifyInstruction(I: PN, Q: {DL, nullptr, DT, AC})) {
177 // This is a degenerate PHI already, don't modify it!
178 PN->replaceAllUsesWith(V);
179 PN->eraseFromParent();
180 continue;
181 }
182
183 // Scan this PHI node looking for a use of the PHI node by itself.
184 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
185 if (PN->getIncomingValue(i) == PN &&
186 L->contains(BB: PN->getIncomingBlock(i)))
187 // We found something tasty to remove.
188 return PN;
189 }
190 return nullptr;
191}
192
193/// If this loop has multiple backedges, try to pull one of them out into
194/// a nested loop.
195///
196/// This is important for code that looks like
197/// this:
198///
199/// Loop:
200/// ...
201/// br cond, Loop, Next
202/// ...
203/// br cond2, Loop, Out
204///
205/// To identify this common case, we look at the PHI nodes in the header of the
206/// loop. PHI nodes with unchanging values on one backedge correspond to values
207/// that change in the "outer" loop, but not in the "inner" loop.
208///
209/// If we are able to separate out a loop, return the new outer loop that was
210/// created.
211///
212static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
213 DominatorTree *DT, LoopInfo *LI,
214 ScalarEvolution *SE, bool PreserveLCSSA,
215 AssumptionCache *AC, MemorySSAUpdater *MSSAU) {
216 // Don't try to separate loops without a preheader.
217 if (!Preheader)
218 return nullptr;
219
220 // Treat the presence of convergent functions conservatively. The
221 // transformation is invalid if calls to certain convergent
222 // functions (like an AMDGPU barrier) get included in the resulting
223 // inner loop. But blocks meant for the inner loop will be
224 // identified later at a point where it's too late to abort the
225 // transformation. Also, the convergent attribute is not really
226 // sufficient to express the semantics of functions that are
227 // affected by this transformation. So we choose to back off if such
228 // a function call is present until a better alternative becomes
229 // available. This is similar to the conservative treatment of
230 // convergent function calls in GVNHoist and JumpThreading.
231 for (auto *BB : L->blocks()) {
232 for (auto &II : *BB) {
233 if (auto CI = dyn_cast<CallBase>(Val: &II)) {
234 if (CI->isConvergent()) {
235 return nullptr;
236 }
237 }
238 }
239 }
240
241 // The header is not a landing pad; preheader insertion should ensure this.
242 BasicBlock *Header = L->getHeader();
243 assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
244
245 PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
246 if (!PN) return nullptr; // No known way to partition.
247
248 // Pull out all predecessors that have varying values in the loop. This
249 // handles the case when a PHI node has multiple instances of itself as
250 // arguments.
251 SmallVector<BasicBlock*, 8> OuterLoopPreds;
252 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
253 if (PN->getIncomingValue(i) != PN ||
254 !L->contains(BB: PN->getIncomingBlock(i))) {
255 // We can't split indirect control flow edges.
256 if (isa<IndirectBrInst>(Val: PN->getIncomingBlock(i)->getTerminator()))
257 return nullptr;
258 OuterLoopPreds.push_back(Elt: PN->getIncomingBlock(i));
259 }
260 }
261 LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
262
263 // If ScalarEvolution is around and knows anything about values in
264 // this loop, tell it to forget them, because we're about to
265 // substantially change it.
266 if (SE)
267 SE->forgetLoop(L);
268
269 BasicBlock *NewBB = SplitBlockPredecessors(BB: Header, Preds: OuterLoopPreds, Suffix: ".outer",
270 DT, LI, MSSAU, PreserveLCSSA);
271
272 // Make sure that NewBB is put someplace intelligent, which doesn't mess up
273 // code layout too horribly.
274 placeSplitBlockCarefully(NewBB, SplitPreds&: OuterLoopPreds, L);
275
276 // Create the new outer loop.
277 Loop *NewOuter = LI->AllocateLoop();
278
279 // Change the parent loop to use the outer loop as its child now.
280 if (Loop *Parent = L->getParentLoop())
281 Parent->replaceChildLoopWith(OldChild: L, NewChild: NewOuter);
282 else
283 LI->changeTopLevelLoop(OldLoop: L, NewLoop: NewOuter);
284
285 // L is now a subloop of our outer loop.
286 NewOuter->addChildLoop(NewChild: L);
287
288 for (BasicBlock *BB : L->blocks())
289 NewOuter->addBlockEntry(BB);
290
291 // Now reset the header in L, which had been moved by
292 // SplitBlockPredecessors for the outer loop.
293 L->moveToHeader(BB: Header);
294
295 // Determine which blocks should stay in L and which should be moved out to
296 // the Outer loop now.
297 SmallPtrSet<BasicBlock *, 4> BlocksInL;
298 for (BasicBlock *P : predecessors(BB: Header)) {
299 if (DT->dominates(A: Header, B: P))
300 addBlockAndPredsToSet(InputBB: P, StopBlock: Header, Blocks&: BlocksInL);
301 }
302
303 // Scan all of the loop children of L, moving them to OuterLoop if they are
304 // not part of the inner loop.
305 const std::vector<Loop*> &SubLoops = L->getSubLoops();
306 for (size_t I = 0; I != SubLoops.size(); )
307 if (BlocksInL.count(Ptr: SubLoops[I]->getHeader()))
308 ++I; // Loop remains in L
309 else
310 NewOuter->addChildLoop(NewChild: L->removeChildLoop(I: SubLoops.begin() + I));
311
312 SmallVector<BasicBlock *, 8> OuterLoopBlocks;
313 OuterLoopBlocks.push_back(Elt: NewBB);
314 // Now that we know which blocks are in L and which need to be moved to
315 // OuterLoop, move any blocks that need it.
316 for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
317 BasicBlock *BB = L->getBlocks()[i];
318 if (!BlocksInL.count(Ptr: BB)) {
319 // Move this block to the parent, updating the exit blocks sets
320 L->removeBlockFromLoop(BB);
321 if ((*LI)[BB] == L) {
322 LI->changeLoopFor(BB, L: NewOuter);
323 OuterLoopBlocks.push_back(Elt: BB);
324 }
325 --i;
326 }
327 }
328
329 // Split edges to exit blocks from the inner loop, if they emerged in the
330 // process of separating the outer one.
331 formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);
332
333 if (PreserveLCSSA) {
334 // Fix LCSSA form for L. Some values, which previously were only used inside
335 // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
336 // in corresponding exit blocks.
337 // We don't need to form LCSSA recursively, because there cannot be uses
338 // inside a newly created loop of defs from inner loops as those would
339 // already be a use of an LCSSA phi node.
340 formLCSSA(L&: *L, DT: *DT, LI, SE);
341
342 assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) &&
343 "LCSSA is broken after separating nested loops!");
344 }
345
346 return NewOuter;
347}
348
349/// This method is called when the specified loop has more than one
350/// backedge in it.
351///
352/// If this occurs, revector all of these backedges to target a new basic block
353/// and have that block branch to the loop header. This ensures that loops
354/// have exactly one backedge.
355static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
356 DominatorTree *DT, LoopInfo *LI,
357 MemorySSAUpdater *MSSAU) {
358 assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
359
360 // Get information about the loop
361 BasicBlock *Header = L->getHeader();
362 Function *F = Header->getParent();
363
364 // Unique backedge insertion currently depends on having a preheader.
365 if (!Preheader)
366 return nullptr;
367
368 // The header is not an EH pad; preheader insertion should ensure this.
369 assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
370
371 // Figure out which basic blocks contain back-edges to the loop header.
372 std::vector<BasicBlock*> BackedgeBlocks;
373 for (BasicBlock *P : predecessors(BB: Header)) {
374 // Indirect edges cannot be split, so we must fail if we find one.
375 if (isa<IndirectBrInst>(Val: P->getTerminator()))
376 return nullptr;
377
378 if (P != Preheader) BackedgeBlocks.push_back(x: P);
379 }
380
381 // Create and insert the new backedge block...
382 BasicBlock *BEBlock = BasicBlock::Create(Context&: Header->getContext(),
383 Name: Header->getName() + ".backedge", Parent: F);
384 BranchInst *BETerminator = BranchInst::Create(IfTrue: Header, InsertBefore: BEBlock);
385 BETerminator->setDebugLoc(Header->getFirstNonPHIIt()->getDebugLoc());
386
387 LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
388 << BEBlock->getName() << "\n");
389
390 // Move the new backedge block to right after the last backedge block.
391 Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
392 F->splice(ToIt: InsertPos, FromF: F, FromIt: BEBlock->getIterator());
393
394 // Now that the block has been inserted into the function, create PHI nodes in
395 // the backedge block which correspond to any PHI nodes in the header block.
396 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(Val: I); ++I) {
397 PHINode *PN = cast<PHINode>(Val&: I);
398 PHINode *NewPN = PHINode::Create(Ty: PN->getType(), NumReservedValues: BackedgeBlocks.size(),
399 NameStr: PN->getName()+".be", InsertBefore: BETerminator->getIterator());
400
401 // Loop over the PHI node, moving all entries except the one for the
402 // preheader over to the new PHI node.
403 unsigned PreheaderIdx = ~0U;
404 bool HasUniqueIncomingValue = true;
405 Value *UniqueValue = nullptr;
406 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
407 BasicBlock *IBB = PN->getIncomingBlock(i);
408 Value *IV = PN->getIncomingValue(i);
409 if (IBB == Preheader) {
410 PreheaderIdx = i;
411 } else {
412 NewPN->addIncoming(V: IV, BB: IBB);
413 if (HasUniqueIncomingValue) {
414 if (!UniqueValue)
415 UniqueValue = IV;
416 else if (UniqueValue != IV)
417 HasUniqueIncomingValue = false;
418 }
419 }
420 }
421
422 // Delete all of the incoming values from the old PN except the preheader's
423 assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
424 if (PreheaderIdx != 0) {
425 PN->setIncomingValue(i: 0, V: PN->getIncomingValue(i: PreheaderIdx));
426 PN->setIncomingBlock(i: 0, BB: PN->getIncomingBlock(i: PreheaderIdx));
427 }
428 // Nuke all entries except the zero'th.
429 PN->removeIncomingValueIf(Predicate: [](unsigned Idx) { return Idx != 0; },
430 /* DeletePHIIfEmpty */ false);
431
432 // Finally, add the newly constructed PHI node as the entry for the BEBlock.
433 PN->addIncoming(V: NewPN, BB: BEBlock);
434
435 // As an optimization, if all incoming values in the new PhiNode (which is a
436 // subset of the incoming values of the old PHI node) have the same value,
437 // eliminate the PHI Node.
438 if (HasUniqueIncomingValue) {
439 NewPN->replaceAllUsesWith(V: UniqueValue);
440 NewPN->eraseFromParent();
441 }
442 }
443
444 // Now that all of the PHI nodes have been inserted and adjusted, modify the
445 // backedge blocks to jump to the BEBlock instead of the header.
446 // If one of the backedges has llvm.loop metadata attached, we remove
447 // it from the backedge and add it to BEBlock.
448 MDNode *LoopMD = nullptr;
449 for (BasicBlock *BB : BackedgeBlocks) {
450 Instruction *TI = BB->getTerminator();
451 if (!LoopMD)
452 LoopMD = TI->getMetadata(KindID: LLVMContext::MD_loop);
453 TI->setMetadata(KindID: LLVMContext::MD_loop, Node: nullptr);
454 TI->replaceSuccessorWith(OldBB: Header, NewBB: BEBlock);
455 }
456 BEBlock->getTerminator()->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD);
457
458 //===--- Update all analyses which we must preserve now -----------------===//
459
460 // Update Loop Information - we know that this block is now in the current
461 // loop and all parent loops.
462 L->addBasicBlockToLoop(NewBB: BEBlock, LI&: *LI);
463
464 // Update dominator information
465 DT->splitBlock(NewBB: BEBlock);
466
467 if (MSSAU)
468 MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(LoopHeader: Header, LoopPreheader: Preheader,
469 BackedgeBlock: BEBlock);
470
471 return BEBlock;
472}
473
474/// Simplify one loop and queue further loops for simplification.
475static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
476 DominatorTree *DT, LoopInfo *LI,
477 ScalarEvolution *SE, AssumptionCache *AC,
478 MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
479 bool Changed = false;
480 if (MSSAU && VerifyMemorySSA)
481 MSSAU->getMemorySSA()->verifyMemorySSA();
482
483ReprocessLoop:
484
485 // Check to see that no blocks (other than the header) in this loop have
486 // predecessors that are not in the loop. This is not valid for natural
487 // loops, but can occur if the blocks are unreachable. Since they are
488 // unreachable we can just shamelessly delete those CFG edges!
489 for (BasicBlock *BB : L->blocks()) {
490 if (BB == L->getHeader())
491 continue;
492
493 SmallPtrSet<BasicBlock*, 4> BadPreds;
494 for (BasicBlock *P : predecessors(BB))
495 if (!L->contains(BB: P))
496 BadPreds.insert(Ptr: P);
497
498 // Delete each unique out-of-loop (and thus dead) predecessor.
499 for (BasicBlock *P : BadPreds) {
500
501 LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
502 << P->getName() << "\n");
503
504 // Zap the dead pred's terminator and replace it with unreachable.
505 Instruction *TI = P->getTerminator();
506 changeToUnreachable(I: TI, PreserveLCSSA,
507 /*DTU=*/nullptr, MSSAU);
508 Changed = true;
509 }
510 }
511
512 if (MSSAU && VerifyMemorySSA)
513 MSSAU->getMemorySSA()->verifyMemorySSA();
514
515 // If there are exiting blocks with branches on undef, resolve the undef in
516 // the direction which will exit the loop. This will help simplify loop
517 // trip count computations.
518 SmallVector<BasicBlock*, 8> ExitingBlocks;
519 L->getExitingBlocks(ExitingBlocks);
520 for (BasicBlock *ExitingBlock : ExitingBlocks)
521 if (BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBlock->getTerminator()))
522 if (BI->isConditional()) {
523 if (UndefValue *Cond = dyn_cast<UndefValue>(Val: BI->getCondition())) {
524
525 LLVM_DEBUG(dbgs()
526 << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
527 << ExitingBlock->getName() << "\n");
528
529 BI->setCondition(ConstantInt::get(Ty: Cond->getType(),
530 V: !L->contains(BB: BI->getSuccessor(i: 0))));
531
532 Changed = true;
533 }
534 }
535
536 // Does the loop already have a preheader? If so, don't insert one.
537 BasicBlock *Preheader = L->getLoopPreheader();
538 if (!Preheader) {
539 Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
540 if (Preheader)
541 Changed = true;
542 }
543
544 // Next, check to make sure that all exit nodes of the loop only have
545 // predecessors that are inside of the loop. This check guarantees that the
546 // loop preheader/header will dominate the exit blocks. If the exit block has
547 // predecessors from outside of the loop, split the edge now.
548 if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
549 Changed = true;
550
551 if (MSSAU && VerifyMemorySSA)
552 MSSAU->getMemorySSA()->verifyMemorySSA();
553
554 // If the header has more than two predecessors at this point (from the
555 // preheader and from multiple backedges), we must adjust the loop.
556 BasicBlock *LoopLatch = L->getLoopLatch();
557 if (!LoopLatch) {
558 // If this is really a nested loop, rip it out into a child loop. Don't do
559 // this for loops with a giant number of backedges, just factor them into a
560 // common backedge instead.
561 if (L->getNumBackEdges() < 8) {
562 if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
563 PreserveLCSSA, AC, MSSAU)) {
564 ++NumNested;
565 // Enqueue the outer loop as it should be processed next in our
566 // depth-first nest walk.
567 Worklist.push_back(Elt: OuterL);
568
569 // This is a big restructuring change, reprocess the whole loop.
570 Changed = true;
571 // GCC doesn't tail recursion eliminate this.
572 // FIXME: It isn't clear we can't rely on LLVM to TRE this.
573 goto ReprocessLoop;
574 }
575 }
576
577 // If we either couldn't, or didn't want to, identify nesting of the loops,
578 // insert a new block that all backedges target, then make it jump to the
579 // loop header.
580 LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
581 if (LoopLatch)
582 Changed = true;
583 }
584
585 if (MSSAU && VerifyMemorySSA)
586 MSSAU->getMemorySSA()->verifyMemorySSA();
587
588 const DataLayout &DL = L->getHeader()->getDataLayout();
589
590 // Scan over the PHI nodes in the loop header. Since they now have only two
591 // incoming values (the loop is canonicalized), we may have simplified the PHI
592 // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
593 PHINode *PN;
594 for (BasicBlock::iterator I = L->getHeader()->begin();
595 (PN = dyn_cast<PHINode>(Val: I++)); )
596 if (Value *V = simplifyInstruction(I: PN, Q: {DL, nullptr, DT, AC})) {
597 if (SE) SE->forgetValue(V: PN);
598 if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(From: PN, To: V)) {
599 PN->replaceAllUsesWith(V);
600 PN->eraseFromParent();
601 Changed = true;
602 }
603 }
604
605 // If this loop has multiple exits and the exits all go to the same
606 // block, attempt to merge the exits. This helps several passes, such
607 // as LoopRotation, which do not support loops with multiple exits.
608 // SimplifyCFG also does this (and this code uses the same utility
609 // function), however this code is loop-aware, where SimplifyCFG is
610 // not. That gives it the advantage of being able to hoist
611 // loop-invariant instructions out of the way to open up more
612 // opportunities, and the disadvantage of having the responsibility
613 // to preserve dominator information.
614 auto HasUniqueExitBlock = [&]() {
615 BasicBlock *UniqueExit = nullptr;
616 for (auto *ExitingBB : ExitingBlocks)
617 for (auto *SuccBB : successors(BB: ExitingBB)) {
618 if (L->contains(BB: SuccBB))
619 continue;
620
621 if (!UniqueExit)
622 UniqueExit = SuccBB;
623 else if (UniqueExit != SuccBB)
624 return false;
625 }
626
627 return true;
628 };
629 if (HasUniqueExitBlock()) {
630 for (BasicBlock *ExitingBlock : ExitingBlocks) {
631 if (!ExitingBlock->getSinglePredecessor()) continue;
632 BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBlock->getTerminator());
633 if (!BI || !BI->isConditional()) continue;
634 CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition());
635 if (!CI || CI->getParent() != ExitingBlock) continue;
636
637 // Attempt to hoist out all instructions except for the
638 // comparison and the branch.
639 bool AllInvariant = true;
640 bool AnyInvariant = false;
641 for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {
642 Instruction *Inst = &*I++;
643 if (Inst == CI)
644 continue;
645 if (!L->makeLoopInvariant(
646 I: Inst, Changed&: AnyInvariant,
647 InsertPt: Preheader ? Preheader->getTerminator() : nullptr, MSSAU, SE)) {
648 AllInvariant = false;
649 break;
650 }
651 }
652 if (AnyInvariant)
653 Changed = true;
654 if (!AllInvariant) continue;
655
656 // The block has now been cleared of all instructions except for
657 // a comparison and a conditional branch. SimplifyCFG may be able
658 // to fold it now.
659 if (!foldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
660 continue;
661
662 // Success. The block is now dead, so remove it from the loop,
663 // update the dominator tree and delete it.
664 LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
665 << ExitingBlock->getName() << "\n");
666
667 assert(pred_empty(ExitingBlock));
668 Changed = true;
669 LI->removeBlock(BB: ExitingBlock);
670
671 DomTreeNode *Node = DT->getNode(BB: ExitingBlock);
672 while (!Node->isLeaf()) {
673 DomTreeNode *Child = Node->back();
674 DT->changeImmediateDominator(N: Child, NewIDom: Node->getIDom());
675 }
676 DT->eraseNode(BB: ExitingBlock);
677 if (MSSAU) {
678 SmallSetVector<BasicBlock *, 8> ExitBlockSet;
679 ExitBlockSet.insert(X: ExitingBlock);
680 MSSAU->removeBlocks(DeadBlocks: ExitBlockSet);
681 }
682
683 BI->getSuccessor(i: 0)->removePredecessor(
684 Pred: ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
685 BI->getSuccessor(i: 1)->removePredecessor(
686 Pred: ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
687 ExitingBlock->eraseFromParent();
688 }
689 }
690
691 if (MSSAU && VerifyMemorySSA)
692 MSSAU->getMemorySSA()->verifyMemorySSA();
693
694 return Changed;
695}
696
697bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
698 ScalarEvolution *SE, AssumptionCache *AC,
699 MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
700 bool Changed = false;
701
702#ifndef NDEBUG
703 // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
704 // form.
705 if (PreserveLCSSA) {
706 assert(DT && "DT not available.");
707 assert(LI && "LI not available.");
708 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
709 "Requested to preserve LCSSA, but it's already broken.");
710 }
711#endif
712
713 // Worklist maintains our depth-first queue of loops in this nest to process.
714 SmallVector<Loop *, 4> Worklist;
715 Worklist.push_back(Elt: L);
716
717 // Walk the worklist from front to back, pushing newly found sub loops onto
718 // the back. This will let us process loops from back to front in depth-first
719 // order. We can use this simple process because loops form a tree.
720 for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
721 Loop *L2 = Worklist[Idx];
722 Worklist.append(in_start: L2->begin(), in_end: L2->end());
723 }
724
725 while (!Worklist.empty())
726 Changed |= simplifyOneLoop(L: Worklist.pop_back_val(), Worklist, DT, LI, SE,
727 AC, MSSAU, PreserveLCSSA);
728
729 // Changing exit conditions for blocks may affect exit counts of this loop and
730 // any of its parents, so we must invalidate the entire subtree if we've made
731 // any changes. Do this here rather than in simplifyOneLoop() as the top-most
732 // loop is going to be the same for all child loops.
733 if (Changed && SE)
734 SE->forgetTopmostLoop(L);
735
736 return Changed;
737}
738
739namespace {
740 struct LoopSimplify : public FunctionPass {
741 static char ID; // Pass identification, replacement for typeid
742 LoopSimplify() : FunctionPass(ID) {
743 initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
744 }
745
746 bool runOnFunction(Function &F) override;
747
748 void getAnalysisUsage(AnalysisUsage &AU) const override {
749 AU.addRequired<AssumptionCacheTracker>();
750
751 // We need loop information to identify the loops...
752 AU.addRequired<DominatorTreeWrapperPass>();
753 AU.addPreserved<DominatorTreeWrapperPass>();
754
755 AU.addRequired<LoopInfoWrapperPass>();
756 AU.addPreserved<LoopInfoWrapperPass>();
757
758 AU.addPreserved<BasicAAWrapperPass>();
759 AU.addPreserved<AAResultsWrapperPass>();
760 AU.addPreserved<GlobalsAAWrapperPass>();
761 AU.addPreserved<ScalarEvolutionWrapperPass>();
762 AU.addPreserved<SCEVAAWrapperPass>();
763 AU.addPreservedID(ID&: LCSSAID);
764 AU.addPreservedID(ID&: BreakCriticalEdgesID); // No critical edges added.
765 AU.addPreserved<BranchProbabilityInfoWrapperPass>();
766 AU.addPreserved<MemorySSAWrapperPass>();
767 }
768
769 /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
770 void verifyAnalysis() const override;
771 };
772}
773
774char LoopSimplify::ID = 0;
775INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
776 "Canonicalize natural loops", false, false)
777INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
778INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
779INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
780INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops",
781 false, false)
782
783// Publicly exposed interface to pass...
784char &llvm::LoopSimplifyID = LoopSimplify::ID;
785Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
786
787/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
788/// it in any convenient order) inserting preheaders...
789///
790bool LoopSimplify::runOnFunction(Function &F) {
791 bool Changed = false;
792 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
793 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
794 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
795 ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
796 AssumptionCache *AC =
797 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
798 MemorySSA *MSSA = nullptr;
799 std::unique_ptr<MemorySSAUpdater> MSSAU;
800 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
801 if (MSSAAnalysis) {
802 MSSA = &MSSAAnalysis->getMSSA();
803 MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
804 }
805
806 bool PreserveLCSSA = mustPreserveAnalysisID(AID&: LCSSAID);
807
808 // Simplify each loop nest in the function.
809 for (auto *L : *LI)
810 Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU: MSSAU.get(), PreserveLCSSA);
811
812#ifndef NDEBUG
813 if (PreserveLCSSA) {
814 bool InLCSSA = all_of(
815 *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
816 assert(InLCSSA && "LCSSA is broken after loop-simplify.");
817 }
818#endif
819 return Changed;
820}
821
822PreservedAnalyses LoopSimplifyPass::run(Function &F,
823 FunctionAnalysisManager &AM) {
824 bool Changed = false;
825 LoopInfo *LI = &AM.getResult<LoopAnalysis>(IR&: F);
826 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
827 ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(IR&: F);
828 AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(IR&: F);
829 auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(IR&: F);
830 std::unique_ptr<MemorySSAUpdater> MSSAU;
831 if (MSSAAnalysis) {
832 auto *MSSA = &MSSAAnalysis->getMSSA();
833 MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
834 }
835
836
837 // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
838 // after simplifying the loops. MemorySSA is preserved if it exists.
839 for (auto *L : *LI)
840 Changed |=
841 simplifyLoop(L, DT, LI, SE, AC, MSSAU: MSSAU.get(), /*PreserveLCSSA*/ false);
842
843 if (!Changed)
844 return PreservedAnalyses::all();
845
846 PreservedAnalyses PA;
847 PA.preserve<DominatorTreeAnalysis>();
848 PA.preserve<LoopAnalysis>();
849 PA.preserve<ScalarEvolutionAnalysis>();
850 if (MSSAAnalysis)
851 PA.preserve<MemorySSAAnalysis>();
852 // BPI maps conditional terminators to probabilities, LoopSimplify can insert
853 // blocks, but it does so only by splitting existing blocks and edges. This
854 // results in the interesting property that all new terminators inserted are
855 // unconditional branches which do not appear in BPI. All deletions are
856 // handled via ValueHandle callbacks w/in BPI.
857 PA.preserve<BranchProbabilityAnalysis>();
858 return PA;
859}
860
861// FIXME: Restore this code when we re-enable verification in verifyAnalysis
862// below.
863#if 0
864static void verifyLoop(Loop *L) {
865 // Verify subloops.
866 for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
867 verifyLoop(*I);
868
869 // It used to be possible to just assert L->isLoopSimplifyForm(), however
870 // with the introduction of indirectbr, there are now cases where it's
871 // not possible to transform a loop as necessary. We can at least check
872 // that there is an indirectbr near any time there's trouble.
873
874 // Indirectbr can interfere with preheader and unique backedge insertion.
875 if (!L->getLoopPreheader() || !L->getLoopLatch()) {
876 bool HasIndBrPred = false;
877 for (BasicBlock *Pred : predecessors(L->getHeader()))
878 if (isa<IndirectBrInst>(Pred->getTerminator())) {
879 HasIndBrPred = true;
880 break;
881 }
882 assert(HasIndBrPred &&
883 "LoopSimplify has no excuse for missing loop header info!");
884 (void)HasIndBrPred;
885 }
886
887 // Indirectbr can interfere with exit block canonicalization.
888 if (!L->hasDedicatedExits()) {
889 bool HasIndBrExiting = false;
890 SmallVector<BasicBlock*, 8> ExitingBlocks;
891 L->getExitingBlocks(ExitingBlocks);
892 for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
893 if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
894 HasIndBrExiting = true;
895 break;
896 }
897 }
898
899 assert(HasIndBrExiting &&
900 "LoopSimplify has no excuse for missing exit block info!");
901 (void)HasIndBrExiting;
902 }
903}
904#endif
905
906void LoopSimplify::verifyAnalysis() const {
907 // FIXME: This routine is being called mid-way through the loop pass manager
908 // as loop passes destroy this analysis. That's actually fine, but we have no
909 // way of expressing that here. Once all of the passes that destroy this are
910 // hoisted out of the loop pass manager we can add back verification here.
911#if 0
912 for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
913 verifyLoop(*I);
914#endif
915}
916