1 | //===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass performs several transformations to transform natural loops into a |
10 | // simpler form, which makes subsequent analyses and transformations simpler and |
11 | // more effective. |
12 | // |
13 | // Loop pre-header insertion guarantees that there is a single, non-critical |
14 | // entry edge from outside of the loop to the loop header. This simplifies a |
15 | // number of analyses and transformations, such as LICM. |
16 | // |
17 | // Loop exit-block insertion guarantees that all exit blocks from the loop |
18 | // (blocks which are outside of the loop that have predecessors inside of the |
19 | // loop) only have predecessors from inside of the loop (and are thus dominated |
20 | // by the loop header). This simplifies transformations such as store-sinking |
21 | // that are built into LICM. |
22 | // |
23 | // This pass also guarantees that loops will have exactly one backedge. |
24 | // |
25 | // Indirectbr instructions introduce several complications. If the loop |
26 | // contains or is entered by an indirectbr instruction, it may not be possible |
27 | // to transform the loop and make these guarantees. Client code should check |
28 | // that these conditions are true before relying on them. |
29 | // |
30 | // Similar complications arise from callbr instructions, particularly in |
31 | // asm-goto where blockaddress expressions are used. |
32 | // |
33 | // Note that the simplifycfg pass will clean up blocks which are split out but |
34 | // end up being unnecessary, so usage of this pass should not pessimize |
35 | // generated code. |
36 | // |
37 | // This pass obviously modifies the CFG, but updates loop information and |
38 | // dominator information. |
39 | // |
40 | //===----------------------------------------------------------------------===// |
41 | |
42 | #include "llvm/Transforms/Utils/LoopSimplify.h" |
43 | #include "llvm/ADT/SetVector.h" |
44 | #include "llvm/ADT/SmallVector.h" |
45 | #include "llvm/ADT/Statistic.h" |
46 | #include "llvm/Analysis/AliasAnalysis.h" |
47 | #include "llvm/Analysis/AssumptionCache.h" |
48 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
49 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
50 | #include "llvm/Analysis/DependenceAnalysis.h" |
51 | #include "llvm/Analysis/GlobalsModRef.h" |
52 | #include "llvm/Analysis/InstructionSimplify.h" |
53 | #include "llvm/Analysis/LoopInfo.h" |
54 | #include "llvm/Analysis/MemorySSA.h" |
55 | #include "llvm/Analysis/MemorySSAUpdater.h" |
56 | #include "llvm/Analysis/ScalarEvolution.h" |
57 | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" |
58 | #include "llvm/IR/CFG.h" |
59 | #include "llvm/IR/Constants.h" |
60 | #include "llvm/IR/Dominators.h" |
61 | #include "llvm/IR/Function.h" |
62 | #include "llvm/IR/Instructions.h" |
63 | #include "llvm/IR/LLVMContext.h" |
64 | #include "llvm/IR/Module.h" |
65 | #include "llvm/InitializePasses.h" |
66 | #include "llvm/Support/Debug.h" |
67 | #include "llvm/Support/raw_ostream.h" |
68 | #include "llvm/Transforms/Utils.h" |
69 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
70 | #include "llvm/Transforms/Utils/Local.h" |
71 | #include "llvm/Transforms/Utils/LoopUtils.h" |
72 | using namespace llvm; |
73 | |
74 | #define DEBUG_TYPE "loop-simplify" |
75 | |
76 | STATISTIC(NumNested , "Number of nested loops split out" ); |
77 | |
78 | // If the block isn't already, move the new block to right after some 'outside |
79 | // block' block. This prevents the preheader from being placed inside the loop |
80 | // body, e.g. when the loop hasn't been rotated. |
81 | static void placeSplitBlockCarefully(BasicBlock *NewBB, |
82 | SmallVectorImpl<BasicBlock *> &SplitPreds, |
83 | Loop *L) { |
84 | // Check to see if NewBB is already well placed. |
85 | Function::iterator BBI = --NewBB->getIterator(); |
86 | for (BasicBlock *Pred : SplitPreds) { |
87 | if (&*BBI == Pred) |
88 | return; |
89 | } |
90 | |
91 | // If it isn't already after an outside block, move it after one. This is |
92 | // always good as it makes the uncond branch from the outside block into a |
93 | // fall-through. |
94 | |
95 | // Figure out *which* outside block to put this after. Prefer an outside |
96 | // block that neighbors a BB actually in the loop. |
97 | BasicBlock *FoundBB = nullptr; |
98 | for (BasicBlock *Pred : SplitPreds) { |
99 | Function::iterator BBI = Pred->getIterator(); |
100 | if (++BBI != NewBB->getParent()->end() && L->contains(BB: &*BBI)) { |
101 | FoundBB = Pred; |
102 | break; |
103 | } |
104 | } |
105 | |
106 | // If our heuristic for a *good* bb to place this after doesn't find |
107 | // anything, just pick something. It's likely better than leaving it within |
108 | // the loop. |
109 | if (!FoundBB) |
110 | FoundBB = SplitPreds[0]; |
111 | NewBB->moveAfter(MovePos: FoundBB); |
112 | } |
113 | |
114 | /// InsertPreheaderForLoop - Once we discover that a loop doesn't have a |
115 | /// preheader, this method is called to insert one. This method has two phases: |
116 | /// preheader insertion and analysis updating. |
117 | /// |
118 | BasicBlock *llvm::(Loop *L, DominatorTree *DT, |
119 | LoopInfo *LI, MemorySSAUpdater *MSSAU, |
120 | bool PreserveLCSSA) { |
121 | BasicBlock * = L->getHeader(); |
122 | |
123 | // Compute the set of predecessors of the loop that are not in the loop. |
124 | SmallVector<BasicBlock*, 8> OutsideBlocks; |
125 | for (BasicBlock *P : predecessors(BB: Header)) { |
126 | if (!L->contains(BB: P)) { // Coming in from outside the loop? |
127 | // If the loop is branched to from an indirect terminator, we won't |
128 | // be able to fully transform the loop, because it prohibits |
129 | // edge splitting. |
130 | if (isa<IndirectBrInst>(Val: P->getTerminator())) |
131 | return nullptr; |
132 | |
133 | // Keep track of it. |
134 | OutsideBlocks.push_back(Elt: P); |
135 | } |
136 | } |
137 | |
138 | // Split out the loop pre-header. |
139 | BasicBlock *; |
140 | PreheaderBB = SplitBlockPredecessors(BB: Header, Preds: OutsideBlocks, Suffix: ".preheader" , DT, |
141 | LI, MSSAU, PreserveLCSSA); |
142 | if (!PreheaderBB) |
143 | return nullptr; |
144 | |
145 | LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header " |
146 | << PreheaderBB->getName() << "\n" ); |
147 | |
148 | // Make sure that NewBB is put someplace intelligent, which doesn't mess up |
149 | // code layout too horribly. |
150 | placeSplitBlockCarefully(NewBB: PreheaderBB, SplitPreds&: OutsideBlocks, L); |
151 | |
152 | return PreheaderBB; |
153 | } |
154 | |
155 | /// Add the specified block, and all of its predecessors, to the specified set, |
156 | /// if it's not already in there. Stop predecessor traversal when we reach |
157 | /// StopBlock. |
158 | static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, |
159 | SmallPtrSetImpl<BasicBlock *> &Blocks) { |
160 | SmallVector<BasicBlock *, 8> Worklist; |
161 | Worklist.push_back(Elt: InputBB); |
162 | do { |
163 | BasicBlock *BB = Worklist.pop_back_val(); |
164 | if (Blocks.insert(Ptr: BB).second && BB != StopBlock) |
165 | // If BB is not already processed and it is not a stop block then |
166 | // insert its predecessor in the work list |
167 | append_range(C&: Worklist, R: predecessors(BB)); |
168 | } while (!Worklist.empty()); |
169 | } |
170 | |
171 | /// The first part of loop-nestification is to find a PHI node that tells |
172 | /// us how to partition the loops. |
173 | static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, |
174 | AssumptionCache *AC) { |
175 | const DataLayout &DL = L->getHeader()->getDataLayout(); |
176 | for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(Val: I); ) { |
177 | PHINode *PN = cast<PHINode>(Val&: I); |
178 | ++I; |
179 | if (Value *V = simplifyInstruction(I: PN, Q: {DL, nullptr, DT, AC})) { |
180 | // This is a degenerate PHI already, don't modify it! |
181 | PN->replaceAllUsesWith(V); |
182 | PN->eraseFromParent(); |
183 | continue; |
184 | } |
185 | |
186 | // Scan this PHI node looking for a use of the PHI node by itself. |
187 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
188 | if (PN->getIncomingValue(i) == PN && |
189 | L->contains(BB: PN->getIncomingBlock(i))) |
190 | // We found something tasty to remove. |
191 | return PN; |
192 | } |
193 | return nullptr; |
194 | } |
195 | |
196 | /// If this loop has multiple backedges, try to pull one of them out into |
197 | /// a nested loop. |
198 | /// |
199 | /// This is important for code that looks like |
200 | /// this: |
201 | /// |
202 | /// Loop: |
203 | /// ... |
204 | /// br cond, Loop, Next |
205 | /// ... |
206 | /// br cond2, Loop, Out |
207 | /// |
208 | /// To identify this common case, we look at the PHI nodes in the header of the |
209 | /// loop. PHI nodes with unchanging values on one backedge correspond to values |
210 | /// that change in the "outer" loop, but not in the "inner" loop. |
211 | /// |
212 | /// If we are able to separate out a loop, return the new outer loop that was |
213 | /// created. |
214 | /// |
215 | static Loop *separateNestedLoop(Loop *L, BasicBlock *, |
216 | DominatorTree *DT, LoopInfo *LI, |
217 | ScalarEvolution *SE, bool PreserveLCSSA, |
218 | AssumptionCache *AC, MemorySSAUpdater *MSSAU) { |
219 | // Don't try to separate loops without a preheader. |
220 | if (!Preheader) |
221 | return nullptr; |
222 | |
223 | // Treat the presence of convergent functions conservatively. The |
224 | // transformation is invalid if calls to certain convergent |
225 | // functions (like an AMDGPU barrier) get included in the resulting |
226 | // inner loop. But blocks meant for the inner loop will be |
227 | // identified later at a point where it's too late to abort the |
228 | // transformation. Also, the convergent attribute is not really |
229 | // sufficient to express the semantics of functions that are |
230 | // affected by this transformation. So we choose to back off if such |
231 | // a function call is present until a better alternative becomes |
232 | // available. This is similar to the conservative treatment of |
233 | // convergent function calls in GVNHoist and JumpThreading. |
234 | for (auto *BB : L->blocks()) { |
235 | for (auto &II : *BB) { |
236 | if (auto CI = dyn_cast<CallBase>(Val: &II)) { |
237 | if (CI->isConvergent()) { |
238 | return nullptr; |
239 | } |
240 | } |
241 | } |
242 | } |
243 | |
244 | // The header is not a landing pad; preheader insertion should ensure this. |
245 | BasicBlock * = L->getHeader(); |
246 | assert(!Header->isEHPad() && "Can't insert backedge to EH pad" ); |
247 | |
248 | PHINode *PN = findPHIToPartitionLoops(L, DT, AC); |
249 | if (!PN) return nullptr; // No known way to partition. |
250 | |
251 | // Pull out all predecessors that have varying values in the loop. This |
252 | // handles the case when a PHI node has multiple instances of itself as |
253 | // arguments. |
254 | SmallVector<BasicBlock*, 8> OuterLoopPreds; |
255 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { |
256 | if (PN->getIncomingValue(i) != PN || |
257 | !L->contains(BB: PN->getIncomingBlock(i))) { |
258 | // We can't split indirect control flow edges. |
259 | if (isa<IndirectBrInst>(Val: PN->getIncomingBlock(i)->getTerminator())) |
260 | return nullptr; |
261 | OuterLoopPreds.push_back(Elt: PN->getIncomingBlock(i)); |
262 | } |
263 | } |
264 | LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n" ); |
265 | |
266 | // If ScalarEvolution is around and knows anything about values in |
267 | // this loop, tell it to forget them, because we're about to |
268 | // substantially change it. |
269 | if (SE) |
270 | SE->forgetLoop(L); |
271 | |
272 | BasicBlock *NewBB = SplitBlockPredecessors(BB: Header, Preds: OuterLoopPreds, Suffix: ".outer" , |
273 | DT, LI, MSSAU, PreserveLCSSA); |
274 | |
275 | // Make sure that NewBB is put someplace intelligent, which doesn't mess up |
276 | // code layout too horribly. |
277 | placeSplitBlockCarefully(NewBB, SplitPreds&: OuterLoopPreds, L); |
278 | |
279 | // Create the new outer loop. |
280 | Loop *NewOuter = LI->AllocateLoop(); |
281 | |
282 | // Change the parent loop to use the outer loop as its child now. |
283 | if (Loop *Parent = L->getParentLoop()) |
284 | Parent->replaceChildLoopWith(OldChild: L, NewChild: NewOuter); |
285 | else |
286 | LI->changeTopLevelLoop(OldLoop: L, NewLoop: NewOuter); |
287 | |
288 | // L is now a subloop of our outer loop. |
289 | NewOuter->addChildLoop(NewChild: L); |
290 | |
291 | for (BasicBlock *BB : L->blocks()) |
292 | NewOuter->addBlockEntry(BB); |
293 | |
294 | // Now reset the header in L, which had been moved by |
295 | // SplitBlockPredecessors for the outer loop. |
296 | L->moveToHeader(BB: Header); |
297 | |
298 | // Determine which blocks should stay in L and which should be moved out to |
299 | // the Outer loop now. |
300 | SmallPtrSet<BasicBlock *, 4> BlocksInL; |
301 | for (BasicBlock *P : predecessors(BB: Header)) { |
302 | if (DT->dominates(A: Header, B: P)) |
303 | addBlockAndPredsToSet(InputBB: P, StopBlock: Header, Blocks&: BlocksInL); |
304 | } |
305 | |
306 | // Scan all of the loop children of L, moving them to OuterLoop if they are |
307 | // not part of the inner loop. |
308 | const std::vector<Loop*> &SubLoops = L->getSubLoops(); |
309 | for (size_t I = 0; I != SubLoops.size(); ) |
310 | if (BlocksInL.count(Ptr: SubLoops[I]->getHeader())) |
311 | ++I; // Loop remains in L |
312 | else |
313 | NewOuter->addChildLoop(NewChild: L->removeChildLoop(I: SubLoops.begin() + I)); |
314 | |
315 | SmallVector<BasicBlock *, 8> OuterLoopBlocks; |
316 | OuterLoopBlocks.push_back(Elt: NewBB); |
317 | // Now that we know which blocks are in L and which need to be moved to |
318 | // OuterLoop, move any blocks that need it. |
319 | for (unsigned i = 0; i != L->getBlocks().size(); ++i) { |
320 | BasicBlock *BB = L->getBlocks()[i]; |
321 | if (!BlocksInL.count(Ptr: BB)) { |
322 | // Move this block to the parent, updating the exit blocks sets |
323 | L->removeBlockFromLoop(BB); |
324 | if ((*LI)[BB] == L) { |
325 | LI->changeLoopFor(BB, L: NewOuter); |
326 | OuterLoopBlocks.push_back(Elt: BB); |
327 | } |
328 | --i; |
329 | } |
330 | } |
331 | |
332 | // Split edges to exit blocks from the inner loop, if they emerged in the |
333 | // process of separating the outer one. |
334 | formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA); |
335 | |
336 | if (PreserveLCSSA) { |
337 | // Fix LCSSA form for L. Some values, which previously were only used inside |
338 | // L, can now be used in NewOuter loop. We need to insert phi-nodes for them |
339 | // in corresponding exit blocks. |
340 | // We don't need to form LCSSA recursively, because there cannot be uses |
341 | // inside a newly created loop of defs from inner loops as those would |
342 | // already be a use of an LCSSA phi node. |
343 | formLCSSA(L&: *L, DT: *DT, LI, SE); |
344 | |
345 | assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) && |
346 | "LCSSA is broken after separating nested loops!" ); |
347 | } |
348 | |
349 | return NewOuter; |
350 | } |
351 | |
352 | /// This method is called when the specified loop has more than one |
353 | /// backedge in it. |
354 | /// |
355 | /// If this occurs, revector all of these backedges to target a new basic block |
356 | /// and have that block branch to the loop header. This ensures that loops |
357 | /// have exactly one backedge. |
358 | static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *, |
359 | DominatorTree *DT, LoopInfo *LI, |
360 | MemorySSAUpdater *MSSAU) { |
361 | assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!" ); |
362 | |
363 | // Get information about the loop |
364 | BasicBlock * = L->getHeader(); |
365 | Function *F = Header->getParent(); |
366 | |
367 | // Unique backedge insertion currently depends on having a preheader. |
368 | if (!Preheader) |
369 | return nullptr; |
370 | |
371 | // The header is not an EH pad; preheader insertion should ensure this. |
372 | assert(!Header->isEHPad() && "Can't insert backedge to EH pad" ); |
373 | |
374 | // Figure out which basic blocks contain back-edges to the loop header. |
375 | std::vector<BasicBlock*> BackedgeBlocks; |
376 | for (BasicBlock *P : predecessors(BB: Header)) { |
377 | // Indirect edges cannot be split, so we must fail if we find one. |
378 | if (isa<IndirectBrInst>(Val: P->getTerminator())) |
379 | return nullptr; |
380 | |
381 | if (P != Preheader) BackedgeBlocks.push_back(x: P); |
382 | } |
383 | |
384 | // Create and insert the new backedge block... |
385 | BasicBlock *BEBlock = BasicBlock::Create(Context&: Header->getContext(), |
386 | Name: Header->getName() + ".backedge" , Parent: F); |
387 | BranchInst *BETerminator = BranchInst::Create(IfTrue: Header, InsertBefore: BEBlock); |
388 | BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); |
389 | |
390 | LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " |
391 | << BEBlock->getName() << "\n" ); |
392 | |
393 | // Move the new backedge block to right after the last backedge block. |
394 | Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); |
395 | F->splice(ToIt: InsertPos, FromF: F, FromIt: BEBlock->getIterator()); |
396 | |
397 | // Now that the block has been inserted into the function, create PHI nodes in |
398 | // the backedge block which correspond to any PHI nodes in the header block. |
399 | for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(Val: I); ++I) { |
400 | PHINode *PN = cast<PHINode>(Val&: I); |
401 | PHINode *NewPN = PHINode::Create(Ty: PN->getType(), NumReservedValues: BackedgeBlocks.size(), |
402 | NameStr: PN->getName()+".be" , InsertBefore: BETerminator->getIterator()); |
403 | |
404 | // Loop over the PHI node, moving all entries except the one for the |
405 | // preheader over to the new PHI node. |
406 | unsigned = ~0U; |
407 | bool HasUniqueIncomingValue = true; |
408 | Value *UniqueValue = nullptr; |
409 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { |
410 | BasicBlock *IBB = PN->getIncomingBlock(i); |
411 | Value *IV = PN->getIncomingValue(i); |
412 | if (IBB == Preheader) { |
413 | PreheaderIdx = i; |
414 | } else { |
415 | NewPN->addIncoming(V: IV, BB: IBB); |
416 | if (HasUniqueIncomingValue) { |
417 | if (!UniqueValue) |
418 | UniqueValue = IV; |
419 | else if (UniqueValue != IV) |
420 | HasUniqueIncomingValue = false; |
421 | } |
422 | } |
423 | } |
424 | |
425 | // Delete all of the incoming values from the old PN except the preheader's |
426 | assert(PreheaderIdx != ~0U && "PHI has no preheader entry??" ); |
427 | if (PreheaderIdx != 0) { |
428 | PN->setIncomingValue(i: 0, V: PN->getIncomingValue(i: PreheaderIdx)); |
429 | PN->setIncomingBlock(i: 0, BB: PN->getIncomingBlock(i: PreheaderIdx)); |
430 | } |
431 | // Nuke all entries except the zero'th. |
432 | PN->removeIncomingValueIf(Predicate: [](unsigned Idx) { return Idx != 0; }, |
433 | /* DeletePHIIfEmpty */ false); |
434 | |
435 | // Finally, add the newly constructed PHI node as the entry for the BEBlock. |
436 | PN->addIncoming(V: NewPN, BB: BEBlock); |
437 | |
438 | // As an optimization, if all incoming values in the new PhiNode (which is a |
439 | // subset of the incoming values of the old PHI node) have the same value, |
440 | // eliminate the PHI Node. |
441 | if (HasUniqueIncomingValue) { |
442 | NewPN->replaceAllUsesWith(V: UniqueValue); |
443 | NewPN->eraseFromParent(); |
444 | } |
445 | } |
446 | |
447 | // Now that all of the PHI nodes have been inserted and adjusted, modify the |
448 | // backedge blocks to jump to the BEBlock instead of the header. |
449 | // If one of the backedges has llvm.loop metadata attached, we remove |
450 | // it from the backedge and add it to BEBlock. |
451 | MDNode *LoopMD = nullptr; |
452 | for (BasicBlock *BB : BackedgeBlocks) { |
453 | Instruction *TI = BB->getTerminator(); |
454 | if (!LoopMD) |
455 | LoopMD = TI->getMetadata(KindID: LLVMContext::MD_loop); |
456 | TI->setMetadata(KindID: LLVMContext::MD_loop, Node: nullptr); |
457 | TI->replaceSuccessorWith(OldBB: Header, NewBB: BEBlock); |
458 | } |
459 | BEBlock->getTerminator()->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD); |
460 | |
461 | //===--- Update all analyses which we must preserve now -----------------===// |
462 | |
463 | // Update Loop Information - we know that this block is now in the current |
464 | // loop and all parent loops. |
465 | L->addBasicBlockToLoop(NewBB: BEBlock, LI&: *LI); |
466 | |
467 | // Update dominator information |
468 | DT->splitBlock(NewBB: BEBlock); |
469 | |
470 | if (MSSAU) |
471 | MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(LoopHeader: Header, LoopPreheader: Preheader, |
472 | BackedgeBlock: BEBlock); |
473 | |
474 | return BEBlock; |
475 | } |
476 | |
477 | /// Simplify one loop and queue further loops for simplification. |
478 | static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, |
479 | DominatorTree *DT, LoopInfo *LI, |
480 | ScalarEvolution *SE, AssumptionCache *AC, |
481 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { |
482 | bool Changed = false; |
483 | if (MSSAU && VerifyMemorySSA) |
484 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
485 | |
486 | ReprocessLoop: |
487 | |
488 | // Check to see that no blocks (other than the header) in this loop have |
489 | // predecessors that are not in the loop. This is not valid for natural |
490 | // loops, but can occur if the blocks are unreachable. Since they are |
491 | // unreachable we can just shamelessly delete those CFG edges! |
492 | for (BasicBlock *BB : L->blocks()) { |
493 | if (BB == L->getHeader()) |
494 | continue; |
495 | |
496 | SmallPtrSet<BasicBlock*, 4> BadPreds; |
497 | for (BasicBlock *P : predecessors(BB)) |
498 | if (!L->contains(BB: P)) |
499 | BadPreds.insert(Ptr: P); |
500 | |
501 | // Delete each unique out-of-loop (and thus dead) predecessor. |
502 | for (BasicBlock *P : BadPreds) { |
503 | |
504 | LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " |
505 | << P->getName() << "\n" ); |
506 | |
507 | // Zap the dead pred's terminator and replace it with unreachable. |
508 | Instruction *TI = P->getTerminator(); |
509 | changeToUnreachable(I: TI, PreserveLCSSA, |
510 | /*DTU=*/nullptr, MSSAU); |
511 | Changed = true; |
512 | } |
513 | } |
514 | |
515 | if (MSSAU && VerifyMemorySSA) |
516 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
517 | |
518 | // If there are exiting blocks with branches on undef, resolve the undef in |
519 | // the direction which will exit the loop. This will help simplify loop |
520 | // trip count computations. |
521 | SmallVector<BasicBlock*, 8> ExitingBlocks; |
522 | L->getExitingBlocks(ExitingBlocks); |
523 | for (BasicBlock *ExitingBlock : ExitingBlocks) |
524 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBlock->getTerminator())) |
525 | if (BI->isConditional()) { |
526 | if (UndefValue *Cond = dyn_cast<UndefValue>(Val: BI->getCondition())) { |
527 | |
528 | LLVM_DEBUG(dbgs() |
529 | << "LoopSimplify: Resolving \"br i1 undef\" to exit in " |
530 | << ExitingBlock->getName() << "\n" ); |
531 | |
532 | BI->setCondition(ConstantInt::get(Ty: Cond->getType(), |
533 | V: !L->contains(BB: BI->getSuccessor(i: 0)))); |
534 | |
535 | Changed = true; |
536 | } |
537 | } |
538 | |
539 | // Does the loop already have a preheader? If so, don't insert one. |
540 | BasicBlock * = L->getLoopPreheader(); |
541 | if (!Preheader) { |
542 | Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA); |
543 | if (Preheader) |
544 | Changed = true; |
545 | } |
546 | |
547 | // Next, check to make sure that all exit nodes of the loop only have |
548 | // predecessors that are inside of the loop. This check guarantees that the |
549 | // loop preheader/header will dominate the exit blocks. If the exit block has |
550 | // predecessors from outside of the loop, split the edge now. |
551 | if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA)) |
552 | Changed = true; |
553 | |
554 | if (MSSAU && VerifyMemorySSA) |
555 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
556 | |
557 | // If the header has more than two predecessors at this point (from the |
558 | // preheader and from multiple backedges), we must adjust the loop. |
559 | BasicBlock *LoopLatch = L->getLoopLatch(); |
560 | if (!LoopLatch) { |
561 | // If this is really a nested loop, rip it out into a child loop. Don't do |
562 | // this for loops with a giant number of backedges, just factor them into a |
563 | // common backedge instead. |
564 | if (L->getNumBackEdges() < 8) { |
565 | if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE, |
566 | PreserveLCSSA, AC, MSSAU)) { |
567 | ++NumNested; |
568 | // Enqueue the outer loop as it should be processed next in our |
569 | // depth-first nest walk. |
570 | Worklist.push_back(Elt: OuterL); |
571 | |
572 | // This is a big restructuring change, reprocess the whole loop. |
573 | Changed = true; |
574 | // GCC doesn't tail recursion eliminate this. |
575 | // FIXME: It isn't clear we can't rely on LLVM to TRE this. |
576 | goto ReprocessLoop; |
577 | } |
578 | } |
579 | |
580 | // If we either couldn't, or didn't want to, identify nesting of the loops, |
581 | // insert a new block that all backedges target, then make it jump to the |
582 | // loop header. |
583 | LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU); |
584 | if (LoopLatch) |
585 | Changed = true; |
586 | } |
587 | |
588 | if (MSSAU && VerifyMemorySSA) |
589 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
590 | |
591 | const DataLayout &DL = L->getHeader()->getDataLayout(); |
592 | |
593 | // Scan over the PHI nodes in the loop header. Since they now have only two |
594 | // incoming values (the loop is canonicalized), we may have simplified the PHI |
595 | // down to 'X = phi [X, Y]', which should be replaced with 'Y'. |
596 | PHINode *PN; |
597 | for (BasicBlock::iterator I = L->getHeader()->begin(); |
598 | (PN = dyn_cast<PHINode>(Val: I++)); ) |
599 | if (Value *V = simplifyInstruction(I: PN, Q: {DL, nullptr, DT, AC})) { |
600 | if (SE) SE->forgetValue(V: PN); |
601 | if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(From: PN, To: V)) { |
602 | PN->replaceAllUsesWith(V); |
603 | PN->eraseFromParent(); |
604 | Changed = true; |
605 | } |
606 | } |
607 | |
608 | // If this loop has multiple exits and the exits all go to the same |
609 | // block, attempt to merge the exits. This helps several passes, such |
610 | // as LoopRotation, which do not support loops with multiple exits. |
611 | // SimplifyCFG also does this (and this code uses the same utility |
612 | // function), however this code is loop-aware, where SimplifyCFG is |
613 | // not. That gives it the advantage of being able to hoist |
614 | // loop-invariant instructions out of the way to open up more |
615 | // opportunities, and the disadvantage of having the responsibility |
616 | // to preserve dominator information. |
617 | auto HasUniqueExitBlock = [&]() { |
618 | BasicBlock *UniqueExit = nullptr; |
619 | for (auto *ExitingBB : ExitingBlocks) |
620 | for (auto *SuccBB : successors(BB: ExitingBB)) { |
621 | if (L->contains(BB: SuccBB)) |
622 | continue; |
623 | |
624 | if (!UniqueExit) |
625 | UniqueExit = SuccBB; |
626 | else if (UniqueExit != SuccBB) |
627 | return false; |
628 | } |
629 | |
630 | return true; |
631 | }; |
632 | if (HasUniqueExitBlock()) { |
633 | for (BasicBlock *ExitingBlock : ExitingBlocks) { |
634 | if (!ExitingBlock->getSinglePredecessor()) continue; |
635 | BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBlock->getTerminator()); |
636 | if (!BI || !BI->isConditional()) continue; |
637 | CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition()); |
638 | if (!CI || CI->getParent() != ExitingBlock) continue; |
639 | |
640 | // Attempt to hoist out all instructions except for the |
641 | // comparison and the branch. |
642 | bool AllInvariant = true; |
643 | bool AnyInvariant = false; |
644 | for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) { |
645 | Instruction *Inst = &*I++; |
646 | if (Inst == CI) |
647 | continue; |
648 | if (!L->makeLoopInvariant( |
649 | I: Inst, Changed&: AnyInvariant, |
650 | InsertPt: Preheader ? Preheader->getTerminator() : nullptr, MSSAU, SE)) { |
651 | AllInvariant = false; |
652 | break; |
653 | } |
654 | } |
655 | if (AnyInvariant) |
656 | Changed = true; |
657 | if (!AllInvariant) continue; |
658 | |
659 | // The block has now been cleared of all instructions except for |
660 | // a comparison and a conditional branch. SimplifyCFG may be able |
661 | // to fold it now. |
662 | if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU)) |
663 | continue; |
664 | |
665 | // Success. The block is now dead, so remove it from the loop, |
666 | // update the dominator tree and delete it. |
667 | LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " |
668 | << ExitingBlock->getName() << "\n" ); |
669 | |
670 | assert(pred_empty(ExitingBlock)); |
671 | Changed = true; |
672 | LI->removeBlock(BB: ExitingBlock); |
673 | |
674 | DomTreeNode *Node = DT->getNode(BB: ExitingBlock); |
675 | while (!Node->isLeaf()) { |
676 | DomTreeNode *Child = Node->back(); |
677 | DT->changeImmediateDominator(N: Child, NewIDom: Node->getIDom()); |
678 | } |
679 | DT->eraseNode(BB: ExitingBlock); |
680 | if (MSSAU) { |
681 | SmallSetVector<BasicBlock *, 8> ExitBlockSet; |
682 | ExitBlockSet.insert(X: ExitingBlock); |
683 | MSSAU->removeBlocks(DeadBlocks: ExitBlockSet); |
684 | } |
685 | |
686 | BI->getSuccessor(i: 0)->removePredecessor( |
687 | Pred: ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA); |
688 | BI->getSuccessor(i: 1)->removePredecessor( |
689 | Pred: ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA); |
690 | ExitingBlock->eraseFromParent(); |
691 | } |
692 | } |
693 | |
694 | if (MSSAU && VerifyMemorySSA) |
695 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
696 | |
697 | return Changed; |
698 | } |
699 | |
700 | bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, |
701 | ScalarEvolution *SE, AssumptionCache *AC, |
702 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { |
703 | bool Changed = false; |
704 | |
705 | #ifndef NDEBUG |
706 | // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA |
707 | // form. |
708 | if (PreserveLCSSA) { |
709 | assert(DT && "DT not available." ); |
710 | assert(LI && "LI not available." ); |
711 | assert(L->isRecursivelyLCSSAForm(*DT, *LI) && |
712 | "Requested to preserve LCSSA, but it's already broken." ); |
713 | } |
714 | #endif |
715 | |
716 | // Worklist maintains our depth-first queue of loops in this nest to process. |
717 | SmallVector<Loop *, 4> Worklist; |
718 | Worklist.push_back(Elt: L); |
719 | |
720 | // Walk the worklist from front to back, pushing newly found sub loops onto |
721 | // the back. This will let us process loops from back to front in depth-first |
722 | // order. We can use this simple process because loops form a tree. |
723 | for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { |
724 | Loop *L2 = Worklist[Idx]; |
725 | Worklist.append(in_start: L2->begin(), in_end: L2->end()); |
726 | } |
727 | |
728 | while (!Worklist.empty()) |
729 | Changed |= simplifyOneLoop(L: Worklist.pop_back_val(), Worklist, DT, LI, SE, |
730 | AC, MSSAU, PreserveLCSSA); |
731 | |
732 | // Changing exit conditions for blocks may affect exit counts of this loop and |
733 | // any of its parents, so we must invalidate the entire subtree if we've made |
734 | // any changes. Do this here rather than in simplifyOneLoop() as the top-most |
735 | // loop is going to be the same for all child loops. |
736 | if (Changed && SE) |
737 | SE->forgetTopmostLoop(L); |
738 | |
739 | return Changed; |
740 | } |
741 | |
742 | namespace { |
743 | struct LoopSimplify : public FunctionPass { |
744 | static char ID; // Pass identification, replacement for typeid |
745 | LoopSimplify() : FunctionPass(ID) { |
746 | initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); |
747 | } |
748 | |
749 | bool runOnFunction(Function &F) override; |
750 | |
751 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
752 | AU.addRequired<AssumptionCacheTracker>(); |
753 | |
754 | // We need loop information to identify the loops... |
755 | AU.addRequired<DominatorTreeWrapperPass>(); |
756 | AU.addPreserved<DominatorTreeWrapperPass>(); |
757 | |
758 | AU.addRequired<LoopInfoWrapperPass>(); |
759 | AU.addPreserved<LoopInfoWrapperPass>(); |
760 | |
761 | AU.addPreserved<BasicAAWrapperPass>(); |
762 | AU.addPreserved<AAResultsWrapperPass>(); |
763 | AU.addPreserved<GlobalsAAWrapperPass>(); |
764 | AU.addPreserved<ScalarEvolutionWrapperPass>(); |
765 | AU.addPreserved<SCEVAAWrapperPass>(); |
766 | AU.addPreservedID(ID&: LCSSAID); |
767 | AU.addPreserved<DependenceAnalysisWrapperPass>(); |
768 | AU.addPreservedID(ID&: BreakCriticalEdgesID); // No critical edges added. |
769 | AU.addPreserved<BranchProbabilityInfoWrapperPass>(); |
770 | AU.addPreserved<MemorySSAWrapperPass>(); |
771 | } |
772 | |
773 | /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. |
774 | void verifyAnalysis() const override; |
775 | }; |
776 | } |
777 | |
// Definition of the static pass-identification member declared in
// LoopSimplify; the LoopSimplify constructor hands it to FunctionPass.
char LoopSimplify::ID = 0;
// Register the pass under the name "loop-simplify" with the description
// "Canonicalize natural loops". The dependency list between BEGIN and END
// names the same three passes that LoopSimplify::getAnalysisUsage() marks as
// required.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against the macro definition.
INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify" ,
                      "Canonicalize natural loops" , false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify" ,
                    "Canonicalize natural loops" , false, false)

// Publicly exposed interface to pass...
// LoopSimplify itself lives in an anonymous namespace, so code outside this
// file refers to the pass through this ID reference and the factory below.
char &llvm::LoopSimplifyID = LoopSimplify::ID;
// Returns a newly heap-allocated LoopSimplify instance.
// NOTE(review): the caller presumably takes ownership -- confirm.
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
790 | |
791 | /// runOnFunction - Run down all loops in the CFG (recursively, but we could do |
792 | /// it in any convenient order) inserting preheaders... |
793 | /// |
794 | bool LoopSimplify::runOnFunction(Function &F) { |
795 | bool Changed = false; |
796 | LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
797 | DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
798 | auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); |
799 | ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr; |
800 | AssumptionCache *AC = |
801 | &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
802 | MemorySSA *MSSA = nullptr; |
803 | std::unique_ptr<MemorySSAUpdater> MSSAU; |
804 | auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); |
805 | if (MSSAAnalysis) { |
806 | MSSA = &MSSAAnalysis->getMSSA(); |
807 | MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA); |
808 | } |
809 | |
810 | bool PreserveLCSSA = mustPreserveAnalysisID(AID&: LCSSAID); |
811 | |
812 | // Simplify each loop nest in the function. |
813 | for (auto *L : *LI) |
814 | Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU: MSSAU.get(), PreserveLCSSA); |
815 | |
816 | #ifndef NDEBUG |
817 | if (PreserveLCSSA) { |
818 | bool InLCSSA = all_of( |
819 | *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); }); |
820 | assert(InLCSSA && "LCSSA is broken after loop-simplify." ); |
821 | } |
822 | #endif |
823 | return Changed; |
824 | } |
825 | |
826 | PreservedAnalyses LoopSimplifyPass::run(Function &F, |
827 | FunctionAnalysisManager &AM) { |
828 | bool Changed = false; |
829 | LoopInfo *LI = &AM.getResult<LoopAnalysis>(IR&: F); |
830 | DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F); |
831 | ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(IR&: F); |
832 | AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(IR&: F); |
833 | auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(IR&: F); |
834 | std::unique_ptr<MemorySSAUpdater> MSSAU; |
835 | if (MSSAAnalysis) { |
836 | auto *MSSA = &MSSAAnalysis->getMSSA(); |
837 | MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA); |
838 | } |
839 | |
840 | |
841 | // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA |
842 | // after simplifying the loops. MemorySSA is preserved if it exists. |
843 | for (auto *L : *LI) |
844 | Changed |= |
845 | simplifyLoop(L, DT, LI, SE, AC, MSSAU: MSSAU.get(), /*PreserveLCSSA*/ false); |
846 | |
847 | if (!Changed) |
848 | return PreservedAnalyses::all(); |
849 | |
850 | PreservedAnalyses PA; |
851 | PA.preserve<DominatorTreeAnalysis>(); |
852 | PA.preserve<LoopAnalysis>(); |
853 | PA.preserve<ScalarEvolutionAnalysis>(); |
854 | PA.preserve<DependenceAnalysis>(); |
855 | if (MSSAAnalysis) |
856 | PA.preserve<MemorySSAAnalysis>(); |
857 | // BPI maps conditional terminators to probabilities, LoopSimplify can insert |
858 | // blocks, but it does so only by splitting existing blocks and edges. This |
859 | // results in the interesting property that all new terminators inserted are |
860 | // unconditional branches which do not appear in BPI. All deletions are |
861 | // handled via ValueHandle callbacks w/in BPI. |
862 | PA.preserve<BranchProbabilityAnalysis>(); |
863 | return PA; |
864 | } |
865 | |
// FIXME: Restore this code when we re-enable verification in verifyAnalysis
// below.
#if 0
/// Recursively check that \p L and all of its sub-loops either have the
/// expected canonical structure or have an indirectbr nearby to explain why
/// they do not.
static void verifyLoop(Loop *L) {
  // Verify subloops.
  for (Loop *SubLoop : *L)
    verifyLoop(SubLoop);

  // It used to be possible to just assert L->isLoopSimplifyForm(), however
  // with the introduction of indirectbr, there are now cases where it's
  // not possible to transform a loop as necessary. We can at least check
  // that there is an indirectbr near any time there's trouble.

  // Indirectbr can interfere with preheader and unique backedge insertion, so
  // a missing preheader or latch is only tolerated when some predecessor of
  // the header ends in an indirectbr.
  if (!L->getLoopPreheader() || !L->getLoopLatch()) {
    bool HasIndBrPred =
        any_of(predecessors(L->getHeader()), [](BasicBlock *Pred) {
          return isa<IndirectBrInst>(Pred->getTerminator());
        });
    assert(HasIndBrPred &&
           "LoopSimplify has no excuse for missing loop header info!");
    (void)HasIndBrPred;
  }

  // Indirectbr can interfere with exit block canonicalization, so missing
  // dedicated exits are only tolerated when some exiting block ends in an
  // indirectbr.
  if (!L->hasDedicatedExits()) {
    SmallVector<BasicBlock*, 8> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    bool HasIndBrExiting = any_of(ExitingBlocks, [](BasicBlock *Exiting) {
      return isa<IndirectBrInst>(Exiting->getTerminator());
    });

    assert(HasIndBrExiting &&
           "LoopSimplify has no excuse for missing exit block info!");
    (void)HasIndBrExiting;
  }
}
#endif
910 | |
911 | void LoopSimplify::verifyAnalysis() const { |
912 | // FIXME: This routine is being called mid-way through the loop pass manager |
913 | // as loop passes destroy this analysis. That's actually fine, but we have no |
914 | // way of expressing that here. Once all of the passes that destroy this are |
915 | // hoisted out of the loop pass manager we can add back verification here. |
916 | #if 0 |
917 | for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) |
918 | verifyLoop(*I); |
919 | #endif |
920 | } |
921 | |