1 | //===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass performs several transformations to transform natural loops into a |
10 | // simpler form, which makes subsequent analyses and transformations simpler and |
11 | // more effective. |
12 | // |
13 | // Loop pre-header insertion guarantees that there is a single, non-critical |
14 | // entry edge from outside of the loop to the loop header. This simplifies a |
15 | // number of analyses and transformations, such as LICM. |
16 | // |
17 | // Loop exit-block insertion guarantees that all exit blocks from the loop |
18 | // (blocks which are outside of the loop that have predecessors inside of the |
19 | // loop) only have predecessors from inside of the loop (and are thus dominated |
20 | // by the loop header). This simplifies transformations such as store-sinking |
21 | // that are built into LICM. |
22 | // |
23 | // This pass also guarantees that loops will have exactly one backedge. |
24 | // |
25 | // Indirectbr instructions introduce several complications. If the loop |
26 | // contains or is entered by an indirectbr instruction, it may not be possible |
27 | // to transform the loop and make these guarantees. Client code should check |
28 | // that these conditions are true before relying on them. |
29 | // |
30 | // Similar complications arise from callbr instructions, particularly in |
31 | // asm-goto where blockaddress expressions are used. |
32 | // |
33 | // Note that the simplifycfg pass will clean up blocks which are split out but |
34 | // end up being unnecessary, so usage of this pass should not pessimize |
35 | // generated code. |
36 | // |
37 | // This pass obviously modifies the CFG, but updates loop information and |
38 | // dominator information. |
39 | // |
40 | //===----------------------------------------------------------------------===// |
41 | |
42 | #include "llvm/Transforms/Utils/LoopSimplify.h" |
43 | #include "llvm/ADT/SetVector.h" |
44 | #include "llvm/ADT/SmallVector.h" |
45 | #include "llvm/ADT/Statistic.h" |
46 | #include "llvm/Analysis/AliasAnalysis.h" |
47 | #include "llvm/Analysis/AssumptionCache.h" |
48 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
49 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
50 | #include "llvm/Analysis/GlobalsModRef.h" |
51 | #include "llvm/Analysis/InstructionSimplify.h" |
52 | #include "llvm/Analysis/LoopInfo.h" |
53 | #include "llvm/Analysis/MemorySSA.h" |
54 | #include "llvm/Analysis/MemorySSAUpdater.h" |
55 | #include "llvm/Analysis/ScalarEvolution.h" |
56 | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" |
57 | #include "llvm/IR/CFG.h" |
58 | #include "llvm/IR/Constants.h" |
59 | #include "llvm/IR/Dominators.h" |
60 | #include "llvm/IR/Function.h" |
61 | #include "llvm/IR/Instructions.h" |
62 | #include "llvm/IR/LLVMContext.h" |
63 | #include "llvm/IR/Module.h" |
64 | #include "llvm/InitializePasses.h" |
65 | #include "llvm/Support/Debug.h" |
66 | #include "llvm/Support/raw_ostream.h" |
67 | #include "llvm/Transforms/Utils.h" |
68 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
69 | #include "llvm/Transforms/Utils/Local.h" |
70 | #include "llvm/Transforms/Utils/LoopUtils.h" |
71 | using namespace llvm; |
72 | |
73 | #define DEBUG_TYPE "loop-simplify" |
74 | |
75 | STATISTIC(NumNested , "Number of nested loops split out" ); |
76 | |
77 | // If the block isn't already, move the new block to right after some 'outside |
78 | // block' block. This prevents the preheader from being placed inside the loop |
79 | // body, e.g. when the loop hasn't been rotated. |
80 | static void placeSplitBlockCarefully(BasicBlock *NewBB, |
81 | SmallVectorImpl<BasicBlock *> &SplitPreds, |
82 | Loop *L) { |
83 | // Check to see if NewBB is already well placed. |
84 | Function::iterator BBI = --NewBB->getIterator(); |
85 | if (llvm::is_contained(Range&: SplitPreds, Element: &*BBI)) |
86 | return; |
87 | |
88 | // If it isn't already after an outside block, move it after one. This is |
89 | // always good as it makes the uncond branch from the outside block into a |
90 | // fall-through. |
91 | |
92 | // Figure out *which* outside block to put this after. Prefer an outside |
93 | // block that neighbors a BB actually in the loop. |
94 | BasicBlock *FoundBB = nullptr; |
95 | for (BasicBlock *Pred : SplitPreds) { |
96 | Function::iterator BBI = Pred->getIterator(); |
97 | if (++BBI != NewBB->getParent()->end() && L->contains(BB: &*BBI)) { |
98 | FoundBB = Pred; |
99 | break; |
100 | } |
101 | } |
102 | |
103 | // If our heuristic for a *good* bb to place this after doesn't find |
104 | // anything, just pick something. It's likely better than leaving it within |
105 | // the loop. |
106 | if (!FoundBB) |
107 | FoundBB = SplitPreds[0]; |
108 | NewBB->moveAfter(MovePos: FoundBB); |
109 | } |
110 | |
111 | /// InsertPreheaderForLoop - Once we discover that a loop doesn't have a |
112 | /// preheader, this method is called to insert one. This method has two phases: |
113 | /// preheader insertion and analysis updating. |
114 | /// |
115 | BasicBlock *llvm::(Loop *L, DominatorTree *DT, |
116 | LoopInfo *LI, MemorySSAUpdater *MSSAU, |
117 | bool PreserveLCSSA) { |
118 | BasicBlock * = L->getHeader(); |
119 | |
120 | // Compute the set of predecessors of the loop that are not in the loop. |
121 | SmallVector<BasicBlock*, 8> OutsideBlocks; |
122 | for (BasicBlock *P : predecessors(BB: Header)) { |
123 | if (!L->contains(BB: P)) { // Coming in from outside the loop? |
124 | // If the loop is branched to from an indirect terminator, we won't |
125 | // be able to fully transform the loop, because it prohibits |
126 | // edge splitting. |
127 | if (isa<IndirectBrInst>(Val: P->getTerminator())) |
128 | return nullptr; |
129 | |
130 | // Keep track of it. |
131 | OutsideBlocks.push_back(Elt: P); |
132 | } |
133 | } |
134 | |
135 | // Split out the loop pre-header. |
136 | BasicBlock *; |
137 | PreheaderBB = SplitBlockPredecessors(BB: Header, Preds: OutsideBlocks, Suffix: ".preheader" , DT, |
138 | LI, MSSAU, PreserveLCSSA); |
139 | if (!PreheaderBB) |
140 | return nullptr; |
141 | |
142 | LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header " |
143 | << PreheaderBB->getName() << "\n" ); |
144 | |
145 | // Make sure that NewBB is put someplace intelligent, which doesn't mess up |
146 | // code layout too horribly. |
147 | placeSplitBlockCarefully(NewBB: PreheaderBB, SplitPreds&: OutsideBlocks, L); |
148 | |
149 | return PreheaderBB; |
150 | } |
151 | |
152 | /// Add the specified block, and all of its predecessors, to the specified set, |
153 | /// if it's not already in there. Stop predecessor traversal when we reach |
154 | /// StopBlock. |
155 | static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, |
156 | SmallPtrSetImpl<BasicBlock *> &Blocks) { |
157 | SmallVector<BasicBlock *, 8> Worklist; |
158 | Worklist.push_back(Elt: InputBB); |
159 | do { |
160 | BasicBlock *BB = Worklist.pop_back_val(); |
161 | if (Blocks.insert(Ptr: BB).second && BB != StopBlock) |
162 | // If BB is not already processed and it is not a stop block then |
163 | // insert its predecessor in the work list |
164 | append_range(C&: Worklist, R: predecessors(BB)); |
165 | } while (!Worklist.empty()); |
166 | } |
167 | |
168 | /// The first part of loop-nestification is to find a PHI node that tells |
169 | /// us how to partition the loops. |
170 | static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, |
171 | AssumptionCache *AC) { |
172 | const DataLayout &DL = L->getHeader()->getDataLayout(); |
173 | for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(Val: I); ) { |
174 | PHINode *PN = cast<PHINode>(Val&: I); |
175 | ++I; |
176 | if (Value *V = simplifyInstruction(I: PN, Q: {DL, nullptr, DT, AC})) { |
177 | // This is a degenerate PHI already, don't modify it! |
178 | PN->replaceAllUsesWith(V); |
179 | PN->eraseFromParent(); |
180 | continue; |
181 | } |
182 | |
183 | // Scan this PHI node looking for a use of the PHI node by itself. |
184 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
185 | if (PN->getIncomingValue(i) == PN && |
186 | L->contains(BB: PN->getIncomingBlock(i))) |
187 | // We found something tasty to remove. |
188 | return PN; |
189 | } |
190 | return nullptr; |
191 | } |
192 | |
193 | /// If this loop has multiple backedges, try to pull one of them out into |
194 | /// a nested loop. |
195 | /// |
196 | /// This is important for code that looks like |
197 | /// this: |
198 | /// |
199 | /// Loop: |
200 | /// ... |
201 | /// br cond, Loop, Next |
202 | /// ... |
203 | /// br cond2, Loop, Out |
204 | /// |
205 | /// To identify this common case, we look at the PHI nodes in the header of the |
206 | /// loop. PHI nodes with unchanging values on one backedge correspond to values |
207 | /// that change in the "outer" loop, but not in the "inner" loop. |
208 | /// |
209 | /// If we are able to separate out a loop, return the new outer loop that was |
210 | /// created. |
211 | /// |
212 | static Loop *separateNestedLoop(Loop *L, BasicBlock *, |
213 | DominatorTree *DT, LoopInfo *LI, |
214 | ScalarEvolution *SE, bool PreserveLCSSA, |
215 | AssumptionCache *AC, MemorySSAUpdater *MSSAU) { |
216 | // Don't try to separate loops without a preheader. |
217 | if (!Preheader) |
218 | return nullptr; |
219 | |
220 | // Treat the presence of convergent functions conservatively. The |
221 | // transformation is invalid if calls to certain convergent |
222 | // functions (like an AMDGPU barrier) get included in the resulting |
223 | // inner loop. But blocks meant for the inner loop will be |
224 | // identified later at a point where it's too late to abort the |
225 | // transformation. Also, the convergent attribute is not really |
226 | // sufficient to express the semantics of functions that are |
227 | // affected by this transformation. So we choose to back off if such |
228 | // a function call is present until a better alternative becomes |
229 | // available. This is similar to the conservative treatment of |
230 | // convergent function calls in GVNHoist and JumpThreading. |
231 | for (auto *BB : L->blocks()) { |
232 | for (auto &II : *BB) { |
233 | if (auto CI = dyn_cast<CallBase>(Val: &II)) { |
234 | if (CI->isConvergent()) { |
235 | return nullptr; |
236 | } |
237 | } |
238 | } |
239 | } |
240 | |
241 | // The header is not a landing pad; preheader insertion should ensure this. |
242 | BasicBlock * = L->getHeader(); |
243 | assert(!Header->isEHPad() && "Can't insert backedge to EH pad" ); |
244 | |
245 | PHINode *PN = findPHIToPartitionLoops(L, DT, AC); |
246 | if (!PN) return nullptr; // No known way to partition. |
247 | |
248 | // Pull out all predecessors that have varying values in the loop. This |
249 | // handles the case when a PHI node has multiple instances of itself as |
250 | // arguments. |
251 | SmallVector<BasicBlock*, 8> OuterLoopPreds; |
252 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { |
253 | if (PN->getIncomingValue(i) != PN || |
254 | !L->contains(BB: PN->getIncomingBlock(i))) { |
255 | // We can't split indirect control flow edges. |
256 | if (isa<IndirectBrInst>(Val: PN->getIncomingBlock(i)->getTerminator())) |
257 | return nullptr; |
258 | OuterLoopPreds.push_back(Elt: PN->getIncomingBlock(i)); |
259 | } |
260 | } |
261 | LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n" ); |
262 | |
263 | // If ScalarEvolution is around and knows anything about values in |
264 | // this loop, tell it to forget them, because we're about to |
265 | // substantially change it. |
266 | if (SE) |
267 | SE->forgetLoop(L); |
268 | |
269 | BasicBlock *NewBB = SplitBlockPredecessors(BB: Header, Preds: OuterLoopPreds, Suffix: ".outer" , |
270 | DT, LI, MSSAU, PreserveLCSSA); |
271 | |
272 | // Make sure that NewBB is put someplace intelligent, which doesn't mess up |
273 | // code layout too horribly. |
274 | placeSplitBlockCarefully(NewBB, SplitPreds&: OuterLoopPreds, L); |
275 | |
276 | // Create the new outer loop. |
277 | Loop *NewOuter = LI->AllocateLoop(); |
278 | |
279 | // Change the parent loop to use the outer loop as its child now. |
280 | if (Loop *Parent = L->getParentLoop()) |
281 | Parent->replaceChildLoopWith(OldChild: L, NewChild: NewOuter); |
282 | else |
283 | LI->changeTopLevelLoop(OldLoop: L, NewLoop: NewOuter); |
284 | |
285 | // L is now a subloop of our outer loop. |
286 | NewOuter->addChildLoop(NewChild: L); |
287 | |
288 | for (BasicBlock *BB : L->blocks()) |
289 | NewOuter->addBlockEntry(BB); |
290 | |
291 | // Now reset the header in L, which had been moved by |
292 | // SplitBlockPredecessors for the outer loop. |
293 | L->moveToHeader(BB: Header); |
294 | |
295 | // Determine which blocks should stay in L and which should be moved out to |
296 | // the Outer loop now. |
297 | SmallPtrSet<BasicBlock *, 4> BlocksInL; |
298 | for (BasicBlock *P : predecessors(BB: Header)) { |
299 | if (DT->dominates(A: Header, B: P)) |
300 | addBlockAndPredsToSet(InputBB: P, StopBlock: Header, Blocks&: BlocksInL); |
301 | } |
302 | |
303 | // Scan all of the loop children of L, moving them to OuterLoop if they are |
304 | // not part of the inner loop. |
305 | const std::vector<Loop*> &SubLoops = L->getSubLoops(); |
306 | for (size_t I = 0; I != SubLoops.size(); ) |
307 | if (BlocksInL.count(Ptr: SubLoops[I]->getHeader())) |
308 | ++I; // Loop remains in L |
309 | else |
310 | NewOuter->addChildLoop(NewChild: L->removeChildLoop(I: SubLoops.begin() + I)); |
311 | |
312 | SmallVector<BasicBlock *, 8> OuterLoopBlocks; |
313 | OuterLoopBlocks.push_back(Elt: NewBB); |
314 | // Now that we know which blocks are in L and which need to be moved to |
315 | // OuterLoop, move any blocks that need it. |
316 | for (unsigned i = 0; i != L->getBlocks().size(); ++i) { |
317 | BasicBlock *BB = L->getBlocks()[i]; |
318 | if (!BlocksInL.count(Ptr: BB)) { |
319 | // Move this block to the parent, updating the exit blocks sets |
320 | L->removeBlockFromLoop(BB); |
321 | if ((*LI)[BB] == L) { |
322 | LI->changeLoopFor(BB, L: NewOuter); |
323 | OuterLoopBlocks.push_back(Elt: BB); |
324 | } |
325 | --i; |
326 | } |
327 | } |
328 | |
329 | // Split edges to exit blocks from the inner loop, if they emerged in the |
330 | // process of separating the outer one. |
331 | formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA); |
332 | |
333 | if (PreserveLCSSA) { |
334 | // Fix LCSSA form for L. Some values, which previously were only used inside |
335 | // L, can now be used in NewOuter loop. We need to insert phi-nodes for them |
336 | // in corresponding exit blocks. |
337 | // We don't need to form LCSSA recursively, because there cannot be uses |
338 | // inside a newly created loop of defs from inner loops as those would |
339 | // already be a use of an LCSSA phi node. |
340 | formLCSSA(L&: *L, DT: *DT, LI, SE); |
341 | |
342 | assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) && |
343 | "LCSSA is broken after separating nested loops!" ); |
344 | } |
345 | |
346 | return NewOuter; |
347 | } |
348 | |
349 | /// This method is called when the specified loop has more than one |
350 | /// backedge in it. |
351 | /// |
352 | /// If this occurs, revector all of these backedges to target a new basic block |
353 | /// and have that block branch to the loop header. This ensures that loops |
354 | /// have exactly one backedge. |
355 | static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *, |
356 | DominatorTree *DT, LoopInfo *LI, |
357 | MemorySSAUpdater *MSSAU) { |
358 | assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!" ); |
359 | |
360 | // Get information about the loop |
361 | BasicBlock * = L->getHeader(); |
362 | Function *F = Header->getParent(); |
363 | |
364 | // Unique backedge insertion currently depends on having a preheader. |
365 | if (!Preheader) |
366 | return nullptr; |
367 | |
368 | // The header is not an EH pad; preheader insertion should ensure this. |
369 | assert(!Header->isEHPad() && "Can't insert backedge to EH pad" ); |
370 | |
371 | // Figure out which basic blocks contain back-edges to the loop header. |
372 | std::vector<BasicBlock*> BackedgeBlocks; |
373 | for (BasicBlock *P : predecessors(BB: Header)) { |
374 | // Indirect edges cannot be split, so we must fail if we find one. |
375 | if (isa<IndirectBrInst>(Val: P->getTerminator())) |
376 | return nullptr; |
377 | |
378 | if (P != Preheader) BackedgeBlocks.push_back(x: P); |
379 | } |
380 | |
381 | // Create and insert the new backedge block... |
382 | BasicBlock *BEBlock = BasicBlock::Create(Context&: Header->getContext(), |
383 | Name: Header->getName() + ".backedge" , Parent: F); |
384 | BranchInst *BETerminator = BranchInst::Create(IfTrue: Header, InsertBefore: BEBlock); |
385 | BETerminator->setDebugLoc(Header->getFirstNonPHIIt()->getDebugLoc()); |
386 | |
387 | LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " |
388 | << BEBlock->getName() << "\n" ); |
389 | |
390 | // Move the new backedge block to right after the last backedge block. |
391 | Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); |
392 | F->splice(ToIt: InsertPos, FromF: F, FromIt: BEBlock->getIterator()); |
393 | |
394 | // Now that the block has been inserted into the function, create PHI nodes in |
395 | // the backedge block which correspond to any PHI nodes in the header block. |
396 | for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(Val: I); ++I) { |
397 | PHINode *PN = cast<PHINode>(Val&: I); |
398 | PHINode *NewPN = PHINode::Create(Ty: PN->getType(), NumReservedValues: BackedgeBlocks.size(), |
399 | NameStr: PN->getName()+".be" , InsertBefore: BETerminator->getIterator()); |
400 | |
401 | // Loop over the PHI node, moving all entries except the one for the |
402 | // preheader over to the new PHI node. |
403 | unsigned = ~0U; |
404 | bool HasUniqueIncomingValue = true; |
405 | Value *UniqueValue = nullptr; |
406 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { |
407 | BasicBlock *IBB = PN->getIncomingBlock(i); |
408 | Value *IV = PN->getIncomingValue(i); |
409 | if (IBB == Preheader) { |
410 | PreheaderIdx = i; |
411 | } else { |
412 | NewPN->addIncoming(V: IV, BB: IBB); |
413 | if (HasUniqueIncomingValue) { |
414 | if (!UniqueValue) |
415 | UniqueValue = IV; |
416 | else if (UniqueValue != IV) |
417 | HasUniqueIncomingValue = false; |
418 | } |
419 | } |
420 | } |
421 | |
422 | // Delete all of the incoming values from the old PN except the preheader's |
423 | assert(PreheaderIdx != ~0U && "PHI has no preheader entry??" ); |
424 | if (PreheaderIdx != 0) { |
425 | PN->setIncomingValue(i: 0, V: PN->getIncomingValue(i: PreheaderIdx)); |
426 | PN->setIncomingBlock(i: 0, BB: PN->getIncomingBlock(i: PreheaderIdx)); |
427 | } |
428 | // Nuke all entries except the zero'th. |
429 | PN->removeIncomingValueIf(Predicate: [](unsigned Idx) { return Idx != 0; }, |
430 | /* DeletePHIIfEmpty */ false); |
431 | |
432 | // Finally, add the newly constructed PHI node as the entry for the BEBlock. |
433 | PN->addIncoming(V: NewPN, BB: BEBlock); |
434 | |
435 | // As an optimization, if all incoming values in the new PhiNode (which is a |
436 | // subset of the incoming values of the old PHI node) have the same value, |
437 | // eliminate the PHI Node. |
438 | if (HasUniqueIncomingValue) { |
439 | NewPN->replaceAllUsesWith(V: UniqueValue); |
440 | NewPN->eraseFromParent(); |
441 | } |
442 | } |
443 | |
444 | // Now that all of the PHI nodes have been inserted and adjusted, modify the |
445 | // backedge blocks to jump to the BEBlock instead of the header. |
446 | // If one of the backedges has llvm.loop metadata attached, we remove |
447 | // it from the backedge and add it to BEBlock. |
448 | MDNode *LoopMD = nullptr; |
449 | for (BasicBlock *BB : BackedgeBlocks) { |
450 | Instruction *TI = BB->getTerminator(); |
451 | if (!LoopMD) |
452 | LoopMD = TI->getMetadata(KindID: LLVMContext::MD_loop); |
453 | TI->setMetadata(KindID: LLVMContext::MD_loop, Node: nullptr); |
454 | TI->replaceSuccessorWith(OldBB: Header, NewBB: BEBlock); |
455 | } |
456 | BEBlock->getTerminator()->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD); |
457 | |
458 | //===--- Update all analyses which we must preserve now -----------------===// |
459 | |
460 | // Update Loop Information - we know that this block is now in the current |
461 | // loop and all parent loops. |
462 | L->addBasicBlockToLoop(NewBB: BEBlock, LI&: *LI); |
463 | |
464 | // Update dominator information |
465 | DT->splitBlock(NewBB: BEBlock); |
466 | |
467 | if (MSSAU) |
468 | MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(LoopHeader: Header, LoopPreheader: Preheader, |
469 | BackedgeBlock: BEBlock); |
470 | |
471 | return BEBlock; |
472 | } |
473 | |
474 | /// Simplify one loop and queue further loops for simplification. |
475 | static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, |
476 | DominatorTree *DT, LoopInfo *LI, |
477 | ScalarEvolution *SE, AssumptionCache *AC, |
478 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { |
479 | bool Changed = false; |
480 | if (MSSAU && VerifyMemorySSA) |
481 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
482 | |
483 | ReprocessLoop: |
484 | |
485 | // Check to see that no blocks (other than the header) in this loop have |
486 | // predecessors that are not in the loop. This is not valid for natural |
487 | // loops, but can occur if the blocks are unreachable. Since they are |
488 | // unreachable we can just shamelessly delete those CFG edges! |
489 | for (BasicBlock *BB : L->blocks()) { |
490 | if (BB == L->getHeader()) |
491 | continue; |
492 | |
493 | SmallPtrSet<BasicBlock*, 4> BadPreds; |
494 | for (BasicBlock *P : predecessors(BB)) |
495 | if (!L->contains(BB: P)) |
496 | BadPreds.insert(Ptr: P); |
497 | |
498 | // Delete each unique out-of-loop (and thus dead) predecessor. |
499 | for (BasicBlock *P : BadPreds) { |
500 | |
501 | LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " |
502 | << P->getName() << "\n" ); |
503 | |
504 | // Zap the dead pred's terminator and replace it with unreachable. |
505 | Instruction *TI = P->getTerminator(); |
506 | changeToUnreachable(I: TI, PreserveLCSSA, |
507 | /*DTU=*/nullptr, MSSAU); |
508 | Changed = true; |
509 | } |
510 | } |
511 | |
512 | if (MSSAU && VerifyMemorySSA) |
513 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
514 | |
515 | // If there are exiting blocks with branches on undef, resolve the undef in |
516 | // the direction which will exit the loop. This will help simplify loop |
517 | // trip count computations. |
518 | SmallVector<BasicBlock*, 8> ExitingBlocks; |
519 | L->getExitingBlocks(ExitingBlocks); |
520 | for (BasicBlock *ExitingBlock : ExitingBlocks) |
521 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBlock->getTerminator())) |
522 | if (BI->isConditional()) { |
523 | if (UndefValue *Cond = dyn_cast<UndefValue>(Val: BI->getCondition())) { |
524 | |
525 | LLVM_DEBUG(dbgs() |
526 | << "LoopSimplify: Resolving \"br i1 undef\" to exit in " |
527 | << ExitingBlock->getName() << "\n" ); |
528 | |
529 | BI->setCondition(ConstantInt::get(Ty: Cond->getType(), |
530 | V: !L->contains(BB: BI->getSuccessor(i: 0)))); |
531 | |
532 | Changed = true; |
533 | } |
534 | } |
535 | |
536 | // Does the loop already have a preheader? If so, don't insert one. |
537 | BasicBlock * = L->getLoopPreheader(); |
538 | if (!Preheader) { |
539 | Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA); |
540 | if (Preheader) |
541 | Changed = true; |
542 | } |
543 | |
544 | // Next, check to make sure that all exit nodes of the loop only have |
545 | // predecessors that are inside of the loop. This check guarantees that the |
546 | // loop preheader/header will dominate the exit blocks. If the exit block has |
547 | // predecessors from outside of the loop, split the edge now. |
548 | if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA)) |
549 | Changed = true; |
550 | |
551 | if (MSSAU && VerifyMemorySSA) |
552 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
553 | |
554 | // If the header has more than two predecessors at this point (from the |
555 | // preheader and from multiple backedges), we must adjust the loop. |
556 | BasicBlock *LoopLatch = L->getLoopLatch(); |
557 | if (!LoopLatch) { |
558 | // If this is really a nested loop, rip it out into a child loop. Don't do |
559 | // this for loops with a giant number of backedges, just factor them into a |
560 | // common backedge instead. |
561 | if (L->getNumBackEdges() < 8) { |
562 | if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE, |
563 | PreserveLCSSA, AC, MSSAU)) { |
564 | ++NumNested; |
565 | // Enqueue the outer loop as it should be processed next in our |
566 | // depth-first nest walk. |
567 | Worklist.push_back(Elt: OuterL); |
568 | |
569 | // This is a big restructuring change, reprocess the whole loop. |
570 | Changed = true; |
571 | // GCC doesn't tail recursion eliminate this. |
572 | // FIXME: It isn't clear we can't rely on LLVM to TRE this. |
573 | goto ReprocessLoop; |
574 | } |
575 | } |
576 | |
577 | // If we either couldn't, or didn't want to, identify nesting of the loops, |
578 | // insert a new block that all backedges target, then make it jump to the |
579 | // loop header. |
580 | LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU); |
581 | if (LoopLatch) |
582 | Changed = true; |
583 | } |
584 | |
585 | if (MSSAU && VerifyMemorySSA) |
586 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
587 | |
588 | const DataLayout &DL = L->getHeader()->getDataLayout(); |
589 | |
590 | // Scan over the PHI nodes in the loop header. Since they now have only two |
591 | // incoming values (the loop is canonicalized), we may have simplified the PHI |
592 | // down to 'X = phi [X, Y]', which should be replaced with 'Y'. |
593 | PHINode *PN; |
594 | for (BasicBlock::iterator I = L->getHeader()->begin(); |
595 | (PN = dyn_cast<PHINode>(Val: I++)); ) |
596 | if (Value *V = simplifyInstruction(I: PN, Q: {DL, nullptr, DT, AC})) { |
597 | if (SE) SE->forgetValue(V: PN); |
598 | if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(From: PN, To: V)) { |
599 | PN->replaceAllUsesWith(V); |
600 | PN->eraseFromParent(); |
601 | Changed = true; |
602 | } |
603 | } |
604 | |
605 | // If this loop has multiple exits and the exits all go to the same |
606 | // block, attempt to merge the exits. This helps several passes, such |
607 | // as LoopRotation, which do not support loops with multiple exits. |
608 | // SimplifyCFG also does this (and this code uses the same utility |
609 | // function), however this code is loop-aware, where SimplifyCFG is |
610 | // not. That gives it the advantage of being able to hoist |
611 | // loop-invariant instructions out of the way to open up more |
612 | // opportunities, and the disadvantage of having the responsibility |
613 | // to preserve dominator information. |
614 | auto HasUniqueExitBlock = [&]() { |
615 | BasicBlock *UniqueExit = nullptr; |
616 | for (auto *ExitingBB : ExitingBlocks) |
617 | for (auto *SuccBB : successors(BB: ExitingBB)) { |
618 | if (L->contains(BB: SuccBB)) |
619 | continue; |
620 | |
621 | if (!UniqueExit) |
622 | UniqueExit = SuccBB; |
623 | else if (UniqueExit != SuccBB) |
624 | return false; |
625 | } |
626 | |
627 | return true; |
628 | }; |
629 | if (HasUniqueExitBlock()) { |
630 | for (BasicBlock *ExitingBlock : ExitingBlocks) { |
631 | if (!ExitingBlock->getSinglePredecessor()) continue; |
632 | BranchInst *BI = dyn_cast<BranchInst>(Val: ExitingBlock->getTerminator()); |
633 | if (!BI || !BI->isConditional()) continue; |
634 | CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition()); |
635 | if (!CI || CI->getParent() != ExitingBlock) continue; |
636 | |
637 | // Attempt to hoist out all instructions except for the |
638 | // comparison and the branch. |
639 | bool AllInvariant = true; |
640 | bool AnyInvariant = false; |
641 | for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) { |
642 | Instruction *Inst = &*I++; |
643 | if (Inst == CI) |
644 | continue; |
645 | if (!L->makeLoopInvariant( |
646 | I: Inst, Changed&: AnyInvariant, |
647 | InsertPt: Preheader ? Preheader->getTerminator() : nullptr, MSSAU, SE)) { |
648 | AllInvariant = false; |
649 | break; |
650 | } |
651 | } |
652 | if (AnyInvariant) |
653 | Changed = true; |
654 | if (!AllInvariant) continue; |
655 | |
656 | // The block has now been cleared of all instructions except for |
657 | // a comparison and a conditional branch. SimplifyCFG may be able |
658 | // to fold it now. |
659 | if (!foldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU)) |
660 | continue; |
661 | |
662 | // Success. The block is now dead, so remove it from the loop, |
663 | // update the dominator tree and delete it. |
664 | LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " |
665 | << ExitingBlock->getName() << "\n" ); |
666 | |
667 | assert(pred_empty(ExitingBlock)); |
668 | Changed = true; |
669 | LI->removeBlock(BB: ExitingBlock); |
670 | |
671 | DomTreeNode *Node = DT->getNode(BB: ExitingBlock); |
672 | while (!Node->isLeaf()) { |
673 | DomTreeNode *Child = Node->back(); |
674 | DT->changeImmediateDominator(N: Child, NewIDom: Node->getIDom()); |
675 | } |
676 | DT->eraseNode(BB: ExitingBlock); |
677 | if (MSSAU) { |
678 | SmallSetVector<BasicBlock *, 8> ExitBlockSet; |
679 | ExitBlockSet.insert(X: ExitingBlock); |
680 | MSSAU->removeBlocks(DeadBlocks: ExitBlockSet); |
681 | } |
682 | |
683 | BI->getSuccessor(i: 0)->removePredecessor( |
684 | Pred: ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA); |
685 | BI->getSuccessor(i: 1)->removePredecessor( |
686 | Pred: ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA); |
687 | ExitingBlock->eraseFromParent(); |
688 | } |
689 | } |
690 | |
691 | if (MSSAU && VerifyMemorySSA) |
692 | MSSAU->getMemorySSA()->verifyMemorySSA(); |
693 | |
694 | return Changed; |
695 | } |
696 | |
697 | bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, |
698 | ScalarEvolution *SE, AssumptionCache *AC, |
699 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { |
700 | bool Changed = false; |
701 | |
702 | #ifndef NDEBUG |
703 | // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA |
704 | // form. |
705 | if (PreserveLCSSA) { |
706 | assert(DT && "DT not available." ); |
707 | assert(LI && "LI not available." ); |
708 | assert(L->isRecursivelyLCSSAForm(*DT, *LI) && |
709 | "Requested to preserve LCSSA, but it's already broken." ); |
710 | } |
711 | #endif |
712 | |
713 | // Worklist maintains our depth-first queue of loops in this nest to process. |
714 | SmallVector<Loop *, 4> Worklist; |
715 | Worklist.push_back(Elt: L); |
716 | |
717 | // Walk the worklist from front to back, pushing newly found sub loops onto |
718 | // the back. This will let us process loops from back to front in depth-first |
719 | // order. We can use this simple process because loops form a tree. |
720 | for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { |
721 | Loop *L2 = Worklist[Idx]; |
722 | Worklist.append(in_start: L2->begin(), in_end: L2->end()); |
723 | } |
724 | |
725 | while (!Worklist.empty()) |
726 | Changed |= simplifyOneLoop(L: Worklist.pop_back_val(), Worklist, DT, LI, SE, |
727 | AC, MSSAU, PreserveLCSSA); |
728 | |
729 | // Changing exit conditions for blocks may affect exit counts of this loop and |
730 | // any of its parents, so we must invalidate the entire subtree if we've made |
731 | // any changes. Do this here rather than in simplifyOneLoop() as the top-most |
732 | // loop is going to be the same for all child loops. |
733 | if (Changed && SE) |
734 | SE->forgetTopmostLoop(L); |
735 | |
736 | return Changed; |
737 | } |
738 | |
739 | namespace { |
740 | struct LoopSimplify : public FunctionPass { |
741 | static char ID; // Pass identification, replacement for typeid |
742 | LoopSimplify() : FunctionPass(ID) { |
743 | initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); |
744 | } |
745 | |
746 | bool runOnFunction(Function &F) override; |
747 | |
748 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
749 | AU.addRequired<AssumptionCacheTracker>(); |
750 | |
751 | // We need loop information to identify the loops... |
752 | AU.addRequired<DominatorTreeWrapperPass>(); |
753 | AU.addPreserved<DominatorTreeWrapperPass>(); |
754 | |
755 | AU.addRequired<LoopInfoWrapperPass>(); |
756 | AU.addPreserved<LoopInfoWrapperPass>(); |
757 | |
758 | AU.addPreserved<BasicAAWrapperPass>(); |
759 | AU.addPreserved<AAResultsWrapperPass>(); |
760 | AU.addPreserved<GlobalsAAWrapperPass>(); |
761 | AU.addPreserved<ScalarEvolutionWrapperPass>(); |
762 | AU.addPreserved<SCEVAAWrapperPass>(); |
763 | AU.addPreservedID(ID&: LCSSAID); |
764 | AU.addPreservedID(ID&: BreakCriticalEdgesID); // No critical edges added. |
765 | AU.addPreserved<BranchProbabilityInfoWrapperPass>(); |
766 | AU.addPreserved<MemorySSAWrapperPass>(); |
767 | } |
768 | |
769 | /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. |
770 | void verifyAnalysis() const override; |
771 | }; |
772 | } |
773 | |
774 | char LoopSimplify::ID = 0; |
775 | INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify" , |
776 | "Canonicalize natural loops" , false, false) |
777 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
778 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
779 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
780 | INITIALIZE_PASS_END(LoopSimplify, "loop-simplify" , "Canonicalize natural loops" , |
781 | false, false) |
782 | |
783 | // Publicly exposed interface to pass... |
784 | char &llvm::LoopSimplifyID = LoopSimplify::ID; |
785 | Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } |
786 | |
787 | /// runOnFunction - Run down all loops in the CFG (recursively, but we could do |
788 | /// it in any convenient order) inserting preheaders... |
789 | /// |
790 | bool LoopSimplify::runOnFunction(Function &F) { |
791 | bool Changed = false; |
792 | LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
793 | DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
794 | auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); |
795 | ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr; |
796 | AssumptionCache *AC = |
797 | &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
798 | MemorySSA *MSSA = nullptr; |
799 | std::unique_ptr<MemorySSAUpdater> MSSAU; |
800 | auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); |
801 | if (MSSAAnalysis) { |
802 | MSSA = &MSSAAnalysis->getMSSA(); |
803 | MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA); |
804 | } |
805 | |
806 | bool PreserveLCSSA = mustPreserveAnalysisID(AID&: LCSSAID); |
807 | |
808 | // Simplify each loop nest in the function. |
809 | for (auto *L : *LI) |
810 | Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU: MSSAU.get(), PreserveLCSSA); |
811 | |
812 | #ifndef NDEBUG |
813 | if (PreserveLCSSA) { |
814 | bool InLCSSA = all_of( |
815 | *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); }); |
816 | assert(InLCSSA && "LCSSA is broken after loop-simplify." ); |
817 | } |
818 | #endif |
819 | return Changed; |
820 | } |
821 | |
822 | PreservedAnalyses LoopSimplifyPass::run(Function &F, |
823 | FunctionAnalysisManager &AM) { |
824 | bool Changed = false; |
825 | LoopInfo *LI = &AM.getResult<LoopAnalysis>(IR&: F); |
826 | DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F); |
827 | ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(IR&: F); |
828 | AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(IR&: F); |
829 | auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(IR&: F); |
830 | std::unique_ptr<MemorySSAUpdater> MSSAU; |
831 | if (MSSAAnalysis) { |
832 | auto *MSSA = &MSSAAnalysis->getMSSA(); |
833 | MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA); |
834 | } |
835 | |
836 | |
837 | // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA |
838 | // after simplifying the loops. MemorySSA is preserved if it exists. |
839 | for (auto *L : *LI) |
840 | Changed |= |
841 | simplifyLoop(L, DT, LI, SE, AC, MSSAU: MSSAU.get(), /*PreserveLCSSA*/ false); |
842 | |
843 | if (!Changed) |
844 | return PreservedAnalyses::all(); |
845 | |
846 | PreservedAnalyses PA; |
847 | PA.preserve<DominatorTreeAnalysis>(); |
848 | PA.preserve<LoopAnalysis>(); |
849 | PA.preserve<ScalarEvolutionAnalysis>(); |
850 | if (MSSAAnalysis) |
851 | PA.preserve<MemorySSAAnalysis>(); |
852 | // BPI maps conditional terminators to probabilities, LoopSimplify can insert |
853 | // blocks, but it does so only by splitting existing blocks and edges. This |
854 | // results in the interesting property that all new terminators inserted are |
855 | // unconditional branches which do not appear in BPI. All deletions are |
856 | // handled via ValueHandle callbacks w/in BPI. |
857 | PA.preserve<BranchProbabilityAnalysis>(); |
858 | return PA; |
859 | } |
860 | |
861 | // FIXME: Restore this code when we re-enable verification in verifyAnalysis |
862 | // below. |
863 | #if 0 |
864 | static void verifyLoop(Loop *L) { |
865 | // Verify subloops. |
866 | for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) |
867 | verifyLoop(*I); |
868 | |
869 | // It used to be possible to just assert L->isLoopSimplifyForm(), however |
870 | // with the introduction of indirectbr, there are now cases where it's |
871 | // not possible to transform a loop as necessary. We can at least check |
872 | // that there is an indirectbr near any time there's trouble. |
873 | |
874 | // Indirectbr can interfere with preheader and unique backedge insertion. |
875 | if (!L->getLoopPreheader() || !L->getLoopLatch()) { |
876 | bool HasIndBrPred = false; |
877 | for (BasicBlock *Pred : predecessors(L->getHeader())) |
878 | if (isa<IndirectBrInst>(Pred->getTerminator())) { |
879 | HasIndBrPred = true; |
880 | break; |
881 | } |
882 | assert(HasIndBrPred && |
883 | "LoopSimplify has no excuse for missing loop header info!" ); |
884 | (void)HasIndBrPred; |
885 | } |
886 | |
887 | // Indirectbr can interfere with exit block canonicalization. |
888 | if (!L->hasDedicatedExits()) { |
889 | bool HasIndBrExiting = false; |
890 | SmallVector<BasicBlock*, 8> ExitingBlocks; |
891 | L->getExitingBlocks(ExitingBlocks); |
892 | for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { |
893 | if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { |
894 | HasIndBrExiting = true; |
895 | break; |
896 | } |
897 | } |
898 | |
899 | assert(HasIndBrExiting && |
900 | "LoopSimplify has no excuse for missing exit block info!" ); |
901 | (void)HasIndBrExiting; |
902 | } |
903 | } |
904 | #endif |
905 | |
906 | void LoopSimplify::verifyAnalysis() const { |
907 | // FIXME: This routine is being called mid-way through the loop pass manager |
908 | // as loop passes destroy this analysis. That's actually fine, but we have no |
909 | // way of expressing that here. Once all of the passes that destroy this are |
910 | // hoisted out of the loop pass manager we can add back verification here. |
911 | #if 0 |
912 | for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) |
913 | verifyLoop(*I); |
914 | #endif |
915 | } |
916 | |