| 1 | //===- FixIrreducible.cpp - Convert irreducible control-flow into loops ---===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // INPUT CFG: The blocks H and B form an irreducible cycle with two headers. |
| 10 | // |
| 11 | // Entry |
| 12 | // / \ |
| 13 | // v v |
| 14 | // H ----> B |
| 15 | // ^ /| |
| 16 | // `----' | |
| 17 | // v |
| 18 | // Exit |
| 19 | // |
| 20 | // OUTPUT CFG: Converted to a natural loop with a new header N. |
| 21 | // |
| 22 | // Entry |
| 23 | // | |
| 24 | // v |
| 25 | // N <---. |
| 26 | // / \ \ |
| 27 | // / \ | |
| 28 | // v v / |
| 29 | // H --> B --' |
| 30 | // | |
| 31 | // v |
| 32 | // Exit |
| 33 | // |
| 34 | // To convert an irreducible cycle C to a natural loop L: |
| 35 | // |
| 36 | // 1. Add a new node N to C. |
| 37 | // 2. Redirect all external incoming edges through N. |
| 38 | // 3. Redirect all edges incident on header H through N. |
| 39 | // |
| 40 | // This is sufficient to ensure that: |
| 41 | // |
| 42 | // a. Every closed path in C also exists in L, with the modification that any |
| 43 | // path passing through H now passes through N before reaching H. |
| 44 | // b. Every external path incident on any entry of C is now incident on N and |
| 45 | // then redirected to the entry. |
| 46 | // |
| 47 | // Thus, L is a strongly connected component dominated by N, and hence L is a |
| 48 | // natural loop with header N. |
| 49 | // |
| 50 | // When an irreducible cycle C with header H is transformed into a loop, the |
| 51 | // following invariants hold: |
| 52 | // |
| 53 | // 1. No new subcycles are "discovered" in the set (C-H). The only internal |
| 54 | // edges that are redirected by the transform are incident on H. Any subcycle |
| 55 | // S in (C-H), already existed prior to this transform, and is already in the |
| 56 | // list of children for this cycle C. |
| 57 | // |
| 58 | // 2. Subcycles of C are not modified by the transform. For some subcycle S of |
| 59 | // C, edges incident on the entries of S are either internal to C, or they |
| 60 | // are now redirected through N, which is outside of S. So the list of |
| 61 | // entries to S does not change. Since the transform only adds a block |
| 62 | // outside S, and redirects edges that are not internal to S, the list of |
| 63 | // blocks in S does not change. |
| 64 | // |
| 65 | // 3. Similarly, any natural loop L included in C is not affected, with one |
| 66 | // exception: L is "destroyed" by the transform iff its header is H. The |
| 67 | // backedges of such a loop are now redirected to N instead, and hence the |
| 68 | // body of this loop gets merged into the new loop with header N. |
| 69 | // |
| 70 | // The actual transformation is handled by the ControlFlowHub, which redirects |
| 71 | // specified control flow edges through a set of guard blocks. This also moves |
| 72 | // every PHINode in an outgoing block to the hub. Since the hub dominates all |
| 73 | // the outgoing blocks, each such PHINode continues to dominate its uses. Since |
| 74 | // every header in an SCC has at least two predecessors, every value used in the |
| 75 | // header (or later) but defined in a predecessor (or earlier) is represented by |
| 76 | // a PHINode in a header. Hence the above handling of PHINodes is sufficient and |
| 77 | // no further processing is required to restore SSA. |
| 78 | // |
| 79 | // Limitation: The pass cannot handle switch statements and indirect |
| 80 | // branches. Both must be lowered to plain branches first. |
| 81 | // |
| 82 | // CallBr support: CallBr is handled as a more general branch instruction which |
| 83 | // can have multiple successors. The pass redirects the edges to intermediate |
| 84 | // target blocks that unconditionally branch to the original callbr target |
| 85 | // blocks. This allows the control flow hub to know to which of the original |
| 86 | // target blocks to jump to. |
| 87 | // Example input CFG: |
| 88 | // Entry (callbr) |
| 89 | // / \ |
| 90 | // v v |
| 91 | // H ----> B |
| 92 | // ^ /| |
| 93 | // `----' | |
| 94 | // v |
| 95 | // Exit |
| 96 | // |
| 97 | // becomes: |
| 98 | // Entry (callbr) |
| 99 | // / \ |
| 100 | // v v |
| 101 | // target.H target.B |
| 102 | // | | |
| 103 | // v v |
| 104 | // H ----> B |
| 105 | // ^ /| |
| 106 | // `----' | |
| 107 | // v |
| 108 | // Exit |
| 109 | // |
| 110 | // Note |
| 111 | // OUTPUT CFG: Converted to a natural loop with a new header N. |
| 112 | // |
| 113 | // Entry (callbr) |
| 114 | // / \ |
| 115 | // v v |
| 116 | // target.H target.B |
| 117 | // \ / |
| 118 | // \ / |
| 119 | // v v |
| 120 | // N <---. |
| 121 | // / \ \ |
| 122 | // / \ | |
| 123 | // v v / |
| 124 | // H --> B --' |
| 125 | // | |
| 126 | // v |
| 127 | // Exit |
| 128 | // |
| 129 | //===----------------------------------------------------------------------===// |
| 130 | |
| 131 | #include "llvm/Transforms/Utils/FixIrreducible.h" |
| 132 | #include "llvm/Analysis/CycleAnalysis.h" |
| 133 | #include "llvm/Analysis/DomTreeUpdater.h" |
| 134 | #include "llvm/Analysis/LoopInfo.h" |
| 135 | #include "llvm/InitializePasses.h" |
| 136 | #include "llvm/Pass.h" |
| 137 | #include "llvm/Transforms/Utils.h" |
| 138 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 139 | #include "llvm/Transforms/Utils/ControlFlowUtils.h" |
| 140 | |
| 141 | #define DEBUG_TYPE "fix-irreducible" |
| 142 | |
| 143 | using namespace llvm; |
| 144 | |
| 145 | namespace { |
| 146 | struct FixIrreducible : public FunctionPass { |
| 147 | static char ID; |
| 148 | FixIrreducible() : FunctionPass(ID) { |
| 149 | initializeFixIrreduciblePass(*PassRegistry::getPassRegistry()); |
| 150 | } |
| 151 | |
| 152 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 153 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 154 | AU.addRequired<CycleInfoWrapperPass>(); |
| 155 | AU.addPreserved<DominatorTreeWrapperPass>(); |
| 156 | AU.addPreserved<CycleInfoWrapperPass>(); |
| 157 | AU.addPreserved<LoopInfoWrapperPass>(); |
| 158 | } |
| 159 | |
| 160 | bool runOnFunction(Function &F) override; |
| 161 | }; |
| 162 | } // namespace |
| 163 | |
| 164 | char FixIrreducible::ID = 0; |
| 165 | |
| 166 | FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); } |
| 167 | |
| 168 | INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible" , |
| 169 | "Convert irreducible control-flow into natural loops" , |
| 170 | false /* Only looks at CFG */, false /* Analysis Pass */) |
| 171 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
| 172 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
| 173 | INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible" , |
| 174 | "Convert irreducible control-flow into natural loops" , |
| 175 | false /* Only looks at CFG */, false /* Analysis Pass */) |
| 176 | |
| 177 | // When a new loop is created, existing children of the parent loop may now be |
| 178 | // fully inside the new loop. Reconnect these as children of the new loop. |
| 179 | static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop, |
| 180 | BasicBlock *) { |
| 181 | auto &CandidateLoops = ParentLoop ? ParentLoop->getSubLoopsVector() |
| 182 | : LI.getTopLevelLoopsVector(); |
| 183 | // Any candidate is a child iff its header is owned by the new loop. Move all |
| 184 | // the children to a new vector. |
| 185 | auto FirstChild = llvm::partition(Range&: CandidateLoops, P: [&](Loop *L) { |
| 186 | return NewLoop == L || !NewLoop->contains(BB: L->getHeader()); |
| 187 | }); |
| 188 | SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end()); |
| 189 | CandidateLoops.erase(first: FirstChild, last: CandidateLoops.end()); |
| 190 | |
| 191 | for (Loop *Child : ChildLoops) { |
| 192 | LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName() |
| 193 | << "\n" ); |
| 194 | // A child loop whose header was the old cycle header gets destroyed since |
| 195 | // its backedges are removed. |
| 196 | if (Child->getHeader() == OldHeader) { |
| 197 | for (auto *BB : Child->blocks()) { |
| 198 | if (LI.getLoopFor(BB) != Child) |
| 199 | continue; |
| 200 | LI.changeLoopFor(BB, L: NewLoop); |
| 201 | LLVM_DEBUG(dbgs() << "moved block from child: " << BB->getName() |
| 202 | << "\n" ); |
| 203 | } |
| 204 | std::vector<Loop *> GrandChildLoops; |
| 205 | std::swap(x&: GrandChildLoops, y&: Child->getSubLoopsVector()); |
| 206 | for (auto *GrandChildLoop : GrandChildLoops) { |
| 207 | GrandChildLoop->setParentLoop(nullptr); |
| 208 | NewLoop->addChildLoop(NewChild: GrandChildLoop); |
| 209 | } |
| 210 | LI.destroy(L: Child); |
| 211 | LLVM_DEBUG(dbgs() << "subsumed child loop (common header)\n" ); |
| 212 | continue; |
| 213 | } |
| 214 | |
| 215 | Child->setParentLoop(nullptr); |
| 216 | NewLoop->addChildLoop(NewChild: Child); |
| 217 | LLVM_DEBUG(dbgs() << "added child loop to new loop\n" ); |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | static void updateLoopInfo(LoopInfo &LI, Cycle &C, |
| 222 | ArrayRef<BasicBlock *> GuardBlocks) { |
| 223 | // The parent loop is a natural loop L mapped to the cycle header H as long as |
| 224 | // H is not also the header of L. In the latter case, L is destroyed and we |
| 225 | // seek its parent instead. |
| 226 | BasicBlock * = C.getHeader(); |
| 227 | Loop *ParentLoop = LI.getLoopFor(BB: CycleHeader); |
| 228 | if (ParentLoop && ParentLoop->getHeader() == CycleHeader) |
| 229 | ParentLoop = ParentLoop->getParentLoop(); |
| 230 | |
| 231 | // Create a new loop from the now-transformed cycle |
| 232 | auto *NewLoop = LI.AllocateLoop(); |
| 233 | if (ParentLoop) { |
| 234 | ParentLoop->addChildLoop(NewChild: NewLoop); |
| 235 | } else { |
| 236 | LI.addTopLevelLoop(New: NewLoop); |
| 237 | } |
| 238 | |
| 239 | // Add the guard blocks to the new loop. The first guard block is |
| 240 | // the head of all the backedges, and it is the first to be inserted |
| 241 | // in the loop. This ensures that it is recognized as the |
| 242 | // header. Since the new loop is already in LoopInfo, the new blocks |
| 243 | // are also propagated up the chain of parent loops. |
| 244 | for (auto *G : GuardBlocks) { |
| 245 | LLVM_DEBUG(dbgs() << "added guard block to loop: " << G->getName() << "\n" ); |
| 246 | NewLoop->addBasicBlockToLoop(NewBB: G, LI); |
| 247 | } |
| 248 | |
| 249 | for (auto *BB : C.blocks()) { |
| 250 | NewLoop->addBlockEntry(BB); |
| 251 | if (LI.getLoopFor(BB) == ParentLoop) { |
| 252 | LLVM_DEBUG(dbgs() << "moved block from parent: " << BB->getName() |
| 253 | << "\n" ); |
| 254 | LI.changeLoopFor(BB, L: NewLoop); |
| 255 | } else { |
| 256 | LLVM_DEBUG(dbgs() << "added block from child: " << BB->getName() << "\n" ); |
| 257 | } |
| 258 | } |
| 259 | LLVM_DEBUG(dbgs() << "header for new loop: " |
| 260 | << NewLoop->getHeader()->getName() << "\n" ); |
| 261 | |
| 262 | reconnectChildLoops(LI, ParentLoop, NewLoop, OldHeader: C.getHeader()); |
| 263 | |
| 264 | LLVM_DEBUG(dbgs() << "Verify new loop.\n" ; NewLoop->print(dbgs())); |
| 265 | NewLoop->verifyLoop(); |
| 266 | if (ParentLoop) { |
| 267 | LLVM_DEBUG(dbgs() << "Verify parent loop.\n" ; ParentLoop->print(dbgs())); |
| 268 | ParentLoop->verifyLoop(); |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | // Given a set of blocks and headers in an irreducible SCC, convert it into a |
| 273 | // natural loop. Also insert this new loop at its appropriate place in the |
| 274 | // hierarchy of loops. |
| 275 | static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT, |
| 276 | LoopInfo *LI) { |
| 277 | if (C.isReducible()) |
| 278 | return false; |
| 279 | LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n" ;); |
| 280 | |
| 281 | DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); |
| 282 | ControlFlowHub CHub; |
| 283 | SetVector<BasicBlock *> Predecessors; |
| 284 | |
| 285 | // Redirect internal edges incident on the header. |
| 286 | BasicBlock * = C.getHeader(); |
| 287 | for (BasicBlock *P : predecessors(BB: Header)) { |
| 288 | if (C.contains(Block: P)) |
| 289 | Predecessors.insert(X: P); |
| 290 | } |
| 291 | |
| 292 | for (BasicBlock *P : Predecessors) { |
| 293 | if (BranchInst *Branch = dyn_cast<BranchInst>(Val: P->getTerminator())) { |
| 294 | // Exactly one of the two successors is the header. |
| 295 | BasicBlock *Succ0 = Branch->getSuccessor(i: 0) == Header ? Header : nullptr; |
| 296 | BasicBlock *Succ1 = Succ0 ? nullptr : Header; |
| 297 | assert(Succ0 || Branch->getSuccessor(1) == Header); |
| 298 | assert(Succ0 || Succ1); |
| 299 | CHub.addBranch(BB: P, Succ0, Succ1); |
| 300 | |
| 301 | LLVM_DEBUG(dbgs() << "Added internal branch: " << printBasicBlock(P) |
| 302 | << " -> " << printBasicBlock(Succ0) |
| 303 | << (Succ0 && Succ1 ? " " : "" ) << printBasicBlock(Succ1) |
| 304 | << '\n'); |
| 305 | } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(Val: P->getTerminator())) { |
| 306 | for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) { |
| 307 | BasicBlock *Succ = CallBr->getSuccessor(i: I); |
| 308 | if (Succ != Header) |
| 309 | continue; |
| 310 | BasicBlock *NewSucc = SplitCallBrEdge(CallBrBlock: P, Succ, SuccIdx: I, DTU: &DTU, CI: &CI, LI); |
| 311 | CHub.addBranch(BB: NewSucc, Succ0: Succ); |
| 312 | LLVM_DEBUG(dbgs() << "Added internal branch: " |
| 313 | << printBasicBlock(NewSucc) << " -> " |
| 314 | << printBasicBlock(Succ) << '\n'); |
| 315 | } |
| 316 | } else { |
| 317 | llvm_unreachable("unsupported block terminator" ); |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | // Redirect external incoming edges. This includes the edges on the header. |
| 322 | Predecessors.clear(); |
| 323 | for (BasicBlock *E : C.entries()) { |
| 324 | for (BasicBlock *P : predecessors(BB: E)) { |
| 325 | if (!C.contains(Block: P)) |
| 326 | Predecessors.insert(X: P); |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | for (BasicBlock *P : Predecessors) { |
| 331 | if (BranchInst *Branch = dyn_cast<BranchInst>(Val: P->getTerminator()); Branch) { |
| 332 | BasicBlock *Succ0 = Branch->getSuccessor(i: 0); |
| 333 | Succ0 = C.contains(Block: Succ0) ? Succ0 : nullptr; |
| 334 | BasicBlock *Succ1 = |
| 335 | Branch->isUnconditional() ? nullptr : Branch->getSuccessor(i: 1); |
| 336 | Succ1 = Succ1 && C.contains(Block: Succ1) ? Succ1 : nullptr; |
| 337 | CHub.addBranch(BB: P, Succ0, Succ1); |
| 338 | |
| 339 | LLVM_DEBUG(dbgs() << "Added external branch: " << printBasicBlock(P) |
| 340 | << " -> " << printBasicBlock(Succ0) |
| 341 | << (Succ0 && Succ1 ? " " : "" ) << printBasicBlock(Succ1) |
| 342 | << '\n'); |
| 343 | } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(Val: P->getTerminator())) { |
| 344 | for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) { |
| 345 | BasicBlock *Succ = CallBr->getSuccessor(i: I); |
| 346 | if (!C.contains(Block: Succ)) |
| 347 | continue; |
| 348 | BasicBlock *NewSucc = SplitCallBrEdge(CallBrBlock: P, Succ, SuccIdx: I, DTU: &DTU, CI: &CI, LI); |
| 349 | CHub.addBranch(BB: NewSucc, Succ0: Succ); |
| 350 | LLVM_DEBUG(dbgs() << "Added external branch: " |
| 351 | << printBasicBlock(NewSucc) << " -> " |
| 352 | << printBasicBlock(Succ) << '\n'); |
| 353 | } |
| 354 | } else { |
| 355 | llvm_unreachable("unsupported block terminator" ); |
| 356 | } |
| 357 | } |
| 358 | |
| 359 | // Redirect all the backedges through a "hub" consisting of a series |
| 360 | // of guard blocks that manage the flow of control from the |
| 361 | // predecessors to the headers. |
| 362 | SmallVector<BasicBlock *> GuardBlocks; |
| 363 | |
| 364 | // Minor optimization: The cycle entries are discovered in an order that is |
| 365 | // the opposite of the order in which these blocks appear as branch targets. |
| 366 | // This results in a lot of condition inversions in the control flow out of |
| 367 | // the new ControlFlowHub, which can be mitigated if the orders match. So we |
| 368 | // reverse the entries when adding them to the hub. |
| 369 | SetVector<BasicBlock *> Entries; |
| 370 | Entries.insert(Start: C.entry_rbegin(), End: C.entry_rend()); |
| 371 | |
| 372 | CHub.finalize(DTU: &DTU, GuardBlocks, Prefix: "irr" ); |
| 373 | #if defined(EXPENSIVE_CHECKS) |
| 374 | assert(DT.verify(DominatorTree::VerificationLevel::Full)); |
| 375 | #else |
| 376 | assert(DT.verify(DominatorTree::VerificationLevel::Fast)); |
| 377 | #endif |
| 378 | |
| 379 | // If we are updating LoopInfo, do that now before modifying the cycle. This |
| 380 | // ensures that the first guard block is the header of a new natural loop. |
| 381 | if (LI) |
| 382 | updateLoopInfo(LI&: *LI, C, GuardBlocks); |
| 383 | |
| 384 | for (auto *G : GuardBlocks) { |
| 385 | LLVM_DEBUG(dbgs() << "added guard block to cycle: " << G->getName() |
| 386 | << "\n" ); |
| 387 | CI.addBlockToCycle(Block: G, Cycle: &C); |
| 388 | } |
| 389 | C.setSingleEntry(GuardBlocks[0]); |
| 390 | |
| 391 | C.verifyCycle(); |
| 392 | if (Cycle *Parent = C.getParentCycle()) |
| 393 | Parent->verifyCycle(); |
| 394 | |
| 395 | LLVM_DEBUG(dbgs() << "Finished one cycle:\n" ; CI.print(dbgs());); |
| 396 | return true; |
| 397 | } |
| 398 | |
| 399 | static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT, |
| 400 | LoopInfo *LI) { |
| 401 | LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: " |
| 402 | << F.getName() << "\n" ); |
| 403 | |
| 404 | bool Changed = false; |
| 405 | for (Cycle *TopCycle : CI.toplevel_cycles()) { |
| 406 | for (Cycle *C : depth_first(G: TopCycle)) { |
| 407 | Changed |= fixIrreducible(C&: *C, CI, DT, LI); |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | if (!Changed) |
| 412 | return false; |
| 413 | |
| 414 | #if defined(EXPENSIVE_CHECKS) |
| 415 | CI.verify(); |
| 416 | if (LI) { |
| 417 | LI->verify(DT); |
| 418 | } |
| 419 | #endif // EXPENSIVE_CHECKS |
| 420 | |
| 421 | return true; |
| 422 | } |
| 423 | |
| 424 | bool FixIrreducible::runOnFunction(Function &F) { |
| 425 | auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); |
| 426 | LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; |
| 427 | auto &CI = getAnalysis<CycleInfoWrapperPass>().getResult(); |
| 428 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
| 429 | return FixIrreducibleImpl(F, CI, DT, LI); |
| 430 | } |
| 431 | |
| 432 | PreservedAnalyses FixIrreduciblePass::run(Function &F, |
| 433 | FunctionAnalysisManager &AM) { |
| 434 | auto *LI = AM.getCachedResult<LoopAnalysis>(IR&: F); |
| 435 | auto &CI = AM.getResult<CycleAnalysis>(IR&: F); |
| 436 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 437 | |
| 438 | if (!FixIrreducibleImpl(F, CI, DT, LI)) |
| 439 | return PreservedAnalyses::all(); |
| 440 | |
| 441 | PreservedAnalyses PA; |
| 442 | PA.preserve<LoopAnalysis>(); |
| 443 | PA.preserve<CycleAnalysis>(); |
| 444 | PA.preserve<DominatorTreeAnalysis>(); |
| 445 | return PA; |
| 446 | } |
| 447 | |