| 1 | //===- FixIrreducible.cpp - Convert irreducible control-flow into loops ---===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // INPUT CFG: The blocks H and B form an irreducible cycle with two headers. |
| 10 | // |
| 11 | // Entry |
| 12 | // / \ |
| 13 | // v v |
| 14 | // H ----> B |
| 15 | // ^ /| |
| 16 | // `----' | |
| 17 | // v |
| 18 | // Exit |
| 19 | // |
| 20 | // OUTPUT CFG: Converted to a natural loop with a new header N. |
| 21 | // |
| 22 | // Entry |
| 23 | // | |
| 24 | // v |
| 25 | // N <---. |
| 26 | // / \ \ |
| 27 | // / \ | |
| 28 | // v v / |
| 29 | // H --> B --' |
| 30 | // | |
| 31 | // v |
| 32 | // Exit |
| 33 | // |
| 34 | // To convert an irreducible cycle C to a natural loop L: |
| 35 | // |
| 36 | // 1. Add a new node N to C. |
| 37 | // 2. Redirect all external incoming edges through N. |
| 38 | // 3. Redirect all edges incident on header H through N. |
| 39 | // |
| 40 | // This is sufficient to ensure that: |
| 41 | // |
| 42 | // a. Every closed path in C also exists in L, with the modification that any |
| 43 | // path passing through H now passes through N before reaching H. |
| 44 | // b. Every external path incident on any entry of C is now incident on N and |
| 45 | // then redirected to the entry. |
| 46 | // |
| 47 | // Thus, L is a strongly connected component dominated by N, and hence L is a |
| 48 | // natural loop with header N. |
| 49 | // |
| 50 | // When an irreducible cycle C with header H is transformed into a loop, the |
| 51 | // following invariants hold: |
| 52 | // |
| 53 | // 1. No new subcycles are "discovered" in the set (C-H). The only internal |
| 54 | // edges that are redirected by the transform are incident on H. Any subcycle |
| 55 | // S in (C-H), already existed prior to this transform, and is already in the |
| 56 | // list of children for this cycle C. |
| 57 | // |
| 58 | // 2. Subcycles of C are not modified by the transform. For some subcycle S of |
| 59 | // C, edges incident on the entries of S are either internal to C, or they |
| 60 | // are now redirected through N, which is outside of S. So the list of |
| 61 | // entries to S does not change. Since the transform only adds a block |
| 62 | // outside S, and redirects edges that are not internal to S, the list of |
| 63 | // blocks in S does not change. |
| 64 | // |
| 65 | // 3. Similarly, any natural loop L included in C is not affected, with one |
| 66 | // exception: L is "destroyed" by the transform iff its header is H. The |
| 67 | // backedges of such a loop are now redirected to N instead, and hence the |
| 68 | // body of this loop gets merged into the new loop with header N. |
| 69 | // |
| 70 | // The actual transformation is handled by the ControlFlowHub, which redirects |
| 71 | // specified control flow edges through a set of guard blocks. This also moves |
| 72 | // every PHINode in an outgoing block to the hub. Since the hub dominates all |
| 73 | // the outgoing blocks, each such PHINode continues to dominate its uses. Since |
| 74 | // every header in an SCC has at least two predecessors, every value used in the |
| 75 | // header (or later) but defined in a predecessor (or earlier) is represented by |
| 76 | // a PHINode in a header. Hence the above handling of PHINodes is sufficient and |
| 77 | // no further processing is required to restore SSA. |
| 78 | // |
| 79 | // Limitation: The pass cannot handle switch statements and indirect |
| 80 | // branches. Both must be lowered to plain branches first. |
| 81 | // |
| 82 | //===----------------------------------------------------------------------===// |
| 83 | |
| 84 | #include "llvm/Transforms/Utils/FixIrreducible.h" |
| 85 | #include "llvm/Analysis/CycleAnalysis.h" |
| 86 | #include "llvm/Analysis/DomTreeUpdater.h" |
| 87 | #include "llvm/Analysis/LoopInfo.h" |
| 88 | #include "llvm/InitializePasses.h" |
| 89 | #include "llvm/Pass.h" |
| 90 | #include "llvm/Transforms/Utils.h" |
| 91 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 92 | #include "llvm/Transforms/Utils/ControlFlowUtils.h" |
| 93 | |
| 94 | #define DEBUG_TYPE "fix-irreducible" |
| 95 | |
| 96 | using namespace llvm; |
| 97 | |
| 98 | namespace { |
| 99 | struct FixIrreducible : public FunctionPass { |
| 100 | static char ID; |
| 101 | FixIrreducible() : FunctionPass(ID) { |
| 102 | initializeFixIrreduciblePass(*PassRegistry::getPassRegistry()); |
| 103 | } |
| 104 | |
| 105 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 106 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 107 | AU.addRequired<CycleInfoWrapperPass>(); |
| 108 | AU.addPreserved<DominatorTreeWrapperPass>(); |
| 109 | AU.addPreserved<CycleInfoWrapperPass>(); |
| 110 | AU.addPreserved<LoopInfoWrapperPass>(); |
| 111 | } |
| 112 | |
| 113 | bool runOnFunction(Function &F) override; |
| 114 | }; |
| 115 | } // namespace |
| 116 | |
| 117 | char FixIrreducible::ID = 0; |
| 118 | |
| 119 | FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); } |
| 120 | |
| 121 | INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible" , |
| 122 | "Convert irreducible control-flow into natural loops" , |
| 123 | false /* Only looks at CFG */, false /* Analysis Pass */) |
| 124 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
| 125 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
| 126 | INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible" , |
| 127 | "Convert irreducible control-flow into natural loops" , |
| 128 | false /* Only looks at CFG */, false /* Analysis Pass */) |
| 129 | |
| 130 | // When a new loop is created, existing children of the parent loop may now be |
| 131 | // fully inside the new loop. Reconnect these as children of the new loop. |
| 132 | static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop, |
| 133 | BasicBlock *) { |
| 134 | auto &CandidateLoops = ParentLoop ? ParentLoop->getSubLoopsVector() |
| 135 | : LI.getTopLevelLoopsVector(); |
| 136 | // Any candidate is a child iff its header is owned by the new loop. Move all |
| 137 | // the children to a new vector. |
| 138 | auto FirstChild = llvm::partition(Range&: CandidateLoops, P: [&](Loop *L) { |
| 139 | return NewLoop == L || !NewLoop->contains(BB: L->getHeader()); |
| 140 | }); |
| 141 | SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end()); |
| 142 | CandidateLoops.erase(first: FirstChild, last: CandidateLoops.end()); |
| 143 | |
| 144 | for (Loop *Child : ChildLoops) { |
| 145 | LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName() |
| 146 | << "\n" ); |
| 147 | // A child loop whose header was the old cycle header gets destroyed since |
| 148 | // its backedges are removed. |
| 149 | if (Child->getHeader() == OldHeader) { |
| 150 | for (auto *BB : Child->blocks()) { |
| 151 | if (LI.getLoopFor(BB) != Child) |
| 152 | continue; |
| 153 | LI.changeLoopFor(BB, L: NewLoop); |
| 154 | LLVM_DEBUG(dbgs() << "moved block from child: " << BB->getName() |
| 155 | << "\n" ); |
| 156 | } |
| 157 | std::vector<Loop *> GrandChildLoops; |
| 158 | std::swap(x&: GrandChildLoops, y&: Child->getSubLoopsVector()); |
| 159 | for (auto *GrandChildLoop : GrandChildLoops) { |
| 160 | GrandChildLoop->setParentLoop(nullptr); |
| 161 | NewLoop->addChildLoop(NewChild: GrandChildLoop); |
| 162 | } |
| 163 | LI.destroy(L: Child); |
| 164 | LLVM_DEBUG(dbgs() << "subsumed child loop (common header)\n" ); |
| 165 | continue; |
| 166 | } |
| 167 | |
| 168 | Child->setParentLoop(nullptr); |
| 169 | NewLoop->addChildLoop(NewChild: Child); |
| 170 | LLVM_DEBUG(dbgs() << "added child loop to new loop\n" ); |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | static void updateLoopInfo(LoopInfo &LI, Cycle &C, |
| 175 | ArrayRef<BasicBlock *> GuardBlocks) { |
| 176 | // The parent loop is a natural loop L mapped to the cycle header H as long as |
| 177 | // H is not also the header of L. In the latter case, L is destroyed and we |
| 178 | // seek its parent instead. |
| 179 | BasicBlock * = C.getHeader(); |
| 180 | Loop *ParentLoop = LI.getLoopFor(BB: CycleHeader); |
| 181 | if (ParentLoop && ParentLoop->getHeader() == CycleHeader) |
| 182 | ParentLoop = ParentLoop->getParentLoop(); |
| 183 | |
| 184 | // Create a new loop from the now-transformed cycle |
| 185 | auto *NewLoop = LI.AllocateLoop(); |
| 186 | if (ParentLoop) { |
| 187 | ParentLoop->addChildLoop(NewChild: NewLoop); |
| 188 | } else { |
| 189 | LI.addTopLevelLoop(New: NewLoop); |
| 190 | } |
| 191 | |
| 192 | // Add the guard blocks to the new loop. The first guard block is |
| 193 | // the head of all the backedges, and it is the first to be inserted |
| 194 | // in the loop. This ensures that it is recognized as the |
| 195 | // header. Since the new loop is already in LoopInfo, the new blocks |
| 196 | // are also propagated up the chain of parent loops. |
| 197 | for (auto *G : GuardBlocks) { |
| 198 | LLVM_DEBUG(dbgs() << "added guard block to loop: " << G->getName() << "\n" ); |
| 199 | NewLoop->addBasicBlockToLoop(NewBB: G, LI); |
| 200 | } |
| 201 | |
| 202 | for (auto *BB : C.blocks()) { |
| 203 | NewLoop->addBlockEntry(BB); |
| 204 | if (LI.getLoopFor(BB) == ParentLoop) { |
| 205 | LLVM_DEBUG(dbgs() << "moved block from parent: " << BB->getName() |
| 206 | << "\n" ); |
| 207 | LI.changeLoopFor(BB, L: NewLoop); |
| 208 | } else { |
| 209 | LLVM_DEBUG(dbgs() << "added block from child: " << BB->getName() << "\n" ); |
| 210 | } |
| 211 | } |
| 212 | LLVM_DEBUG(dbgs() << "header for new loop: " |
| 213 | << NewLoop->getHeader()->getName() << "\n" ); |
| 214 | |
| 215 | reconnectChildLoops(LI, ParentLoop, NewLoop, OldHeader: C.getHeader()); |
| 216 | |
| 217 | LLVM_DEBUG(dbgs() << "Verify new loop.\n" ; NewLoop->print(dbgs())); |
| 218 | NewLoop->verifyLoop(); |
| 219 | if (ParentLoop) { |
| 220 | LLVM_DEBUG(dbgs() << "Verify parent loop.\n" ; ParentLoop->print(dbgs())); |
| 221 | ParentLoop->verifyLoop(); |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | // Given a set of blocks and headers in an irreducible SCC, convert it into a |
| 226 | // natural loop. Also insert this new loop at its appropriate place in the |
| 227 | // hierarchy of loops. |
| 228 | static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT, |
| 229 | LoopInfo *LI) { |
| 230 | if (C.isReducible()) |
| 231 | return false; |
| 232 | LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n" ;); |
| 233 | |
| 234 | ControlFlowHub CHub; |
| 235 | SetVector<BasicBlock *> Predecessors; |
| 236 | |
| 237 | // Redirect internal edges incident on the header. |
| 238 | BasicBlock * = C.getHeader(); |
| 239 | for (BasicBlock *P : predecessors(BB: Header)) { |
| 240 | if (C.contains(Block: P)) |
| 241 | Predecessors.insert(X: P); |
| 242 | } |
| 243 | |
| 244 | for (BasicBlock *P : Predecessors) { |
| 245 | auto *Branch = cast<BranchInst>(Val: P->getTerminator()); |
| 246 | // Exactly one of the two successors is the header. |
| 247 | BasicBlock *Succ0 = Branch->getSuccessor(i: 0) == Header ? Header : nullptr; |
| 248 | BasicBlock *Succ1 = Succ0 ? nullptr : Header; |
| 249 | if (!Succ0) |
| 250 | assert(Branch->getSuccessor(1) == Header); |
| 251 | assert(Succ0 || Succ1); |
| 252 | CHub.addBranch(BB: P, Succ0, Succ1); |
| 253 | |
| 254 | LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> " |
| 255 | << (Succ0 ? Succ0->getName() : "" ) << " " |
| 256 | << (Succ1 ? Succ1->getName() : "" ) << "\n" ); |
| 257 | } |
| 258 | |
| 259 | // Redirect external incoming edges. This includes the edges on the header. |
| 260 | Predecessors.clear(); |
| 261 | for (BasicBlock *E : C.entries()) { |
| 262 | for (BasicBlock *P : predecessors(BB: E)) { |
| 263 | if (!C.contains(Block: P)) |
| 264 | Predecessors.insert(X: P); |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | for (BasicBlock *P : Predecessors) { |
| 269 | auto *Branch = cast<BranchInst>(Val: P->getTerminator()); |
| 270 | BasicBlock *Succ0 = Branch->getSuccessor(i: 0); |
| 271 | Succ0 = C.contains(Block: Succ0) ? Succ0 : nullptr; |
| 272 | BasicBlock *Succ1 = |
| 273 | Branch->isUnconditional() ? nullptr : Branch->getSuccessor(i: 1); |
| 274 | Succ1 = Succ1 && C.contains(Block: Succ1) ? Succ1 : nullptr; |
| 275 | CHub.addBranch(BB: P, Succ0, Succ1); |
| 276 | |
| 277 | LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> " |
| 278 | << (Succ0 ? Succ0->getName() : "" ) << " " |
| 279 | << (Succ1 ? Succ1->getName() : "" ) << "\n" ); |
| 280 | } |
| 281 | |
| 282 | // Redirect all the backedges through a "hub" consisting of a series |
| 283 | // of guard blocks that manage the flow of control from the |
| 284 | // predecessors to the headers. |
| 285 | SmallVector<BasicBlock *> GuardBlocks; |
| 286 | |
| 287 | // Minor optimization: The cycle entries are discovered in an order that is |
| 288 | // the opposite of the order in which these blocks appear as branch targets. |
| 289 | // This results in a lot of condition inversions in the control flow out of |
| 290 | // the new ControlFlowHub, which can be mitigated if the orders match. So we |
| 291 | // reverse the entries when adding them to the hub. |
| 292 | SetVector<BasicBlock *> Entries; |
| 293 | Entries.insert(Start: C.entry_rbegin(), End: C.entry_rend()); |
| 294 | |
| 295 | DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); |
| 296 | CHub.finalize(DTU: &DTU, GuardBlocks, Prefix: "irr" ); |
| 297 | #if defined(EXPENSIVE_CHECKS) |
| 298 | assert(DT.verify(DominatorTree::VerificationLevel::Full)); |
| 299 | #else |
| 300 | assert(DT.verify(DominatorTree::VerificationLevel::Fast)); |
| 301 | #endif |
| 302 | |
| 303 | // If we are updating LoopInfo, do that now before modifying the cycle. This |
| 304 | // ensures that the first guard block is the header of a new natural loop. |
| 305 | if (LI) |
| 306 | updateLoopInfo(LI&: *LI, C, GuardBlocks); |
| 307 | |
| 308 | for (auto *G : GuardBlocks) { |
| 309 | LLVM_DEBUG(dbgs() << "added guard block to cycle: " << G->getName() |
| 310 | << "\n" ); |
| 311 | CI.addBlockToCycle(Block: G, Cycle: &C); |
| 312 | } |
| 313 | C.setSingleEntry(GuardBlocks[0]); |
| 314 | |
| 315 | C.verifyCycle(); |
| 316 | if (Cycle *Parent = C.getParentCycle()) |
| 317 | Parent->verifyCycle(); |
| 318 | |
| 319 | LLVM_DEBUG(dbgs() << "Finished one cycle:\n" ; CI.print(dbgs());); |
| 320 | return true; |
| 321 | } |
| 322 | |
| 323 | static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT, |
| 324 | LoopInfo *LI) { |
| 325 | LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: " |
| 326 | << F.getName() << "\n" ); |
| 327 | |
| 328 | assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator." ); |
| 329 | |
| 330 | bool Changed = false; |
| 331 | for (Cycle *TopCycle : CI.toplevel_cycles()) { |
| 332 | for (Cycle *C : depth_first(G: TopCycle)) { |
| 333 | Changed |= fixIrreducible(C&: *C, CI, DT, LI); |
| 334 | } |
| 335 | } |
| 336 | |
| 337 | if (!Changed) |
| 338 | return false; |
| 339 | |
| 340 | #if defined(EXPENSIVE_CHECKS) |
| 341 | CI.verify(); |
| 342 | if (LI) { |
| 343 | LI->verify(DT); |
| 344 | } |
| 345 | #endif // EXPENSIVE_CHECKS |
| 346 | |
| 347 | return true; |
| 348 | } |
| 349 | |
| 350 | bool FixIrreducible::runOnFunction(Function &F) { |
| 351 | auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); |
| 352 | LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; |
| 353 | auto &CI = getAnalysis<CycleInfoWrapperPass>().getResult(); |
| 354 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
| 355 | return FixIrreducibleImpl(F, CI, DT, LI); |
| 356 | } |
| 357 | |
| 358 | PreservedAnalyses FixIrreduciblePass::run(Function &F, |
| 359 | FunctionAnalysisManager &AM) { |
| 360 | auto *LI = AM.getCachedResult<LoopAnalysis>(IR&: F); |
| 361 | auto &CI = AM.getResult<CycleAnalysis>(IR&: F); |
| 362 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 363 | |
| 364 | if (!FixIrreducibleImpl(F, CI, DT, LI)) |
| 365 | return PreservedAnalyses::all(); |
| 366 | |
| 367 | PreservedAnalyses PA; |
| 368 | PA.preserve<LoopAnalysis>(); |
| 369 | PA.preserve<CycleAnalysis>(); |
| 370 | PA.preserve<DominatorTreeAnalysis>(); |
| 371 | return PA; |
| 372 | } |
| 373 | |