| 1 | //===- UnifyLoopExits.cpp - Redirect exiting edges to one block -*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // For each natural loop with multiple exit blocks, this pass creates a new |
| 10 | // block N such that all exiting blocks now branch to N, and then control flow |
| 11 | // is redistributed to all the original exit blocks. |
| 12 | // |
| 13 | // Limitation: This assumes that all terminators in the CFG are direct branches |
| 14 | // (the "br" instruction). The presence of any other control flow |
| 15 | // such as indirectbr or switch will cause an assert. |
| 16 | // The callbr terminator is supported by creating intermediate |
| 17 | // target blocks that unconditionally branch to the original target |
| 18 | // blocks. These intermediate target blocks can then be redirected |
| 19 | // through the ControlFlowHub as usual. |
| 20 | // |
| 21 | //===----------------------------------------------------------------------===// |
| 22 | |
| 23 | #include "llvm/Transforms/Utils/UnifyLoopExits.h" |
| 24 | #include "llvm/ADT/MapVector.h" |
| 25 | #include "llvm/Analysis/DomTreeUpdater.h" |
| 26 | #include "llvm/Analysis/LoopInfo.h" |
| 27 | #include "llvm/IR/Constants.h" |
| 28 | #include "llvm/IR/Dominators.h" |
| 29 | #include "llvm/InitializePasses.h" |
| 30 | #include "llvm/Support/CommandLine.h" |
| 31 | #include "llvm/Transforms/Utils.h" |
| 32 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 33 | #include "llvm/Transforms/Utils/ControlFlowUtils.h" |
| 34 | |
| 35 | #define DEBUG_TYPE "unify-loop-exits" |
| 36 | |
| 37 | using namespace llvm; |
| 38 | |
| 39 | static cl::opt<unsigned> MaxBooleansInControlFlowHub( |
| 40 | "max-booleans-in-control-flow-hub" , cl::init(Val: 32), cl::Hidden, |
| 41 | cl::desc("Set the maximum number of outgoing blocks for using a boolean " |
| 42 | "value to record the exiting block in the ControlFlowHub." )); |
| 43 | |
| 44 | namespace { |
| 45 | struct UnifyLoopExitsLegacyPass : public FunctionPass { |
| 46 | static char ID; |
| 47 | UnifyLoopExitsLegacyPass() : FunctionPass(ID) { |
| 48 | initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry()); |
| 49 | } |
| 50 | |
| 51 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 52 | AU.addRequired<LoopInfoWrapperPass>(); |
| 53 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 54 | AU.addPreserved<LoopInfoWrapperPass>(); |
| 55 | AU.addPreserved<DominatorTreeWrapperPass>(); |
| 56 | } |
| 57 | |
| 58 | bool runOnFunction(Function &F) override; |
| 59 | }; |
| 60 | } // namespace |
| 61 | |
| 62 | char UnifyLoopExitsLegacyPass::ID = 0; |
| 63 | |
| 64 | FunctionPass *llvm::createUnifyLoopExitsPass() { |
| 65 | return new UnifyLoopExitsLegacyPass(); |
| 66 | } |
| 67 | |
| 68 | INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits" , |
| 69 | "Fixup each natural loop to have a single exit block" , |
| 70 | false /* Only looks at CFG */, false /* Analysis Pass */) |
| 71 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
| 72 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
| 73 | INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits" , |
| 74 | "Fixup each natural loop to have a single exit block" , |
| 75 | false /* Only looks at CFG */, false /* Analysis Pass */) |
| 76 | |
| 77 | // The current transform introduces new control flow paths which may break the |
| 78 | // SSA requirement that every def must dominate all its uses. For example, |
| 79 | // consider a value D defined inside the loop that is used by some instruction |
| 80 | // U outside the loop. It follows that D dominates U, since the original |
| 81 | // program has valid SSA form. After merging the exits, all paths from D to U |
| 82 | // now flow through the unified exit block. In addition, there may be other |
| 83 | // paths that do not pass through D, but now reach the unified exit |
| 84 | // block. Thus, D no longer dominates U. |
| 85 | // |
| 86 | // Restore the dominance by creating a phi for each such D at the new unified |
| 87 | // loop exit. But when doing this, ignore any uses U that are in the new unified |
| 88 | // loop exit, since those were introduced specially when the block was created. |
| 89 | // |
| 90 | // The use of SSAUpdater seems like overkill for this operation. The location |
| 91 | // for creating the new PHI is well-known, and also the set of incoming blocks |
| 92 | // to the new PHI. |
| 93 | static void restoreSSA(const DominatorTree &DT, const Loop *L, |
| 94 | SmallVectorImpl<BasicBlock *> &Incoming, |
| 95 | BasicBlock *LoopExitBlock) { |
| 96 | using InstVector = SmallVector<Instruction *, 8>; |
| 97 | using IIMap = MapVector<Instruction *, InstVector>; |
| 98 | IIMap ExternalUsers; |
| 99 | for (auto *BB : L->blocks()) { |
| 100 | for (auto &I : *BB) { |
| 101 | for (auto &U : I.uses()) { |
| 102 | auto UserInst = cast<Instruction>(Val: U.getUser()); |
| 103 | auto UserBlock = UserInst->getParent(); |
| 104 | if (UserBlock == LoopExitBlock) |
| 105 | continue; |
| 106 | if (L->contains(BB: UserBlock)) |
| 107 | continue; |
| 108 | LLVM_DEBUG(dbgs() << "added ext use for " << I.getName() << "(" |
| 109 | << BB->getName() << ")" |
| 110 | << ": " << UserInst->getName() << "(" |
| 111 | << UserBlock->getName() << ")" |
| 112 | << "\n" ); |
| 113 | ExternalUsers[&I].push_back(Elt: UserInst); |
| 114 | } |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | for (const auto &II : ExternalUsers) { |
| 119 | // For each Def used outside the loop, create NewPhi in |
| 120 | // LoopExitBlock. NewPhi receives Def only along exiting blocks that |
| 121 | // dominate it, while the remaining values are undefined since those paths |
| 122 | // didn't exist in the original CFG. |
| 123 | auto Def = II.first; |
| 124 | LLVM_DEBUG(dbgs() << "externally used: " << Def->getName() << "\n" ); |
| 125 | auto NewPhi = |
| 126 | PHINode::Create(Ty: Def->getType(), NumReservedValues: Incoming.size(), |
| 127 | NameStr: Def->getName() + ".moved" , InsertBefore: LoopExitBlock->begin()); |
| 128 | for (auto *In : Incoming) { |
| 129 | LLVM_DEBUG(dbgs() << "predecessor " << In->getName() << ": " ); |
| 130 | if (Def->getParent() == In || DT.dominates(Def, BB: In)) { |
| 131 | LLVM_DEBUG(dbgs() << "dominated\n" ); |
| 132 | NewPhi->addIncoming(V: Def, BB: In); |
| 133 | } else { |
| 134 | LLVM_DEBUG(dbgs() << "not dominated\n" ); |
| 135 | NewPhi->addIncoming(V: PoisonValue::get(T: Def->getType()), BB: In); |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | LLVM_DEBUG(dbgs() << "external users:" ); |
| 140 | for (auto *U : II.second) { |
| 141 | LLVM_DEBUG(dbgs() << " " << U->getName()); |
| 142 | U->replaceUsesOfWith(From: Def, To: NewPhi); |
| 143 | } |
| 144 | LLVM_DEBUG(dbgs() << "\n" ); |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { |
| 149 | // To unify the loop exits, we need a list of the exiting blocks as |
| 150 | // well as exit blocks. The functions for locating these lists both |
| 151 | // traverse the entire loop body. It is more efficient to first |
| 152 | // locate the exiting blocks and then examine their successors to |
| 153 | // locate the exit blocks. |
| 154 | SmallVector<BasicBlock *, 8> ExitingBlocks; |
| 155 | L->getExitingBlocks(ExitingBlocks); |
| 156 | |
| 157 | // No exit blocks, so nothing to do. Just return. |
| 158 | if (ExitingBlocks.empty()) |
| 159 | return false; |
| 160 | |
| 161 | DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); |
| 162 | SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix; |
| 163 | |
| 164 | // Redirect exiting edges through a control flow hub. |
| 165 | ControlFlowHub CHub; |
| 166 | bool Changed = false; |
| 167 | |
| 168 | for (unsigned I = 0; I < ExitingBlocks.size(); ++I) { |
| 169 | BasicBlock *BB = ExitingBlocks[I]; |
| 170 | if (BranchInst *Branch = dyn_cast<BranchInst>(Val: BB->getTerminator())) { |
| 171 | BasicBlock *Succ0 = Branch->getSuccessor(i: 0); |
| 172 | Succ0 = L->contains(BB: Succ0) ? nullptr : Succ0; |
| 173 | |
| 174 | BasicBlock *Succ1 = |
| 175 | Branch->isUnconditional() ? nullptr : Branch->getSuccessor(i: 1); |
| 176 | Succ1 = L->contains(BB: Succ1) ? nullptr : Succ1; |
| 177 | CHub.addBranch(BB, Succ0, Succ1); |
| 178 | |
| 179 | LLVM_DEBUG(dbgs() << "Added extiting branch: " << printBasicBlock(BB) |
| 180 | << " -> " << printBasicBlock(Succ0) |
| 181 | << (Succ0 && Succ1 ? " " : "" ) << printBasicBlock(Succ1) |
| 182 | << '\n'); |
| 183 | } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(Val: BB->getTerminator())) { |
| 184 | for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) { |
| 185 | BasicBlock *Succ = CallBr->getSuccessor(i: J); |
| 186 | if (L->contains(BB: Succ)) |
| 187 | continue; |
| 188 | bool UpdatedLI = false; |
| 189 | BasicBlock *NewSucc = |
| 190 | SplitCallBrEdge(CallBrBlock: BB, Succ, SuccIdx: J, DTU: &DTU, CI: nullptr, LI: &LI, UpdatedLI: &UpdatedLI); |
| 191 | // SplitCallBrEdge modifies the CFG because it creates an intermediate |
| 192 | // block. So we need to set the changed flag no matter what the |
| 193 | // ControlFlowHub is going to do later. |
| 194 | Changed = true; |
| 195 | // Even if CallBr and Succ do not have a common parent loop, we need to |
| 196 | // add the new target block to the parent loop of the current loop. |
| 197 | if (!UpdatedLI) |
| 198 | CallBrTargetBlocksToFix.push_back(Elt: NewSucc); |
| 199 | // ExitingBlocks is later used to restore SSA, so we need to make sure |
| 200 | // that the blocks used for phi nodes in the guard blocks match the |
| 201 | // predecessors of the guard blocks, which, in the case of callbr, are |
| 202 | // the new intermediate target blocks instead of the callbr blocks |
| 203 | // themselves. |
| 204 | ExitingBlocks[I] = NewSucc; |
| 205 | CHub.addBranch(BB: NewSucc, Succ0: Succ); |
| 206 | LLVM_DEBUG(dbgs() << "Added exiting branch: " |
| 207 | << printBasicBlock(NewSucc) << " -> " |
| 208 | << printBasicBlock(Succ) << '\n'); |
| 209 | } |
| 210 | } else { |
| 211 | llvm_unreachable("unsupported block terminator" ); |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | SmallVector<BasicBlock *, 8> GuardBlocks; |
| 216 | BasicBlock *LoopExitBlock; |
| 217 | bool ChangedCFG; |
| 218 | std::tie(args&: LoopExitBlock, args&: ChangedCFG) = CHub.finalize( |
| 219 | DTU: &DTU, GuardBlocks, Prefix: "loop.exit" , MaxControlFlowBooleans: MaxBooleansInControlFlowHub.getValue()); |
| 220 | ChangedCFG |= Changed; |
| 221 | if (!ChangedCFG) |
| 222 | return false; |
| 223 | |
| 224 | restoreSSA(DT, L, Incoming&: ExitingBlocks, LoopExitBlock); |
| 225 | |
| 226 | #if defined(EXPENSIVE_CHECKS) |
| 227 | assert(DT.verify(DominatorTree::VerificationLevel::Full)); |
| 228 | #else |
| 229 | assert(DT.verify(DominatorTree::VerificationLevel::Fast)); |
| 230 | #endif // EXPENSIVE_CHECKS |
| 231 | L->verifyLoop(); |
| 232 | |
| 233 | // The guard blocks were created outside the loop, so they need to become |
| 234 | // members of the parent loop. |
| 235 | // Same goes for the callbr target blocks. Although we try to add them to the |
| 236 | // smallest common parent loop of the callbr block and the corresponding |
| 237 | // original target block, there might not have been such a loop, in which case |
| 238 | // the newly created callbr target blocks are not part of any loop. For nested |
| 239 | // loops, this might result in them leading to a loop with multiple entry |
| 240 | // points. |
| 241 | if (auto *ParentLoop = L->getParentLoop()) { |
| 242 | for (auto *G : GuardBlocks) { |
| 243 | ParentLoop->addBasicBlockToLoop(NewBB: G, LI); |
| 244 | } |
| 245 | for (auto *C : CallBrTargetBlocksToFix) { |
| 246 | ParentLoop->addBasicBlockToLoop(NewBB: C, LI); |
| 247 | } |
| 248 | ParentLoop->verifyLoop(); |
| 249 | } |
| 250 | |
| 251 | #if defined(EXPENSIVE_CHECKS) |
| 252 | LI.verify(DT); |
| 253 | #endif // EXPENSIVE_CHECKS |
| 254 | |
| 255 | return true; |
| 256 | } |
| 257 | |
| 258 | static bool runImpl(LoopInfo &LI, DominatorTree &DT) { |
| 259 | |
| 260 | bool Changed = false; |
| 261 | auto Loops = LI.getLoopsInPreorder(); |
| 262 | for (auto *L : Loops) { |
| 263 | LLVM_DEBUG(dbgs() << "Processing loop:\n" ; L->print(dbgs())); |
| 264 | Changed |= unifyLoopExits(DT, LI, L); |
| 265 | } |
| 266 | return Changed; |
| 267 | } |
| 268 | |
| 269 | bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) { |
| 270 | LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName() |
| 271 | << "\n" ); |
| 272 | auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
| 273 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
| 274 | |
| 275 | return runImpl(LI, DT); |
| 276 | } |
| 277 | |
| 278 | namespace llvm { |
| 279 | |
| 280 | PreservedAnalyses UnifyLoopExitsPass::run(Function &F, |
| 281 | FunctionAnalysisManager &AM) { |
| 282 | LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName() |
| 283 | << "\n" ); |
| 284 | auto &LI = AM.getResult<LoopAnalysis>(IR&: F); |
| 285 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 286 | |
| 287 | if (!runImpl(LI, DT)) |
| 288 | return PreservedAnalyses::all(); |
| 289 | PreservedAnalyses PA; |
| 290 | PA.preserve<LoopAnalysis>(); |
| 291 | PA.preserve<DominatorTreeAnalysis>(); |
| 292 | return PA; |
| 293 | } |
| 294 | } // namespace llvm |
| 295 | |