| 1 | //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Transform each threading path to effectively jump thread the DFA. For |
| 10 | // example, the CFG below could be transformed as follows, where the cloned |
| 11 | // blocks unconditionally branch to the next correct case based on what is |
| 12 | // identified in the analysis. |
| 13 | // |
| 14 | // sw.bb sw.bb |
| 15 | // / | \ / | \ |
| 16 | // case1 case2 case3 case1 case2 case3 |
| 17 | // \ | / | | | |
| 18 | // determinator det.2 det.3 det.1 |
| 19 | // br sw.bb / | \ |
| 20 | // sw.bb.2 sw.bb.3 sw.bb.1 |
| 21 | // br case2 br case3 br case1ยง |
| 22 | // |
| 23 | // Definitions and Terminology: |
| 24 | // |
| 25 | // * Threading path: |
| 26 | // a list of basic blocks, the exit state, and the block that determines |
| 27 | // the next state, for which the following notation will be used: |
| 28 | // < path of BBs that form a cycle > [ state, determinator ] |
| 29 | // |
| 30 | // * Predictable switch: |
| 31 | // The switch variable is always a known constant so that all conditional |
| 32 | // jumps based on switch variable can be converted to unconditional jump. |
| 33 | // |
| 34 | // * Determinator: |
| 35 | // The basic block that determines the next state of the DFA. |
| 36 | // |
| 37 | // Representing the optimization in C-like pseudocode: the code pattern on the |
| 38 | // left could functionally be transformed to the right pattern if the switch |
| 39 | // condition is predictable. |
| 40 | // |
| 41 | // X = A goto A |
| 42 | // for (...) A: |
| 43 | // switch (X) ... |
| 44 | // case A goto B |
| 45 | // X = B B: |
| 46 | // case B ... |
| 47 | // X = C goto C |
| 48 | // |
| 49 | // The pass first checks that switch variable X is decided by the control flow |
| 50 | // path taken in the loop; for example, in case B, the next value of X is |
| 51 | // decided to be C. It then enumerates through all paths in the loop and labels |
| 52 | // the basic blocks where the next state is decided. |
| 53 | // |
| 54 | // Using this information it creates new paths that unconditionally branch to |
| 55 | // the next case. This involves cloning code, so it only gets triggered if the |
| 56 | // amount of code duplicated is below a threshold. |
| 57 | // |
| 58 | //===----------------------------------------------------------------------===// |
| 59 | |
| 60 | #include "llvm/Transforms/Scalar/DFAJumpThreading.h" |
| 61 | #include "llvm/ADT/APInt.h" |
| 62 | #include "llvm/ADT/DenseMap.h" |
| 63 | #include "llvm/ADT/SmallSet.h" |
| 64 | #include "llvm/ADT/Statistic.h" |
| 65 | #include "llvm/Analysis/AssumptionCache.h" |
| 66 | #include "llvm/Analysis/CodeMetrics.h" |
| 67 | #include "llvm/Analysis/DomTreeUpdater.h" |
| 68 | #include "llvm/Analysis/LoopInfo.h" |
| 69 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| 70 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 71 | #include "llvm/IR/CFG.h" |
| 72 | #include "llvm/IR/Constants.h" |
| 73 | #include "llvm/IR/IntrinsicInst.h" |
| 74 | #include "llvm/Support/CommandLine.h" |
| 75 | #include "llvm/Support/Debug.h" |
| 76 | #include "llvm/Transforms/Utils/Cloning.h" |
| 77 | #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" |
| 78 | #include "llvm/Transforms/Utils/ValueMapper.h" |
| 79 | #include <deque> |
| 80 | |
| 81 | #ifdef EXPENSIVE_CHECKS |
| 82 | #include "llvm/IR/Verifier.h" |
| 83 | #endif |
| 84 | |
| 85 | using namespace llvm; |
| 86 | |
| 87 | #define DEBUG_TYPE "dfa-jump-threading" |
| 88 | |
| 89 | STATISTIC(NumTransforms, "Number of transformations done" ); |
| 90 | STATISTIC(NumCloned, "Number of blocks cloned" ); |
| 91 | STATISTIC(NumPaths, "Number of individual paths threaded" ); |
| 92 | |
| 93 | static cl::opt<bool> |
| 94 | ClViewCfgBefore("dfa-jump-view-cfg-before" , |
| 95 | cl::desc("View the CFG before DFA Jump Threading" ), |
| 96 | cl::Hidden, cl::init(Val: false)); |
| 97 | |
| 98 | static cl::opt<bool> EarlyExitHeuristic( |
| 99 | "dfa-early-exit-heuristic" , |
| 100 | cl::desc("Exit early if an unpredictable value come from the same loop" ), |
| 101 | cl::Hidden, cl::init(Val: true)); |
| 102 | |
| 103 | static cl::opt<unsigned> MaxPathLength( |
| 104 | "dfa-max-path-length" , |
| 105 | cl::desc("Max number of blocks searched to find a threading path" ), |
| 106 | cl::Hidden, cl::init(Val: 20)); |
| 107 | |
| 108 | static cl::opt<unsigned> MaxNumVisitiedPaths( |
| 109 | "dfa-max-num-visited-paths" , |
| 110 | cl::desc( |
| 111 | "Max number of blocks visited while enumerating paths around a switch" ), |
| 112 | cl::Hidden, cl::init(Val: 2500)); |
| 113 | |
| 114 | static cl::opt<unsigned> |
| 115 | MaxNumPaths("dfa-max-num-paths" , |
| 116 | cl::desc("Max number of paths enumerated around a switch" ), |
| 117 | cl::Hidden, cl::init(Val: 200)); |
| 118 | |
| 119 | static cl::opt<unsigned> |
| 120 | CostThreshold("dfa-cost-threshold" , |
| 121 | cl::desc("Maximum cost accepted for the transformation" ), |
| 122 | cl::Hidden, cl::init(Val: 50)); |
| 123 | |
| 124 | namespace { |
| 125 | |
| 126 | class SelectInstToUnfold { |
| 127 | SelectInst *SI; |
| 128 | PHINode *SIUse; |
| 129 | |
| 130 | public: |
| 131 | SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} |
| 132 | |
| 133 | SelectInst *getInst() { return SI; } |
| 134 | PHINode *getUse() { return SIUse; } |
| 135 | |
| 136 | explicit operator bool() const { return SI && SIUse; } |
| 137 | }; |
| 138 | |
| 139 | void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, |
| 140 | std::vector<SelectInstToUnfold> *NewSIsToUnfold, |
| 141 | std::vector<BasicBlock *> *NewBBs); |
| 142 | |
| 143 | class DFAJumpThreading { |
| 144 | public: |
| 145 | (AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, |
| 146 | TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) |
| 147 | : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {} |
| 148 | |
| 149 | bool run(Function &F); |
| 150 | bool LoopInfoBroken; |
| 151 | |
| 152 | private: |
| 153 | void |
| 154 | unfoldSelectInstrs(DominatorTree *DT, |
| 155 | const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { |
| 156 | DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); |
| 157 | SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts); |
| 158 | |
| 159 | while (!Stack.empty()) { |
| 160 | SelectInstToUnfold SIToUnfold = Stack.pop_back_val(); |
| 161 | |
| 162 | std::vector<SelectInstToUnfold> NewSIsToUnfold; |
| 163 | std::vector<BasicBlock *> NewBBs; |
| 164 | unfold(DTU: &DTU, LI, SIToUnfold, NewSIsToUnfold: &NewSIsToUnfold, NewBBs: &NewBBs); |
| 165 | |
| 166 | // Put newly discovered select instructions into the work list. |
| 167 | llvm::append_range(C&: Stack, R&: NewSIsToUnfold); |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | AssumptionCache *AC; |
| 172 | DominatorTree *DT; |
| 173 | LoopInfo *LI; |
| 174 | TargetTransformInfo *TTI; |
| 175 | OptimizationRemarkEmitter *ORE; |
| 176 | }; |
| 177 | |
| 178 | } // end anonymous namespace |
| 179 | |
| 180 | namespace { |
| 181 | |
| 182 | /// Unfold the select instruction held in \p SIToUnfold by replacing it with |
| 183 | /// control flow. |
| 184 | /// |
| 185 | /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly |
| 186 | /// created basic blocks into \p NewBBs. |
| 187 | /// |
| 188 | /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. |
| 189 | void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, |
| 190 | std::vector<SelectInstToUnfold> *NewSIsToUnfold, |
| 191 | std::vector<BasicBlock *> *NewBBs) { |
| 192 | SelectInst *SI = SIToUnfold.getInst(); |
| 193 | PHINode *SIUse = SIToUnfold.getUse(); |
| 194 | BasicBlock *StartBlock = SI->getParent(); |
| 195 | BranchInst *StartBlockTerm = |
| 196 | dyn_cast<BranchInst>(Val: StartBlock->getTerminator()); |
| 197 | |
| 198 | assert(StartBlockTerm); |
| 199 | assert(SI->hasOneUse()); |
| 200 | |
| 201 | if (StartBlockTerm->isUnconditional()) { |
| 202 | BasicBlock *EndBlock = StartBlock->getUniqueSuccessor(); |
| 203 | // Arbitrarily choose the 'false' side for a new input value to the PHI. |
| 204 | BasicBlock *NewBlock = BasicBlock::Create( |
| 205 | Context&: SI->getContext(), Name: Twine(SI->getName(), ".si.unfold.false" ), |
| 206 | Parent: EndBlock->getParent(), InsertBefore: EndBlock); |
| 207 | NewBBs->push_back(x: NewBlock); |
| 208 | BranchInst::Create(IfTrue: EndBlock, InsertBefore: NewBlock); |
| 209 | DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBlock, EndBlock}}); |
| 210 | |
| 211 | // StartBlock |
| 212 | // | \ |
| 213 | // | NewBlock |
| 214 | // | / |
| 215 | // EndBlock |
| 216 | Value *SIOp1 = SI->getTrueValue(); |
| 217 | Value *SIOp2 = SI->getFalseValue(); |
| 218 | |
| 219 | PHINode *NewPhi = PHINode::Create(Ty: SIUse->getType(), NumReservedValues: 1, |
| 220 | NameStr: Twine(SIOp2->getName(), ".si.unfold.phi" ), |
| 221 | InsertBefore: NewBlock->getFirstInsertionPt()); |
| 222 | NewPhi->addIncoming(V: SIOp2, BB: StartBlock); |
| 223 | |
| 224 | // Update any other PHI nodes in EndBlock. |
| 225 | for (PHINode &Phi : EndBlock->phis()) { |
| 226 | if (SIUse == &Phi) |
| 227 | continue; |
| 228 | Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: StartBlock), BB: NewBlock); |
| 229 | } |
| 230 | |
| 231 | // Update the phi node of SI, which is its only use. |
| 232 | if (EndBlock == SIUse->getParent()) { |
| 233 | SIUse->addIncoming(V: NewPhi, BB: NewBlock); |
| 234 | SIUse->replaceUsesOfWith(From: SI, To: SIOp1); |
| 235 | } else { |
| 236 | PHINode *EndPhi = PHINode::Create(Ty: SIUse->getType(), NumReservedValues: pred_size(BB: EndBlock), |
| 237 | NameStr: Twine(SI->getName(), ".si.unfold.phi" ), |
| 238 | InsertBefore: EndBlock->getFirstInsertionPt()); |
| 239 | for (BasicBlock *Pred : predecessors(BB: EndBlock)) { |
| 240 | if (Pred != StartBlock && Pred != NewBlock) |
| 241 | EndPhi->addIncoming(V: EndPhi, BB: Pred); |
| 242 | } |
| 243 | |
| 244 | EndPhi->addIncoming(V: SIOp1, BB: StartBlock); |
| 245 | EndPhi->addIncoming(V: NewPhi, BB: NewBlock); |
| 246 | SIUse->replaceUsesOfWith(From: SI, To: EndPhi); |
| 247 | SIUse = EndPhi; |
| 248 | } |
| 249 | |
| 250 | if (auto *OpSi = dyn_cast<SelectInst>(Val: SIOp1)) |
| 251 | NewSIsToUnfold->push_back(x: SelectInstToUnfold(OpSi, SIUse)); |
| 252 | if (auto *OpSi = dyn_cast<SelectInst>(Val: SIOp2)) |
| 253 | NewSIsToUnfold->push_back(x: SelectInstToUnfold(OpSi, NewPhi)); |
| 254 | |
| 255 | // Insert the real conditional branch based on the original condition. |
| 256 | StartBlockTerm->eraseFromParent(); |
| 257 | BranchInst::Create(IfTrue: EndBlock, IfFalse: NewBlock, Cond: SI->getCondition(), InsertBefore: StartBlock); |
| 258 | DTU->applyUpdates(Updates: {{DominatorTree::Insert, StartBlock, EndBlock}, |
| 259 | {DominatorTree::Insert, StartBlock, NewBlock}}); |
| 260 | } else { |
| 261 | BasicBlock *EndBlock = SIUse->getParent(); |
| 262 | BasicBlock *NewBlockT = BasicBlock::Create( |
| 263 | Context&: SI->getContext(), Name: Twine(SI->getName(), ".si.unfold.true" ), |
| 264 | Parent: EndBlock->getParent(), InsertBefore: EndBlock); |
| 265 | BasicBlock *NewBlockF = BasicBlock::Create( |
| 266 | Context&: SI->getContext(), Name: Twine(SI->getName(), ".si.unfold.false" ), |
| 267 | Parent: EndBlock->getParent(), InsertBefore: EndBlock); |
| 268 | |
| 269 | NewBBs->push_back(x: NewBlockT); |
| 270 | NewBBs->push_back(x: NewBlockF); |
| 271 | |
| 272 | // Def only has one use in EndBlock. |
| 273 | // Before transformation: |
| 274 | // StartBlock(Def) |
| 275 | // | \ |
| 276 | // EndBlock OtherBlock |
| 277 | // (Use) |
| 278 | // |
| 279 | // After transformation: |
| 280 | // StartBlock(Def) |
| 281 | // | \ |
| 282 | // | OtherBlock |
| 283 | // NewBlockT |
| 284 | // | \ |
| 285 | // | NewBlockF |
| 286 | // | / |
| 287 | // | / |
| 288 | // EndBlock |
| 289 | // (Use) |
| 290 | BranchInst::Create(IfTrue: EndBlock, InsertBefore: NewBlockF); |
| 291 | // Insert the real conditional branch based on the original condition. |
| 292 | BranchInst::Create(IfTrue: EndBlock, IfFalse: NewBlockF, Cond: SI->getCondition(), InsertBefore: NewBlockT); |
| 293 | DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBlockT, NewBlockF}, |
| 294 | {DominatorTree::Insert, NewBlockT, EndBlock}, |
| 295 | {DominatorTree::Insert, NewBlockF, EndBlock}}); |
| 296 | |
| 297 | Value *TrueVal = SI->getTrueValue(); |
| 298 | Value *FalseVal = SI->getFalseValue(); |
| 299 | |
| 300 | PHINode *NewPhiT = PHINode::Create( |
| 301 | Ty: SIUse->getType(), NumReservedValues: 1, NameStr: Twine(TrueVal->getName(), ".si.unfold.phi" ), |
| 302 | InsertBefore: NewBlockT->getFirstInsertionPt()); |
| 303 | PHINode *NewPhiF = PHINode::Create( |
| 304 | Ty: SIUse->getType(), NumReservedValues: 1, NameStr: Twine(FalseVal->getName(), ".si.unfold.phi" ), |
| 305 | InsertBefore: NewBlockF->getFirstInsertionPt()); |
| 306 | NewPhiT->addIncoming(V: TrueVal, BB: StartBlock); |
| 307 | NewPhiF->addIncoming(V: FalseVal, BB: NewBlockT); |
| 308 | |
| 309 | if (auto *TrueSI = dyn_cast<SelectInst>(Val: TrueVal)) |
| 310 | NewSIsToUnfold->push_back(x: SelectInstToUnfold(TrueSI, NewPhiT)); |
| 311 | if (auto *FalseSi = dyn_cast<SelectInst>(Val: FalseVal)) |
| 312 | NewSIsToUnfold->push_back(x: SelectInstToUnfold(FalseSi, NewPhiF)); |
| 313 | |
| 314 | SIUse->addIncoming(V: NewPhiT, BB: NewBlockT); |
| 315 | SIUse->addIncoming(V: NewPhiF, BB: NewBlockF); |
| 316 | SIUse->removeIncomingValue(BB: StartBlock); |
| 317 | |
| 318 | // Update any other PHI nodes in EndBlock. |
| 319 | for (PHINode &Phi : EndBlock->phis()) { |
| 320 | if (SIUse == &Phi) |
| 321 | continue; |
| 322 | Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: StartBlock), BB: NewBlockT); |
| 323 | Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: StartBlock), BB: NewBlockF); |
| 324 | Phi.removeIncomingValue(BB: StartBlock); |
| 325 | } |
| 326 | |
| 327 | // Update the appropriate successor of the start block to point to the new |
| 328 | // unfolded block. |
| 329 | unsigned SuccNum = StartBlockTerm->getSuccessor(i: 1) == EndBlock ? 1 : 0; |
| 330 | StartBlockTerm->setSuccessor(idx: SuccNum, NewSucc: NewBlockT); |
| 331 | DTU->applyUpdates(Updates: {{DominatorTree::Delete, StartBlock, EndBlock}, |
| 332 | {DominatorTree::Insert, StartBlock, NewBlockT}}); |
| 333 | } |
| 334 | |
| 335 | // Preserve loop info |
| 336 | if (Loop *L = LI->getLoopFor(BB: SI->getParent())) { |
| 337 | for (BasicBlock *NewBB : *NewBBs) |
| 338 | L->addBasicBlockToLoop(NewBB, LI&: *LI); |
| 339 | } |
| 340 | |
| 341 | // The select is now dead. |
| 342 | assert(SI->use_empty() && "Select must be dead now" ); |
| 343 | SI->eraseFromParent(); |
| 344 | } |
| 345 | |
| 346 | struct ClonedBlock { |
| 347 | BasicBlock *BB; |
| 348 | APInt State; ///< \p State corresponds to the next value of a switch stmnt. |
| 349 | }; |
| 350 | |
| 351 | typedef std::deque<BasicBlock *> PathType; |
| 352 | typedef std::vector<PathType> PathsType; |
| 353 | typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; |
| 354 | typedef std::vector<ClonedBlock> CloneList; |
| 355 | |
| 356 | // This data structure keeps track of all blocks that have been cloned. If two |
| 357 | // different ThreadingPaths clone the same block for a certain state it should |
| 358 | // be reused, and it can be looked up in this map. |
| 359 | typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; |
| 360 | |
| 361 | // This map keeps track of all the new definitions for an instruction. This |
| 362 | // information is needed when restoring SSA form after cloning blocks. |
| 363 | typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; |
| 364 | |
| 365 | inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { |
| 366 | OS << "< " ; |
| 367 | for (const BasicBlock *BB : Path) { |
| 368 | std::string BBName; |
| 369 | if (BB->hasName()) |
| 370 | raw_string_ostream(BBName) << BB->getName(); |
| 371 | else |
| 372 | raw_string_ostream(BBName) << BB; |
| 373 | OS << BBName << " " ; |
| 374 | } |
| 375 | OS << ">" ; |
| 376 | return OS; |
| 377 | } |
| 378 | |
| 379 | /// ThreadingPath is a path in the control flow of a loop that can be threaded |
| 380 | /// by cloning necessary basic blocks and replacing conditional branches with |
| 381 | /// unconditional ones. A threading path includes a list of basic blocks, the |
| 382 | /// exit state, and the block that determines the next state. |
| 383 | struct ThreadingPath { |
| 384 | /// Exit value is DFA's exit state for the given path. |
| 385 | APInt getExitValue() const { return ExitVal; } |
| 386 | void setExitValue(const ConstantInt *V) { |
| 387 | ExitVal = V->getValue(); |
| 388 | IsExitValSet = true; |
| 389 | } |
| 390 | bool isExitValueSet() const { return IsExitValSet; } |
| 391 | |
| 392 | /// Determinator is the basic block that determines the next state of the DFA. |
| 393 | const BasicBlock *getDeterminatorBB() const { return DBB; } |
| 394 | void setDeterminator(const BasicBlock *BB) { DBB = BB; } |
| 395 | |
| 396 | /// Path is a list of basic blocks. |
| 397 | const PathType &getPath() const { return Path; } |
| 398 | void setPath(const PathType &NewPath) { Path = NewPath; } |
| 399 | void push_back(BasicBlock *BB) { Path.push_back(x: BB); } |
| 400 | void push_front(BasicBlock *BB) { Path.push_front(x: BB); } |
| 401 | void appendExcludingFirst(const PathType &OtherPath) { |
| 402 | llvm::append_range(C&: Path, R: llvm::drop_begin(RangeOrContainer: OtherPath)); |
| 403 | } |
| 404 | |
| 405 | void print(raw_ostream &OS) const { |
| 406 | OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]" ; |
| 407 | } |
| 408 | |
| 409 | private: |
| 410 | PathType Path; |
| 411 | APInt ExitVal; |
| 412 | const BasicBlock *DBB = nullptr; |
| 413 | bool IsExitValSet = false; |
| 414 | }; |
| 415 | |
| 416 | #ifndef NDEBUG |
| 417 | inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { |
| 418 | TPath.print(OS); |
| 419 | return OS; |
| 420 | } |
| 421 | #endif |
| 422 | |
| 423 | struct MainSwitch { |
| 424 | MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE) |
| 425 | : LI(LI) { |
| 426 | if (isCandidate(SI)) { |
| 427 | Instr = SI; |
| 428 | } else { |
| 429 | ORE->emit(RemarkBuilder: [&]() { |
| 430 | return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable" , SI) |
| 431 | << "Switch instruction is not predictable." ; |
| 432 | }); |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | virtual ~MainSwitch() = default; |
| 437 | |
| 438 | SwitchInst *getInstr() const { return Instr; } |
| 439 | const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { |
| 440 | return SelectInsts; |
| 441 | } |
| 442 | |
| 443 | private: |
| 444 | /// Do a use-def chain traversal starting from the switch condition to see if |
| 445 | /// \p SI is a potential condidate. |
| 446 | /// |
| 447 | /// Also, collect select instructions to unfold. |
| 448 | bool isCandidate(const SwitchInst *SI) { |
| 449 | std::deque<std::pair<Value *, BasicBlock *>> Q; |
| 450 | SmallSet<Value *, 16> SeenValues; |
| 451 | SelectInsts.clear(); |
| 452 | |
| 453 | Value *SICond = SI->getCondition(); |
| 454 | LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n" ); |
| 455 | if (!isa<PHINode>(Val: SICond)) |
| 456 | return false; |
| 457 | |
| 458 | // The switch must be in a loop. |
| 459 | const Loop *L = LI->getLoopFor(BB: SI->getParent()); |
| 460 | if (!L) |
| 461 | return false; |
| 462 | |
| 463 | addToQueue(Val: SICond, BB: nullptr, Q, SeenValues); |
| 464 | |
| 465 | while (!Q.empty()) { |
| 466 | Value *Current = Q.front().first; |
| 467 | BasicBlock *CurrentIncomingBB = Q.front().second; |
| 468 | Q.pop_front(); |
| 469 | |
| 470 | if (auto *Phi = dyn_cast<PHINode>(Val: Current)) { |
| 471 | for (BasicBlock *IncomingBB : Phi->blocks()) { |
| 472 | Value *Incoming = Phi->getIncomingValueForBlock(BB: IncomingBB); |
| 473 | addToQueue(Val: Incoming, BB: IncomingBB, Q, SeenValues); |
| 474 | } |
| 475 | LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n" ); |
| 476 | } else if (SelectInst *SelI = dyn_cast<SelectInst>(Val: Current)) { |
| 477 | if (!isValidSelectInst(SI: SelI)) |
| 478 | return false; |
| 479 | addToQueue(Val: SelI->getTrueValue(), BB: CurrentIncomingBB, Q, SeenValues); |
| 480 | addToQueue(Val: SelI->getFalseValue(), BB: CurrentIncomingBB, Q, SeenValues); |
| 481 | LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n" ); |
| 482 | if (auto *SelIUse = dyn_cast<PHINode>(Val: SelI->user_back())) |
| 483 | SelectInsts.push_back(Elt: SelectInstToUnfold(SelI, SelIUse)); |
| 484 | } else if (isa<Constant>(Val: Current)) { |
| 485 | LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n" ); |
| 486 | continue; |
| 487 | } else { |
| 488 | LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n" ); |
| 489 | // Allow unpredictable values. The hope is that those will be the |
| 490 | // initial switch values that can be ignored (they will hit the |
| 491 | // unthreaded switch) but this assumption will get checked later after |
| 492 | // paths have been enumerated (in function getStateDefMap). |
| 493 | |
| 494 | // If the unpredictable value comes from the same inner loop it is |
| 495 | // likely that it will also be on the enumerated paths, causing us to |
| 496 | // exit after we have enumerated all the paths. This heuristic save |
| 497 | // compile time because a search for all the paths can become expensive. |
| 498 | if (EarlyExitHeuristic && |
| 499 | L->contains(L: LI->getLoopFor(BB: CurrentIncomingBB))) { |
| 500 | LLVM_DEBUG(dbgs() |
| 501 | << "\tExiting early due to unpredictability heuristic.\n" ); |
| 502 | return false; |
| 503 | } |
| 504 | |
| 505 | continue; |
| 506 | } |
| 507 | } |
| 508 | |
| 509 | return true; |
| 510 | } |
| 511 | |
| 512 | void addToQueue(Value *Val, BasicBlock *BB, |
| 513 | std::deque<std::pair<Value *, BasicBlock *>> &Q, |
| 514 | SmallSet<Value *, 16> &SeenValues) { |
| 515 | if (SeenValues.insert(Ptr: Val).second) |
| 516 | Q.push_back(x: {Val, BB}); |
| 517 | } |
| 518 | |
| 519 | bool isValidSelectInst(SelectInst *SI) { |
| 520 | if (!SI->hasOneUse()) |
| 521 | return false; |
| 522 | |
| 523 | Instruction *SIUse = dyn_cast<Instruction>(Val: SI->user_back()); |
| 524 | // The use of the select inst should be either a phi or another select. |
| 525 | if (!SIUse && !(isa<PHINode>(Val: SIUse) || isa<SelectInst>(Val: SIUse))) |
| 526 | return false; |
| 527 | |
| 528 | BasicBlock *SIBB = SI->getParent(); |
| 529 | |
| 530 | // Currently, we can only expand select instructions in basic blocks with |
| 531 | // one successor. |
| 532 | BranchInst *SITerm = dyn_cast<BranchInst>(Val: SIBB->getTerminator()); |
| 533 | if (!SITerm || !SITerm->isUnconditional()) |
| 534 | return false; |
| 535 | |
| 536 | // Only fold the select coming from directly where it is defined. |
| 537 | PHINode *PHIUser = dyn_cast<PHINode>(Val: SIUse); |
| 538 | if (PHIUser && PHIUser->getIncomingBlock(U: *SI->use_begin()) != SIBB) |
| 539 | return false; |
| 540 | |
| 541 | // If select will not be sunk during unfolding, and it is in the same basic |
| 542 | // block as another state defining select, then cannot unfold both. |
| 543 | for (SelectInstToUnfold SIToUnfold : SelectInsts) { |
| 544 | SelectInst *PrevSI = SIToUnfold.getInst(); |
| 545 | if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && |
| 546 | PrevSI->getParent() == SI->getParent()) |
| 547 | return false; |
| 548 | } |
| 549 | |
| 550 | return true; |
| 551 | } |
| 552 | |
| 553 | LoopInfo *LI; |
| 554 | SwitchInst *Instr = nullptr; |
| 555 | SmallVector<SelectInstToUnfold, 4> SelectInsts; |
| 556 | }; |
| 557 | |
| 558 | struct AllSwitchPaths { |
| 559 | AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE, |
| 560 | LoopInfo *LI, Loop *L) |
| 561 | : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE), |
| 562 | LI(LI), SwitchOuterLoop(L) {} |
| 563 | |
| 564 | std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } |
| 565 | unsigned getNumThreadingPaths() { return TPaths.size(); } |
| 566 | SwitchInst *getSwitchInst() { return Switch; } |
| 567 | BasicBlock *getSwitchBlock() { return SwitchBlock; } |
| 568 | |
| 569 | void run() { |
| 570 | StateDefMap StateDef = getStateDefMap(); |
| 571 | if (StateDef.empty()) { |
| 572 | ORE->emit(RemarkBuilder: [&]() { |
| 573 | return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable" , |
| 574 | Switch) |
| 575 | << "Switch instruction is not predictable." ; |
| 576 | }); |
| 577 | return; |
| 578 | } |
| 579 | |
| 580 | auto *SwitchPhi = cast<PHINode>(Val: Switch->getOperand(i_nocapture: 0)); |
| 581 | auto *SwitchPhiDefBB = SwitchPhi->getParent(); |
| 582 | VisitedBlocks VB; |
| 583 | // Get paths from the determinator BBs to SwitchPhiDefBB |
| 584 | std::vector<ThreadingPath> PathsToPhiDef = |
| 585 | getPathsFromStateDefMap(StateDef, Phi: SwitchPhi, VB); |
| 586 | if (SwitchPhiDefBB == SwitchBlock) { |
| 587 | TPaths = std::move(PathsToPhiDef); |
| 588 | return; |
| 589 | } |
| 590 | |
| 591 | // Find and append paths from SwitchPhiDefBB to SwitchBlock. |
| 592 | PathsType PathsToSwitchBB = |
| 593 | paths(BB: SwitchPhiDefBB, ToBB: SwitchBlock, Visited&: VB, /* PathDepth = */ 1); |
| 594 | if (PathsToSwitchBB.empty()) |
| 595 | return; |
| 596 | |
| 597 | std::vector<ThreadingPath> TempList; |
| 598 | for (const ThreadingPath &Path : PathsToPhiDef) { |
| 599 | for (const PathType &PathToSw : PathsToSwitchBB) { |
| 600 | ThreadingPath PathCopy(Path); |
| 601 | PathCopy.appendExcludingFirst(OtherPath: PathToSw); |
| 602 | TempList.push_back(x: PathCopy); |
| 603 | } |
| 604 | } |
| 605 | TPaths = std::move(TempList); |
| 606 | } |
| 607 | |
| 608 | private: |
| 609 | // Value: an instruction that defines a switch state; |
| 610 | // Key: the parent basic block of that instruction. |
| 611 | typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; |
| 612 | std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef, |
| 613 | PHINode *Phi, |
| 614 | VisitedBlocks &VB) { |
| 615 | std::vector<ThreadingPath> Res; |
| 616 | auto *PhiBB = Phi->getParent(); |
| 617 | VB.insert(Ptr: PhiBB); |
| 618 | |
| 619 | VisitedBlocks UniqueBlocks; |
| 620 | for (auto *IncomingBB : Phi->blocks()) { |
| 621 | if (!UniqueBlocks.insert(Ptr: IncomingBB).second) |
| 622 | continue; |
| 623 | if (!SwitchOuterLoop->contains(BB: IncomingBB)) |
| 624 | continue; |
| 625 | |
| 626 | Value *IncomingValue = Phi->getIncomingValueForBlock(BB: IncomingBB); |
| 627 | // We found the determinator. This is the start of our path. |
| 628 | if (auto *C = dyn_cast<ConstantInt>(Val: IncomingValue)) { |
| 629 | // SwitchBlock is the determinator, unsupported unless its also the def. |
| 630 | if (PhiBB == SwitchBlock && |
| 631 | SwitchBlock != cast<PHINode>(Val: Switch->getOperand(i_nocapture: 0))->getParent()) |
| 632 | continue; |
| 633 | ThreadingPath NewPath; |
| 634 | NewPath.setDeterminator(PhiBB); |
| 635 | NewPath.setExitValue(C); |
| 636 | // Don't add SwitchBlock at the start, this is handled later. |
| 637 | if (IncomingBB != SwitchBlock) |
| 638 | NewPath.push_back(BB: IncomingBB); |
| 639 | NewPath.push_back(BB: PhiBB); |
| 640 | Res.push_back(x: NewPath); |
| 641 | continue; |
| 642 | } |
| 643 | // Don't get into a cycle. |
| 644 | if (VB.contains(Ptr: IncomingBB) || IncomingBB == SwitchBlock) |
| 645 | continue; |
| 646 | // Recurse up the PHI chain. |
| 647 | auto *IncomingPhi = dyn_cast<PHINode>(Val: IncomingValue); |
| 648 | if (!IncomingPhi) |
| 649 | continue; |
| 650 | auto *IncomingPhiDefBB = IncomingPhi->getParent(); |
| 651 | if (!StateDef.contains(Val: IncomingPhiDefBB)) |
| 652 | continue; |
| 653 | |
| 654 | // Direct predecessor, just add to the path. |
| 655 | if (IncomingPhiDefBB == IncomingBB) { |
| 656 | std::vector<ThreadingPath> PredPaths = |
| 657 | getPathsFromStateDefMap(StateDef, Phi: IncomingPhi, VB); |
| 658 | for (ThreadingPath &Path : PredPaths) { |
| 659 | Path.push_back(BB: PhiBB); |
| 660 | Res.push_back(x: std::move(Path)); |
| 661 | } |
| 662 | continue; |
| 663 | } |
| 664 | // Not a direct predecessor, find intermediate paths to append to the |
| 665 | // existing path. |
| 666 | if (VB.contains(Ptr: IncomingPhiDefBB)) |
| 667 | continue; |
| 668 | |
| 669 | PathsType IntermediatePaths; |
| 670 | IntermediatePaths = |
| 671 | paths(BB: IncomingPhiDefBB, ToBB: IncomingBB, Visited&: VB, /* PathDepth = */ 1); |
| 672 | if (IntermediatePaths.empty()) |
| 673 | continue; |
| 674 | |
| 675 | std::vector<ThreadingPath> PredPaths = |
| 676 | getPathsFromStateDefMap(StateDef, Phi: IncomingPhi, VB); |
| 677 | for (const ThreadingPath &Path : PredPaths) { |
| 678 | for (const PathType &IPath : IntermediatePaths) { |
| 679 | ThreadingPath NewPath(Path); |
| 680 | NewPath.appendExcludingFirst(OtherPath: IPath); |
| 681 | NewPath.push_back(BB: PhiBB); |
| 682 | Res.push_back(x: NewPath); |
| 683 | } |
| 684 | } |
| 685 | } |
| 686 | VB.erase(Ptr: PhiBB); |
| 687 | return Res; |
| 688 | } |
| 689 | |
| 690 | PathsType paths(BasicBlock *BB, BasicBlock *ToBB, VisitedBlocks &Visited, |
| 691 | unsigned PathDepth) { |
| 692 | PathsType Res; |
| 693 | |
| 694 | // Stop exploring paths after visiting MaxPathLength blocks |
| 695 | if (PathDepth > MaxPathLength) { |
| 696 | ORE->emit(RemarkBuilder: [&]() { |
| 697 | return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached" , |
| 698 | Switch) |
| 699 | << "Exploration stopped after visiting MaxPathLength=" |
| 700 | << ore::NV("MaxPathLength" , MaxPathLength) << " blocks." ; |
| 701 | }); |
| 702 | return Res; |
| 703 | } |
| 704 | |
| 705 | Visited.insert(Ptr: BB); |
| 706 | if (++NumVisited > MaxNumVisitiedPaths) |
| 707 | return Res; |
| 708 | |
| 709 | // Stop if we have reached the BB out of loop, since its successors have no |
| 710 | // impact on the DFA. |
| 711 | if (!SwitchOuterLoop->contains(BB)) |
| 712 | return Res; |
| 713 | |
| 714 | // Some blocks have multiple edges to the same successor, and this set |
| 715 | // is used to prevent a duplicate path from being generated |
| 716 | SmallSet<BasicBlock *, 4> Successors; |
| 717 | for (BasicBlock *Succ : successors(BB)) { |
| 718 | if (!Successors.insert(Ptr: Succ).second) |
| 719 | continue; |
| 720 | |
| 721 | // Found a cycle through the final block. |
| 722 | if (Succ == ToBB) { |
| 723 | Res.push_back(x: {BB, ToBB}); |
| 724 | continue; |
| 725 | } |
| 726 | |
| 727 | // We have encountered a cycle, do not get caught in it |
| 728 | if (Visited.contains(Ptr: Succ)) |
| 729 | continue; |
| 730 | |
| 731 | auto *CurrLoop = LI->getLoopFor(BB); |
| 732 | // Unlikely to be beneficial. |
| 733 | if (Succ == CurrLoop->getHeader()) |
| 734 | continue; |
| 735 | // Skip for now, revisit this condition later to see the impact on |
| 736 | // coverage and compile time. |
| 737 | if (LI->getLoopFor(BB: Succ) != CurrLoop) |
| 738 | continue; |
| 739 | |
| 740 | PathsType SuccPaths = paths(BB: Succ, ToBB, Visited, PathDepth: PathDepth + 1); |
| 741 | for (PathType &Path : SuccPaths) { |
| 742 | Path.push_front(x: BB); |
| 743 | Res.push_back(x: Path); |
| 744 | if (Res.size() >= MaxNumPaths) { |
| 745 | return Res; |
| 746 | } |
| 747 | } |
| 748 | } |
| 749 | // This block could now be visited again from a different predecessor. Note |
| 750 | // that this will result in exponential runtime. Subpaths could possibly be |
| 751 | // cached but it takes a lot of memory to store them. |
| 752 | Visited.erase(Ptr: BB); |
| 753 | return Res; |
| 754 | } |
| 755 | |
| 756 | /// Walk the use-def chain and collect all the state-defining blocks and the |
| 757 | /// PHI nodes in those blocks that define the state. |
| 758 | StateDefMap getStateDefMap() const { |
| 759 | StateDefMap Res; |
| 760 | PHINode *FirstDef = dyn_cast<PHINode>(Val: Switch->getOperand(i_nocapture: 0)); |
| 761 | assert(FirstDef && "The first definition must be a phi." ); |
| 762 | |
| 763 | SmallVector<PHINode *, 8> Stack; |
| 764 | Stack.push_back(Elt: FirstDef); |
| 765 | SmallSet<Value *, 16> SeenValues; |
| 766 | |
| 767 | while (!Stack.empty()) { |
| 768 | PHINode *CurPhi = Stack.pop_back_val(); |
| 769 | |
| 770 | Res[CurPhi->getParent()] = CurPhi; |
| 771 | SeenValues.insert(Ptr: CurPhi); |
| 772 | |
| 773 | for (BasicBlock *IncomingBB : CurPhi->blocks()) { |
| 774 | PHINode *IncomingPhi = |
| 775 | dyn_cast<PHINode>(Val: CurPhi->getIncomingValueForBlock(BB: IncomingBB)); |
| 776 | if (!IncomingPhi) |
| 777 | continue; |
| 778 | bool IsOutsideLoops = !SwitchOuterLoop->contains(BB: IncomingBB); |
| 779 | if (SeenValues.contains(Ptr: IncomingPhi) || IsOutsideLoops) |
| 780 | continue; |
| 781 | |
| 782 | Stack.push_back(Elt: IncomingPhi); |
| 783 | } |
| 784 | } |
| 785 | |
| 786 | return Res; |
| 787 | } |
| 788 | |
| 789 | unsigned NumVisited = 0; |
| 790 | SwitchInst *Switch; |
| 791 | BasicBlock *SwitchBlock; |
| 792 | OptimizationRemarkEmitter *ORE; |
| 793 | std::vector<ThreadingPath> TPaths; |
| 794 | LoopInfo *LI; |
| 795 | Loop *SwitchOuterLoop; |
| 796 | }; |
| 797 | |
| 798 | struct TransformDFA { |
| 799 | TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, |
| 800 | AssumptionCache *AC, TargetTransformInfo *TTI, |
| 801 | OptimizationRemarkEmitter *ORE, |
| 802 | SmallPtrSet<const Value *, 32> EphValues) |
| 803 | : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), |
| 804 | EphValues(EphValues) {} |
| 805 | |
| 806 | void run() { |
| 807 | if (isLegalAndProfitableToTransform()) { |
| 808 | createAllExitPaths(); |
| 809 | NumTransforms++; |
| 810 | } |
| 811 | } |
| 812 | |
| 813 | private: |
| 814 | /// This function performs both a legality check and profitability check at |
| 815 | /// the same time since it is convenient to do so. It iterates through all |
| 816 | /// blocks that will be cloned, and keeps track of the duplication cost. It |
| 817 | /// also returns false if it is illegal to clone some required block. |
| 818 | bool isLegalAndProfitableToTransform() { |
| 819 | CodeMetrics Metrics; |
| 820 | SwitchInst *Switch = SwitchPaths->getSwitchInst(); |
| 821 | |
| 822 | // Don't thread switch without multiple successors. |
| 823 | if (Switch->getNumSuccessors() <= 1) |
| 824 | return false; |
| 825 | |
| 826 | // Note that DuplicateBlockMap is not being used as intended here. It is |
| 827 | // just being used to ensure (BB, State) pairs are only counted once. |
| 828 | DuplicateBlockMap DuplicateMap; |
| 829 | |
| 830 | for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { |
| 831 | PathType PathBBs = TPath.getPath(); |
| 832 | APInt NextState = TPath.getExitValue(); |
| 833 | const BasicBlock *Determinator = TPath.getDeterminatorBB(); |
| 834 | |
| 835 | // Update Metrics for the Switch block, this is always cloned |
| 836 | BasicBlock *BB = SwitchPaths->getSwitchBlock(); |
| 837 | BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); |
| 838 | if (!VisitedBB) { |
| 839 | Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues); |
| 840 | DuplicateMap[BB].push_back(x: {.BB: BB, .State: NextState}); |
| 841 | } |
| 842 | |
| 843 | // If the Switch block is the Determinator, then we can continue since |
| 844 | // this is the only block that is cloned and we already counted for it. |
| 845 | if (PathBBs.front() == Determinator) |
| 846 | continue; |
| 847 | |
| 848 | // Otherwise update Metrics for all blocks that will be cloned. If any |
| 849 | // block is already cloned and would be reused, don't double count it. |
| 850 | auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator); |
| 851 | for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { |
| 852 | BB = *BBIt; |
| 853 | VisitedBB = getClonedBB(BB, NextState, DuplicateMap); |
| 854 | if (VisitedBB) |
| 855 | continue; |
| 856 | Metrics.analyzeBasicBlock(BB, TTI: *TTI, EphValues); |
| 857 | DuplicateMap[BB].push_back(x: {.BB: BB, .State: NextState}); |
| 858 | } |
| 859 | |
| 860 | if (Metrics.notDuplicatable) { |
| 861 | LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " |
| 862 | << "non-duplicatable instructions.\n" ); |
| 863 | ORE->emit(RemarkBuilder: [&]() { |
| 864 | return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst" , |
| 865 | Switch) |
| 866 | << "Contains non-duplicatable instructions." ; |
| 867 | }); |
| 868 | return false; |
| 869 | } |
| 870 | |
| 871 | // FIXME: Allow jump threading with controlled convergence. |
| 872 | if (Metrics.Convergence != ConvergenceKind::None) { |
| 873 | LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " |
| 874 | << "convergent instructions.\n" ); |
| 875 | ORE->emit(RemarkBuilder: [&]() { |
| 876 | return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst" , Switch) |
| 877 | << "Contains convergent instructions." ; |
| 878 | }); |
| 879 | return false; |
| 880 | } |
| 881 | |
| 882 | if (!Metrics.NumInsts.isValid()) { |
| 883 | LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " |
| 884 | << "instructions with invalid cost.\n" ); |
| 885 | ORE->emit(RemarkBuilder: [&]() { |
| 886 | return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst" , Switch) |
| 887 | << "Contains instructions with invalid cost." ; |
| 888 | }); |
| 889 | return false; |
| 890 | } |
| 891 | } |
| 892 | |
| 893 | InstructionCost DuplicationCost = 0; |
| 894 | |
| 895 | unsigned JumpTableSize = 0; |
| 896 | TTI->getEstimatedNumberOfCaseClusters(SI: *Switch, JTSize&: JumpTableSize, PSI: nullptr, |
| 897 | BFI: nullptr); |
| 898 | if (JumpTableSize == 0) { |
| 899 | // Factor in the number of conditional branches reduced from jump |
| 900 | // threading. Assume that lowering the switch block is implemented by |
| 901 | // using binary search, hence the LogBase2(). |
| 902 | unsigned CondBranches = |
| 903 | APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); |
| 904 | assert(CondBranches > 0 && |
| 905 | "The threaded switch must have multiple branches" ); |
| 906 | DuplicationCost = Metrics.NumInsts / CondBranches; |
| 907 | } else { |
| 908 | // Compared with jump tables, the DFA optimizer removes an indirect branch |
| 909 | // on each loop iteration, thus making branch prediction more precise. The |
| 910 | // more branch targets there are, the more likely it is for the branch |
| 911 | // predictor to make a mistake, and the more benefit there is in the DFA |
| 912 | // optimizer. Thus, the more branch targets there are, the lower is the |
| 913 | // cost of the DFA opt. |
| 914 | DuplicationCost = Metrics.NumInsts / JumpTableSize; |
| 915 | } |
| 916 | |
| 917 | LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " |
| 918 | << SwitchPaths->getSwitchBlock()->getName() |
| 919 | << " is: " << DuplicationCost << "\n\n" ); |
| 920 | |
| 921 | if (DuplicationCost > CostThreshold) { |
| 922 | LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " |
| 923 | << "cost threshold.\n" ); |
| 924 | ORE->emit(RemarkBuilder: [&]() { |
| 925 | return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable" , Switch) |
| 926 | << "Duplication cost exceeds the cost threshold (cost=" |
| 927 | << ore::NV("Cost" , DuplicationCost) |
| 928 | << ", threshold=" << ore::NV("Threshold" , CostThreshold) << ")." ; |
| 929 | }); |
| 930 | return false; |
| 931 | } |
| 932 | |
| 933 | ORE->emit(RemarkBuilder: [&]() { |
| 934 | return OptimizationRemark(DEBUG_TYPE, "JumpThreaded" , Switch) |
| 935 | << "Switch statement jump-threaded." ; |
| 936 | }); |
| 937 | |
| 938 | return true; |
| 939 | } |
| 940 | |
| 941 | /// Transform each threading path to effectively jump thread the DFA. |
| 942 | void createAllExitPaths() { |
| 943 | DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); |
| 944 | |
| 945 | // Move the switch block to the end of the path, since it will be duplicated |
| 946 | BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); |
| 947 | for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { |
| 948 | LLVM_DEBUG(dbgs() << TPath << "\n" ); |
| 949 | // TODO: Fix exit path creation logic so that we dont need this |
| 950 | // placeholder. |
| 951 | TPath.push_front(BB: SwitchBlock); |
| 952 | } |
| 953 | |
| 954 | // Transform the ThreadingPaths and keep track of the cloned values |
| 955 | DuplicateBlockMap DuplicateMap; |
| 956 | DefMap NewDefs; |
| 957 | |
| 958 | SmallSet<BasicBlock *, 16> BlocksToClean; |
| 959 | BlocksToClean.insert_range(R: successors(BB: SwitchBlock)); |
| 960 | |
| 961 | for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { |
| 962 | createExitPath(NewDefs, Path&: TPath, DuplicateMap, BlocksToClean, DTU: &DTU); |
| 963 | NumPaths++; |
| 964 | } |
| 965 | |
| 966 | // After all paths are cloned, now update the last successor of the cloned |
| 967 | // path so it skips over the switch statement |
| 968 | for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) |
| 969 | updateLastSuccessor(TPath, DuplicateMap, DTU: &DTU); |
| 970 | |
| 971 | // For each instruction that was cloned and used outside, update its uses |
| 972 | updateSSA(NewDefs); |
| 973 | |
| 974 | // Clean PHI Nodes for the newly created blocks |
| 975 | for (BasicBlock *BB : BlocksToClean) |
| 976 | cleanPhiNodes(BB); |
| 977 | } |
| 978 | |
| 979 | /// For a specific ThreadingPath \p Path, create an exit path starting from |
| 980 | /// the determinator block. |
| 981 | /// |
| 982 | /// To remember the correct destination, we have to duplicate blocks |
| 983 | /// corresponding to each state. Also update the terminating instruction of |
| 984 | /// the predecessors, and phis in the successor blocks. |
| 985 | void createExitPath(DefMap &NewDefs, ThreadingPath &Path, |
| 986 | DuplicateBlockMap &DuplicateMap, |
| 987 | SmallSet<BasicBlock *, 16> &BlocksToClean, |
| 988 | DomTreeUpdater *DTU) { |
| 989 | APInt NextState = Path.getExitValue(); |
| 990 | const BasicBlock *Determinator = Path.getDeterminatorBB(); |
| 991 | PathType PathBBs = Path.getPath(); |
| 992 | |
| 993 | // Don't select the placeholder block in front |
| 994 | if (PathBBs.front() == Determinator) |
| 995 | PathBBs.pop_front(); |
| 996 | |
| 997 | auto DetIt = llvm::find(Range&: PathBBs, Val: Determinator); |
| 998 | // When there is only one BB in PathBBs, the determinator takes itself as a |
| 999 | // direct predecessor. |
| 1000 | BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(x: DetIt); |
| 1001 | for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { |
| 1002 | BasicBlock *BB = *BBIt; |
| 1003 | BlocksToClean.insert(Ptr: BB); |
| 1004 | |
| 1005 | // We already cloned BB for this NextState, now just update the branch |
| 1006 | // and continue. |
| 1007 | BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); |
| 1008 | if (NextBB) { |
| 1009 | updatePredecessor(PrevBB, OldBB: BB, NewBB: NextBB, DTU); |
| 1010 | PrevBB = NextBB; |
| 1011 | continue; |
| 1012 | } |
| 1013 | |
| 1014 | // Clone the BB and update the successor of Prev to jump to the new block |
| 1015 | BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( |
| 1016 | BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); |
| 1017 | DuplicateMap[BB].push_back(x: {.BB: NewBB, .State: NextState}); |
| 1018 | BlocksToClean.insert(Ptr: NewBB); |
| 1019 | PrevBB = NewBB; |
| 1020 | } |
| 1021 | } |
| 1022 | |
| 1023 | /// Restore SSA form after cloning blocks. |
| 1024 | /// |
| 1025 | /// Each cloned block creates new defs for a variable, and the uses need to be |
| 1026 | /// updated to reflect this. The uses may be replaced with a cloned value, or |
| 1027 | /// some derived phi instruction. Note that all uses of a value defined in the |
| 1028 | /// same block were already remapped when cloning the block. |
| 1029 | void updateSSA(DefMap &NewDefs) { |
| 1030 | SSAUpdaterBulk SSAUpdate; |
| 1031 | SmallVector<Use *, 16> UsesToRename; |
| 1032 | |
| 1033 | for (const auto &KV : NewDefs) { |
| 1034 | Instruction *I = KV.first; |
| 1035 | BasicBlock *BB = I->getParent(); |
| 1036 | std::vector<Instruction *> Cloned = KV.second; |
| 1037 | |
| 1038 | // Scan all uses of this instruction to see if it is used outside of its |
| 1039 | // block, and if so, record them in UsesToRename. |
| 1040 | for (Use &U : I->uses()) { |
| 1041 | Instruction *User = cast<Instruction>(Val: U.getUser()); |
| 1042 | if (PHINode *UserPN = dyn_cast<PHINode>(Val: User)) { |
| 1043 | if (UserPN->getIncomingBlock(U) == BB) |
| 1044 | continue; |
| 1045 | } else if (User->getParent() == BB) { |
| 1046 | continue; |
| 1047 | } |
| 1048 | |
| 1049 | UsesToRename.push_back(Elt: &U); |
| 1050 | } |
| 1051 | |
| 1052 | // If there are no uses outside the block, we're done with this |
| 1053 | // instruction. |
| 1054 | if (UsesToRename.empty()) |
| 1055 | continue; |
| 1056 | LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I |
| 1057 | << "\n" ); |
| 1058 | |
| 1059 | // We found a use of I outside of BB. Rename all uses of I that are |
| 1060 | // outside its block to be uses of the appropriate PHI node etc. See |
| 1061 | // ValuesInBlocks with the values we know. |
| 1062 | unsigned VarNum = SSAUpdate.AddVariable(Name: I->getName(), Ty: I->getType()); |
| 1063 | SSAUpdate.AddAvailableValue(Var: VarNum, BB, V: I); |
| 1064 | for (Instruction *New : Cloned) |
| 1065 | SSAUpdate.AddAvailableValue(Var: VarNum, BB: New->getParent(), V: New); |
| 1066 | |
| 1067 | while (!UsesToRename.empty()) |
| 1068 | SSAUpdate.AddUse(Var: VarNum, U: UsesToRename.pop_back_val()); |
| 1069 | |
| 1070 | LLVM_DEBUG(dbgs() << "\n" ); |
| 1071 | } |
| 1072 | // SSAUpdater handles phi placement and renaming uses with the appropriate |
| 1073 | // value. |
| 1074 | SSAUpdate.RewriteAllUses(DT); |
| 1075 | } |
| 1076 | |
| 1077 | /// Clones a basic block, and adds it to the CFG. |
| 1078 | /// |
| 1079 | /// This function also includes updating phi nodes in the successors of the |
| 1080 | /// BB, and remapping uses that were defined locally in the cloned BB. |
| 1081 | BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, |
| 1082 | const APInt &NextState, |
| 1083 | DuplicateBlockMap &DuplicateMap, |
| 1084 | DefMap &NewDefs, |
| 1085 | DomTreeUpdater *DTU) { |
| 1086 | ValueToValueMapTy VMap; |
| 1087 | BasicBlock *NewBB = CloneBasicBlock( |
| 1088 | BB, VMap, NameSuffix: ".jt" + std::to_string(val: NextState.getLimitedValue()), |
| 1089 | F: BB->getParent()); |
| 1090 | NewBB->moveAfter(MovePos: BB); |
| 1091 | NumCloned++; |
| 1092 | |
| 1093 | for (Instruction &I : *NewBB) { |
| 1094 | // Do not remap operands of PHINode in case a definition in BB is an |
| 1095 | // incoming value to a phi in the same block. This incoming value will |
| 1096 | // be renamed later while restoring SSA. |
| 1097 | if (isa<PHINode>(Val: &I)) |
| 1098 | continue; |
| 1099 | RemapInstruction(I: &I, VM&: VMap, |
| 1100 | Flags: RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); |
| 1101 | if (AssumeInst *II = dyn_cast<AssumeInst>(Val: &I)) |
| 1102 | AC->registerAssumption(CI: II); |
| 1103 | } |
| 1104 | |
| 1105 | updateSuccessorPhis(BB, ClonedBB: NewBB, NextState, VMap, DuplicateMap); |
| 1106 | updatePredecessor(PrevBB, OldBB: BB, NewBB, DTU); |
| 1107 | updateDefMap(NewDefs, VMap); |
| 1108 | |
| 1109 | // Add all successors to the DominatorTree |
| 1110 | SmallPtrSet<BasicBlock *, 4> SuccSet; |
| 1111 | for (auto *SuccBB : successors(BB: NewBB)) { |
| 1112 | if (SuccSet.insert(Ptr: SuccBB).second) |
| 1113 | DTU->applyUpdates(Updates: {{DominatorTree::Insert, NewBB, SuccBB}}); |
| 1114 | } |
| 1115 | SuccSet.clear(); |
| 1116 | return NewBB; |
| 1117 | } |
| 1118 | |
| 1119 | /// Update the phi nodes in BB's successors. |
| 1120 | /// |
| 1121 | /// This means creating a new incoming value from NewBB with the new |
| 1122 | /// instruction wherever there is an incoming value from BB. |
| 1123 | void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, |
| 1124 | const APInt &NextState, ValueToValueMapTy &VMap, |
| 1125 | DuplicateBlockMap &DuplicateMap) { |
| 1126 | std::vector<BasicBlock *> BlocksToUpdate; |
| 1127 | |
| 1128 | // If BB is the last block in the path, we can simply update the one case |
| 1129 | // successor that will be reached. |
| 1130 | if (BB == SwitchPaths->getSwitchBlock()) { |
| 1131 | SwitchInst *Switch = SwitchPaths->getSwitchInst(); |
| 1132 | BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); |
| 1133 | BlocksToUpdate.push_back(x: NextCase); |
| 1134 | BasicBlock *ClonedSucc = getClonedBB(BB: NextCase, NextState, DuplicateMap); |
| 1135 | if (ClonedSucc) |
| 1136 | BlocksToUpdate.push_back(x: ClonedSucc); |
| 1137 | } |
| 1138 | // Otherwise update phis in all successors. |
| 1139 | else { |
| 1140 | for (BasicBlock *Succ : successors(BB)) { |
| 1141 | BlocksToUpdate.push_back(x: Succ); |
| 1142 | |
| 1143 | // Check if a successor has already been cloned for the particular exit |
| 1144 | // value. In this case if a successor was already cloned, the phi nodes |
| 1145 | // in the cloned block should be updated directly. |
| 1146 | BasicBlock *ClonedSucc = getClonedBB(BB: Succ, NextState, DuplicateMap); |
| 1147 | if (ClonedSucc) |
| 1148 | BlocksToUpdate.push_back(x: ClonedSucc); |
| 1149 | } |
| 1150 | } |
| 1151 | |
| 1152 | // If there is a phi with an incoming value from BB, create a new incoming |
| 1153 | // value for the new predecessor ClonedBB. The value will either be the same |
| 1154 | // value from BB or a cloned value. |
| 1155 | for (BasicBlock *Succ : BlocksToUpdate) { |
| 1156 | for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); |
| 1157 | ++II) { |
| 1158 | Value *Incoming = Phi->getIncomingValueForBlock(BB); |
| 1159 | if (Incoming) { |
| 1160 | if (isa<Constant>(Val: Incoming)) { |
| 1161 | Phi->addIncoming(V: Incoming, BB: ClonedBB); |
| 1162 | continue; |
| 1163 | } |
| 1164 | Value *ClonedVal = VMap[Incoming]; |
| 1165 | if (ClonedVal) |
| 1166 | Phi->addIncoming(V: ClonedVal, BB: ClonedBB); |
| 1167 | else |
| 1168 | Phi->addIncoming(V: Incoming, BB: ClonedBB); |
| 1169 | } |
| 1170 | } |
| 1171 | } |
| 1172 | } |
| 1173 | |
| 1174 | /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all |
| 1175 | /// other successors are kept as well. |
| 1176 | void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, |
| 1177 | BasicBlock *NewBB, DomTreeUpdater *DTU) { |
| 1178 | // When a path is reused, there is a chance that predecessors were already |
| 1179 | // updated before. Check if the predecessor needs to be updated first. |
| 1180 | if (!isPredecessor(BB: OldBB, IncomingBB: PrevBB)) |
| 1181 | return; |
| 1182 | |
| 1183 | Instruction *PrevTerm = PrevBB->getTerminator(); |
| 1184 | for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { |
| 1185 | if (PrevTerm->getSuccessor(Idx) == OldBB) { |
| 1186 | OldBB->removePredecessor(Pred: PrevBB, /* KeepOneInputPHIs = */ true); |
| 1187 | PrevTerm->setSuccessor(Idx, BB: NewBB); |
| 1188 | } |
| 1189 | } |
| 1190 | DTU->applyUpdates(Updates: {{DominatorTree::Delete, PrevBB, OldBB}, |
| 1191 | {DominatorTree::Insert, PrevBB, NewBB}}); |
| 1192 | } |
| 1193 | |
| 1194 | /// Add new value mappings to the DefMap to keep track of all new definitions |
| 1195 | /// for a particular instruction. These will be used while updating SSA form. |
| 1196 | void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { |
| 1197 | SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector; |
| 1198 | NewDefsVector.reserve(N: VMap.size()); |
| 1199 | |
| 1200 | for (auto Entry : VMap) { |
| 1201 | Instruction *Inst = |
| 1202 | dyn_cast<Instruction>(Val: const_cast<Value *>(Entry.first)); |
| 1203 | if (!Inst || !Entry.second || isa<BranchInst>(Val: Inst) || |
| 1204 | isa<SwitchInst>(Val: Inst)) { |
| 1205 | continue; |
| 1206 | } |
| 1207 | |
| 1208 | Instruction *Cloned = dyn_cast<Instruction>(Val&: Entry.second); |
| 1209 | if (!Cloned) |
| 1210 | continue; |
| 1211 | |
| 1212 | NewDefsVector.push_back(Elt: {Inst, Cloned}); |
| 1213 | } |
| 1214 | |
| 1215 | // Sort the defs to get deterministic insertion order into NewDefs. |
| 1216 | sort(C&: NewDefsVector, Comp: [](const auto &LHS, const auto &RHS) { |
| 1217 | if (LHS.first == RHS.first) |
| 1218 | return LHS.second->comesBefore(RHS.second); |
| 1219 | return LHS.first->comesBefore(RHS.first); |
| 1220 | }); |
| 1221 | |
| 1222 | for (const auto &KV : NewDefsVector) |
| 1223 | NewDefs[KV.first].push_back(x: KV.second); |
| 1224 | } |
| 1225 | |
| 1226 | /// Update the last branch of a particular cloned path to point to the correct |
| 1227 | /// case successor. |
| 1228 | /// |
| 1229 | /// Note that this is an optional step and would have been done in later |
| 1230 | /// optimizations, but it makes the CFG significantly easier to work with. |
| 1231 | void updateLastSuccessor(ThreadingPath &TPath, |
| 1232 | DuplicateBlockMap &DuplicateMap, |
| 1233 | DomTreeUpdater *DTU) { |
| 1234 | APInt NextState = TPath.getExitValue(); |
| 1235 | BasicBlock *BB = TPath.getPath().back(); |
| 1236 | BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); |
| 1237 | |
| 1238 | // Note multiple paths can end at the same block so check that it is not |
| 1239 | // updated yet |
| 1240 | if (!isa<SwitchInst>(Val: LastBlock->getTerminator())) |
| 1241 | return; |
| 1242 | SwitchInst *Switch = cast<SwitchInst>(Val: LastBlock->getTerminator()); |
| 1243 | BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); |
| 1244 | |
| 1245 | std::vector<DominatorTree::UpdateType> DTUpdates; |
| 1246 | SmallPtrSet<BasicBlock *, 4> SuccSet; |
| 1247 | for (BasicBlock *Succ : successors(BB: LastBlock)) { |
| 1248 | if (Succ != NextCase && SuccSet.insert(Ptr: Succ).second) |
| 1249 | DTUpdates.push_back(x: {DominatorTree::Delete, LastBlock, Succ}); |
| 1250 | } |
| 1251 | |
| 1252 | Switch->eraseFromParent(); |
| 1253 | BranchInst::Create(IfTrue: NextCase, InsertBefore: LastBlock); |
| 1254 | |
| 1255 | DTU->applyUpdates(Updates: DTUpdates); |
| 1256 | } |
| 1257 | |
| 1258 | /// After cloning blocks, some of the phi nodes have extra incoming values |
| 1259 | /// that are no longer used. This function removes them. |
| 1260 | void cleanPhiNodes(BasicBlock *BB) { |
| 1261 | // If BB is no longer reachable, remove any remaining phi nodes |
| 1262 | if (pred_empty(BB)) { |
| 1263 | std::vector<PHINode *> PhiToRemove; |
| 1264 | for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) { |
| 1265 | PhiToRemove.push_back(x: Phi); |
| 1266 | } |
| 1267 | for (PHINode *PN : PhiToRemove) { |
| 1268 | PN->replaceAllUsesWith(V: PoisonValue::get(T: PN->getType())); |
| 1269 | PN->eraseFromParent(); |
| 1270 | } |
| 1271 | return; |
| 1272 | } |
| 1273 | |
| 1274 | // Remove any incoming values that come from an invalid predecessor |
| 1275 | for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(Val&: II); ++II) { |
| 1276 | std::vector<BasicBlock *> BlocksToRemove; |
| 1277 | for (BasicBlock *IncomingBB : Phi->blocks()) { |
| 1278 | if (!isPredecessor(BB, IncomingBB)) |
| 1279 | BlocksToRemove.push_back(x: IncomingBB); |
| 1280 | } |
| 1281 | for (BasicBlock *BB : BlocksToRemove) |
| 1282 | Phi->removeIncomingValue(BB); |
| 1283 | } |
| 1284 | } |
| 1285 | |
| 1286 | /// Checks if BB was already cloned for a particular next state value. If it |
| 1287 | /// was then it returns this cloned block, and otherwise null. |
| 1288 | BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState, |
| 1289 | DuplicateBlockMap &DuplicateMap) { |
| 1290 | CloneList ClonedBBs = DuplicateMap[BB]; |
| 1291 | |
| 1292 | // Find an entry in the CloneList with this NextState. If it exists then |
| 1293 | // return the corresponding BB |
| 1294 | auto It = llvm::find_if(Range&: ClonedBBs, P: [NextState](const ClonedBlock &C) { |
| 1295 | return C.State == NextState; |
| 1296 | }); |
| 1297 | return It != ClonedBBs.end() ? (*It).BB : nullptr; |
| 1298 | } |
| 1299 | |
| 1300 | /// Helper to get the successor corresponding to a particular case value for |
| 1301 | /// a switch statement. |
| 1302 | BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) { |
| 1303 | BasicBlock *NextCase = nullptr; |
| 1304 | for (auto Case : Switch->cases()) { |
| 1305 | if (Case.getCaseValue()->getValue() == NextState) { |
| 1306 | NextCase = Case.getCaseSuccessor(); |
| 1307 | break; |
| 1308 | } |
| 1309 | } |
| 1310 | if (!NextCase) |
| 1311 | NextCase = Switch->getDefaultDest(); |
| 1312 | return NextCase; |
| 1313 | } |
| 1314 | |
| 1315 | /// Returns true if IncomingBB is a predecessor of BB. |
| 1316 | bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { |
| 1317 | return llvm::is_contained(Range: predecessors(BB), Element: IncomingBB); |
| 1318 | } |
| 1319 | |
| 1320 | AllSwitchPaths *SwitchPaths; |
| 1321 | DominatorTree *DT; |
| 1322 | AssumptionCache *AC; |
| 1323 | TargetTransformInfo *TTI; |
| 1324 | OptimizationRemarkEmitter *ORE; |
| 1325 | SmallPtrSet<const Value *, 32> EphValues; |
| 1326 | std::vector<ThreadingPath> TPaths; |
| 1327 | }; |
| 1328 | |
| 1329 | bool DFAJumpThreading::run(Function &F) { |
| 1330 | LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n" ); |
| 1331 | |
| 1332 | if (F.hasOptSize()) { |
| 1333 | LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n" ); |
| 1334 | return false; |
| 1335 | } |
| 1336 | |
| 1337 | if (ClViewCfgBefore) |
| 1338 | F.viewCFG(); |
| 1339 | |
| 1340 | SmallVector<AllSwitchPaths, 2> ThreadableLoops; |
| 1341 | bool MadeChanges = false; |
| 1342 | LoopInfoBroken = false; |
| 1343 | |
| 1344 | for (BasicBlock &BB : F) { |
| 1345 | auto *SI = dyn_cast<SwitchInst>(Val: BB.getTerminator()); |
| 1346 | if (!SI) |
| 1347 | continue; |
| 1348 | |
| 1349 | LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() |
| 1350 | << " is a candidate\n" ); |
| 1351 | MainSwitch Switch(SI, LI, ORE); |
| 1352 | |
| 1353 | if (!Switch.getInstr()) { |
| 1354 | LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a " |
| 1355 | << "candidate for jump threading\n" ); |
| 1356 | continue; |
| 1357 | } |
| 1358 | |
| 1359 | LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " |
| 1360 | << "candidate for jump threading\n" ); |
| 1361 | LLVM_DEBUG(SI->dump()); |
| 1362 | |
| 1363 | unfoldSelectInstrs(DT, SelectInsts: Switch.getSelectInsts()); |
| 1364 | if (!Switch.getSelectInsts().empty()) |
| 1365 | MadeChanges = true; |
| 1366 | |
| 1367 | AllSwitchPaths SwitchPaths(&Switch, ORE, LI, |
| 1368 | LI->getLoopFor(BB: &BB)->getOutermostLoop()); |
| 1369 | SwitchPaths.run(); |
| 1370 | |
| 1371 | if (SwitchPaths.getNumThreadingPaths() > 0) { |
| 1372 | ThreadableLoops.push_back(Elt: SwitchPaths); |
| 1373 | |
| 1374 | // For the time being limit this optimization to occurring once in a |
| 1375 | // function since it can change the CFG significantly. This is not a |
| 1376 | // strict requirement but it can cause buggy behavior if there is an |
| 1377 | // overlap of blocks in different opportunities. There is a lot of room to |
| 1378 | // experiment with catching more opportunities here. |
| 1379 | // NOTE: To release this contraint, we must handle LoopInfo invalidation |
| 1380 | break; |
| 1381 | } |
| 1382 | } |
| 1383 | |
| 1384 | #ifdef NDEBUG |
| 1385 | LI->verify(DomTree: *DT); |
| 1386 | #endif |
| 1387 | |
| 1388 | SmallPtrSet<const Value *, 32> EphValues; |
| 1389 | if (ThreadableLoops.size() > 0) |
| 1390 | CodeMetrics::collectEphemeralValues(L: &F, AC, EphValues); |
| 1391 | |
| 1392 | for (AllSwitchPaths SwitchPaths : ThreadableLoops) { |
| 1393 | TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); |
| 1394 | Transform.run(); |
| 1395 | MadeChanges = true; |
| 1396 | LoopInfoBroken = true; |
| 1397 | } |
| 1398 | |
| 1399 | #ifdef EXPENSIVE_CHECKS |
| 1400 | assert(DT->verify(DominatorTree::VerificationLevel::Full)); |
| 1401 | verifyFunction(F, &dbgs()); |
| 1402 | #endif |
| 1403 | |
| 1404 | return MadeChanges; |
| 1405 | } |
| 1406 | |
| 1407 | } // end anonymous namespace |
| 1408 | |
| 1409 | /// Integrate with the new Pass Manager |
| 1410 | PreservedAnalyses DFAJumpThreadingPass::run(Function &F, |
| 1411 | FunctionAnalysisManager &AM) { |
| 1412 | AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(IR&: F); |
| 1413 | DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 1414 | LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F); |
| 1415 | TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(IR&: F); |
| 1416 | OptimizationRemarkEmitter ORE(&F); |
| 1417 | DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE); |
| 1418 | if (!ThreadImpl.run(F)) |
| 1419 | return PreservedAnalyses::all(); |
| 1420 | |
| 1421 | PreservedAnalyses PA; |
| 1422 | PA.preserve<DominatorTreeAnalysis>(); |
| 1423 | if (!ThreadImpl.LoopInfoBroken) |
| 1424 | PA.preserve<LoopAnalysis>(); |
| 1425 | return PA; |
| 1426 | } |
| 1427 | |