//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Loops should be simplified before this analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <map>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "branch-prob"

static cl::opt<bool> PrintBranchProb(
    "print-bpi", cl::init(false), cl::Hidden,
    cl::desc("Print the branch probability info."));

static cl::opt<std::string> PrintBranchProbFuncName(
    "print-bpi-func-name", cl::Hidden,
    cl::desc("The option to specify the name of the function "
             "whose branch probability info is printed."));

INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
                      "Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
                    "Branch Probability Analysis", false, true)

BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass()
    : FunctionPass(ID) {}

char BranchProbabilityInfoWrapperPass::ID = 0;

// Weights are for internal use only. They are used by heuristics to help
// estimate edges' probability. Example:
//
// Using "Loop Branch Heuristics" we predict weights of edges for the
// block BB2.
//         ...
//          |
//          V
//         BB1<-+
//          |   |
//          |   | (Weight = 124)
//          V   |
//         BB2--+
//          |
//          | (Weight = 4)
//          V
//         BB3
//
// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;

/// Unreachable-terminating branch taken probability.
///
/// This is the probability for a branch being taken to a block that terminates
/// (eventually) in unreachable. These are predicted as unlikely as possible.
/// All reachable probabilities will proportionally share the remaining part.
static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
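// For a rough sense of scale (an illustrative note, not from the original
// comments): BranchProbability::getRaw(1) builds the smallest representable
// non-zero probability, i.e. 1 over the class's fixed denominator (currently
// 1u << 31), which is roughly 4.7e-10. An edge into an unreachable-terminated
// block is therefore treated as essentially never taken, while the remaining
// edges absorb practically all of the probability mass.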

/// Heuristics and lookup tables for non-loop branches:
/// Pointer Heuristics (PH)
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
static const BranchProbability
    PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
static const BranchProbability
    PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);

using ProbabilityList = SmallVector<BranchProbability>;
using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;

/// Pointer comparisons:
static const ProbabilityTable PointerTable{
    {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
    {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
};
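// Illustrative (hypothetical) source pattern for the table above: a null
// check such as
//
//   if (p != nullptr)   // lowered to: %c = icmp ne ptr %p, null
//     use(p);           //             br i1 %c, ...
//
// hits the ICMP_NE row, so the "pointer is non-null" successor receives
// PH_TAKEN_WEIGHT / (PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT) = 20/32 = 62.5%
// and the other successor receives the remaining 37.5%.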

/// Zero Heuristics (ZH)
static const uint32_t ZH_TAKEN_WEIGHT = 20;
static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
static const BranchProbability
    ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
static const BranchProbability
    ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);

/// Integer compares with 0:
static const ProbabilityTable ICmpWithZeroTable{
    {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}},  /// X == 0 -> Unlikely
    {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}},  /// X != 0 -> Likely
    {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
    {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
};
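// Illustrative (hypothetical) example for the table above: for
//
//   if (n == 0)         // %c = icmp eq i32 %n, 0
//     handle_empty();   // br i1 %c, label %then, label %else
//
// the ICMP_EQ row assigns ZeroUntakenProb (12/32 = 37.5%) to the 'then'
// successor and ZeroTakenProb (20/32 = 62.5%) to 'else', i.e. comparing
// equal to zero is treated as the unlikely outcome.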

/// Integer compares with -1:
static const ProbabilityTable ICmpWithMinusOneTable{
    {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
    {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
    // InstCombine canonicalizes X >= 0 into X > -1
    {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
};

/// Integer compares with 1:
static const ProbabilityTable ICmpWithOneTable{
    // InstCombine canonicalizes X <= 0 into X < 1
    {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
};

/// strcmp and similar functions return zero, negative, or positive, if the
/// first string is equal, less, or greater than the second. We consider it
/// likely that the strings are not equal, so a comparison with zero is
/// probably false, and a comparison with any other number is also probably
/// false, given that exactly what is returned for non-zero values is not
/// specified. Any kind of comparison other than equality tells us nothing.
static const ProbabilityTable ICmpWithLibCallTable{
    {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}},
    {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}},
};
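// Illustrative (hypothetical) example for the table above: for
//
//   if (strcmp(a, b) == 0)   // equality compare against a strcmp()-like call
//     on_match();
//
// the ICMP_EQ row marks the 'on_match' successor as unlikely, since two
// arbitrary strings are assumed to usually differ. Relational comparisons
// against the strcmp result are deliberately absent from the table because
// the sign of a non-zero result carries no useful bias.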

// Floating-Point Heuristics (FPH)
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;

/// This is the weight for an ordered floating point comparison.
static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
/// This is the weight for an unordered floating point comparison, meaning at
/// least one of the operands is NaN. Usually it is used to test for an
/// exceptional case, so the result is unlikely.
static const uint32_t FPH_UNO_WEIGHT = 1;

static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT,
                                              FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
static const BranchProbability
    FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
static const BranchProbability
    FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
static const BranchProbability
    FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);

/// Floating-Point compares:
static const ProbabilityTable FCmpTable{
    {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
    {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
};
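// Illustrative (hypothetical) example: a NaN check such as isnan(x) is
// typically lowered to an unordered compare,
//
//   %c = fcmp uno double %x, %x
//   br i1 %c, label %nan_path, label %normal_path
//
// which matches the FCMP_UNO row above, so %nan_path receives only
// FPH_UNO_WEIGHT / (FPH_ORD_WEIGHT + FPH_UNO_WEIGHT) = 1 / 1048576 of the
// probability mass and %normal_path receives the remainder.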

/// Set of dedicated "absolute" execution weights for a block. These weights are
/// meaningful relative to each other and their derivatives only.
enum class BlockExecWeight : std::uint32_t {
  /// Special weight used for cases with exact zero probability.
  ZERO = 0x0,
  /// Minimal possible non-zero weight.
  LOWEST_NON_ZERO = 0x1,
  /// Weight to an 'unreachable' block.
  UNREACHABLE = ZERO,
  /// Weight to a block containing a non-returning call.
  NORETURN = LOWEST_NON_ZERO,
  /// Weight to the 'unwind' block of an invoke instruction.
  UNWIND = LOWEST_NON_ZERO,
  /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
  /// with attribute 'cold'.
  COLD = 0xffff,
  /// Default weight is used in cases when there is no dedicated execution
  /// weight set. It is not propagated through the domination line either.
  DEFAULT = 0xfffff
};
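// A rough feel for the scale above (illustrative note): weights only matter
// relative to each other, so a block with a 'cold' call (0xffff) is treated
// as roughly 16x colder than an ordinary block (DEFAULT / COLD =
// 0xfffff / 0xffff ~= 16), NORETURN/UNWIND blocks (weight 1) are about 64K
// times colder still, and 'unreachable' blocks get exactly zero.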

BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) {
  // Record SCC numbers of blocks in the CFG to identify irreducible loops.
  // FIXME: We could only calculate this if the CFG is known to be irreducible
  // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
  int SccNum = 0;
  for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd();
       ++It, ++SccNum) {
    // Ignore single-block SCCs since they either aren't loops or LoopInfo will
    // catch them.
    const std::vector<const BasicBlock *> &Scc = *It;
    if (Scc.size() == 1)
      continue;

    LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
    for (const auto *BB : Scc) {
      LLVM_DEBUG(dbgs() << " " << BB->getName());
      SccNums[BB] = SccNum;
      calculateSccBlockType(BB, SccNum);
    }
    LLVM_DEBUG(dbgs() << "\n");
  }
}
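
// Illustrative (hypothetical) CFG for the irreducibility handling above: a
// cycle with two entries, which LoopInfo does not model as a loop at all:
//
//        Entry
//        /   \
//       v     v
//     BB1 <-> BB2
//       \     /
//        v   v
//        Exit
//
// {BB1, BB2} forms a multi-block SCC; both BB1 and BB2 are recorded as SCC
// headers (each has a predecessor outside the SCC) and as exiting blocks, so
// the cycle-related heuristics can still treat the edges between them
// sensibly.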

int BranchProbabilityInfo::SccInfo::getSCCNum(const BasicBlock *BB) const {
  auto SccIt = SccNums.find(BB);
  if (SccIt == SccNums.end())
    return -1;
  return SccIt->second;
}

void BranchProbabilityInfo::SccInfo::getSccEnterBlocks(
    int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const {

  for (auto MapIt : SccBlocks[SccNum]) {
    const auto *BB = MapIt.first;
    if (isSCCHeader(BB, SccNum))
      for (const auto *Pred : predecessors(BB))
        if (getSCCNum(Pred) != SccNum)
          Enters.push_back(const_cast<BasicBlock *>(BB));
  }
}

void BranchProbabilityInfo::SccInfo::getSccExitBlocks(
    int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const {
  for (auto MapIt : SccBlocks[SccNum]) {
    const auto *BB = MapIt.first;
    if (isSCCExitingBlock(BB, SccNum))
      for (const auto *Succ : successors(BB))
        if (getSCCNum(Succ) != SccNum)
          Exits.push_back(const_cast<BasicBlock *>(Succ));
  }
}

uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB,
                                                         int SccNum) const {
  assert(getSCCNum(BB) == SccNum);

  assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC");
  const auto &SccBlockTypes = SccBlocks[SccNum];

  auto It = SccBlockTypes.find(BB);
  if (It != SccBlockTypes.end()) {
    return It->second;
  }
  return Inner;
}

void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB,
                                                           int SccNum) {
  assert(getSCCNum(BB) == SccNum);
  uint32_t BlockType = Inner;

  if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) {
        // Consider any block that is an entry point to the SCC as
        // a header.
        return getSCCNum(Pred) != SccNum;
      }))
    BlockType |= Header;

  if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) {
        return getSCCNum(Succ) != SccNum;
      }))
    BlockType |= Exiting;

  // Lazily compute the set of headers for a given SCC and cache the results
  // in the SccHeaderMap.
  if (SccBlocks.size() <= static_cast<unsigned>(SccNum))
    SccBlocks.resize(SccNum + 1);
  auto &SccBlockTypes = SccBlocks[SccNum];

  if (BlockType != Inner) {
    bool IsInserted;
    std::tie(std::ignore, IsInserted) =
        SccBlockTypes.insert(std::make_pair(BB, BlockType));
    assert(IsInserted && "Duplicated block in SCC");
  }
}

BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB,
                                            const LoopInfo &LI,
                                            const SccInfo &SccI)
    : BB(BB) {
  LD.first = LI.getLoopFor(BB);
  if (!LD.first) {
    LD.second = SccI.getSCCNum(BB);
  }
}

bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const {
  const auto &SrcBlock = Edge.first;
  const auto &DstBlock = Edge.second;
  return (DstBlock.getLoop() &&
          !DstBlock.getLoop()->contains(SrcBlock.getLoop())) ||
         // Assume that SCCs can't be nested.
         (DstBlock.getSccNum() != -1 &&
          SrcBlock.getSccNum() != DstBlock.getSccNum());
}

bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const {
  return isLoopEnteringEdge({Edge.second, Edge.first});
}

bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
    const LoopEdge &Edge) const {
  return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge);
}

bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const {
  const auto &SrcBlock = Edge.first;
  const auto &DstBlock = Edge.second;
  return SrcBlock.belongsToSameLoop(DstBlock) &&
         ((DstBlock.getLoop() &&
           DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) ||
          (DstBlock.getSccNum() != -1 &&
           SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum())));
}

void BranchProbabilityInfo::getLoopEnterBlocks(
    const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const {
  if (LB.getLoop()) {
    auto *Header = LB.getLoop()->getHeader();
    Enters.append(pred_begin(Header), pred_end(Header));
  } else {
    assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
    SccI->getSccEnterBlocks(LB.getSccNum(), Enters);
  }
}

void BranchProbabilityInfo::getLoopExitBlocks(
    const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const {
  if (LB.getLoop()) {
    LB.getLoop()->getExitBlocks(Exits);
  } else {
    assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
    SccI->getSccExitBlocks(LB.getSccNum(), Exits);
  }
}

// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing. Examine the metadata against the unreachable
// heuristic: the probability of an edge leading to an unreachable block is
// set to the minimum of the metadata value and the unreachable heuristic.
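//
// Illustrative (hypothetical) input this routine consumes: branch weight
// metadata attached to a terminator, e.g.
//
//   br i1 %cond, label %fast, label %slow, !prof !0
//   ...
//   !0 = !{!"branch_weights", i32 2000, i32 1}
//
// Here the raw weights 2000:1 would normally become probabilities of roughly
// 0.9995 and 0.0005; if %slow is additionally known to end in 'unreachable',
// its probability is further clamped down to UR_TAKEN_PROB and the difference
// is given back to %fast.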
bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
  const Instruction *TI = BB->getTerminator();
  assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
  if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) ||
        isa<InvokeInst>(TI) || isa<CallBrInst>(TI)))
    return false;

  MDNode *WeightsNode = getValidBranchWeightMDNode(*TI);
  if (!WeightsNode)
    return false;

  // Check that the number of successors is manageable.
  assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");

  // Build up the final weights that will be used in a temporary buffer.
  // Compute the sum of all weights to later decide whether they need to
  // be scaled to fit in 32 bits.
  uint64_t WeightSum = 0;
  SmallVector<uint32_t, 2> Weights;
  SmallVector<unsigned, 2> UnreachableIdxs;
  SmallVector<unsigned, 2> ReachableIdxs;

  extractBranchWeights(WeightsNode, Weights);
  for (unsigned I = 0, E = Weights.size(); I != E; ++I) {
    WeightSum += Weights[I];
    const LoopBlock SrcLoopBB = getLoopBlock(BB);
    const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I));
    auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
    if (EstimatedWeight &&
        *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
      UnreachableIdxs.push_back(I);
    else
      ReachableIdxs.push_back(I);
  }
  assert(Weights.size() == TI->getNumSuccessors() && "Checked above");

  // If the sum of weights does not fit in 32 bits, scale every weight down
  // accordingly.
  uint64_t ScalingFactor =
      (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;

  if (ScalingFactor > 1) {
    WeightSum = 0;
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
      Weights[I] /= ScalingFactor;
      WeightSum += Weights[I];
    }
  }
  assert(WeightSum <= UINT32_MAX &&
         "Expected weights to scale down to 32 bits");

  if (WeightSum == 0 || ReachableIdxs.size() == 0) {
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
      Weights[I] = 1;
    WeightSum = TI->getNumSuccessors();
  }

  // Set the probability.
  SmallVector<BranchProbability, 2> BP;
  for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
    BP.push_back({Weights[I], static_cast<uint32_t>(WeightSum)});

  // Examine the metadata against the unreachable heuristic.
  // If the unreachable heuristic is stronger, use it for this edge.
  if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) {
    setEdgeProbability(BB, BP);
    return true;
  }

  auto UnreachableProb = UR_TAKEN_PROB;
  for (auto I : UnreachableIdxs)
    if (UnreachableProb < BP[I]) {
      BP[I] = UnreachableProb;
    }

  // The sum of all edge probabilities must be 1.0. If we modified the
  // probability of some edges then we must distribute the introduced
  // difference over the reachable blocks.
  //
  // Proportional distribution: the relation between probabilities of the
  // reachable edges is kept unchanged. That is, for any reachable edges i and j:
  //   newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
  //   newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
  // where K is independent of i,j.
  //   newBP[i] == oldBP[i] * K
  // We need to find K.
  // Summing over all reachable edges on both sides gives:
  //   sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
  // The sum of newBP must equal 1.0:
  //   sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
  //   sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
  // where sum_of_unreachable(newBP) is what has just been changed.
  // Finally:
  //   K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
  //   K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
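  //
  // Worked example (illustrative numbers, not taken from the source): suppose
  // the metadata gave BP = {0.50, 0.30, 0.20} and the third edge leads to an
  // unreachable block, so BP[2] is clamped to ~0. Then
  //   sum_of_unreachable(newBP) ~= 0, NewReachableSum ~= 1.0,
  //   OldReachableSum = 0.50 + 0.30 = 0.80, K = 1.0 / 0.80 = 1.25,
  // and the reachable edges are rescaled to 0.625 and 0.375, preserving their
  // original 5:3 ratio while the total stays at 1.0.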
  BranchProbability NewUnreachableSum = BranchProbability::getZero();
  for (auto I : UnreachableIdxs)
    NewUnreachableSum += BP[I];

  BranchProbability NewReachableSum =
      BranchProbability::getOne() - NewUnreachableSum;

  BranchProbability OldReachableSum = BranchProbability::getZero();
  for (auto I : ReachableIdxs)
    OldReachableSum += BP[I];

  if (OldReachableSum != NewReachableSum) { // Anything to distribute?
    if (OldReachableSum.isZero()) {
      // If all oldBP[i] are zeroes then the proportional distribution results
      // in all zero probabilities and the error stays big. In this case we
      // evenly spread NewReachableSum over the reachable edges.
      BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size();
      for (auto I : ReachableIdxs)
        BP[I] = PerEdge;
    } else {
      for (auto I : ReachableIdxs) {
        // We use uint64_t to avoid double rounding error of the following
        // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
        // The formula is taken from the private constructor
        // BranchProbability(uint32_t Numerator, uint32_t Denominator)
        uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) *
                       BP[I].getNumerator();
        uint32_t Div = static_cast<uint32_t>(
            divideNearest(Mul, OldReachableSum.getNumerator()));
        BP[I] = BranchProbability::getRaw(Div);
      }
    }
  }

  setEdgeProbability(BB, BP);

  return true;
}

// Calculate Edge Weights using "Pointer Heuristics". Predict that a comparison
// between two pointers, or between a pointer and NULL, will fail.
bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
  const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return false;

  Value *Cond = BI->getCondition();
  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
  if (!CI || !CI->isEquality())
    return false;

  Value *LHS = CI->getOperand(0);

  if (!LHS->getType()->isPointerTy())
    return false;

  assert(CI->getOperand(1)->getType()->isPointerTy());

  auto Search = PointerTable.find(CI->getPredicate());
  if (Search == PointerTable.end())
    return false;
  setEdgeProbability(BB, Search->second);
  return true;
}

// Compute the unlikely successors to the block BB in the loop L, specifically
// those that are unlikely because this is a loop, and add them to the
// UnlikelyBlocks set.
static void
computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
                          SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
  // Sometimes in a loop we have a branch whose condition is made false by
  // taking it. This is typically something like
  //   int n = 0;
  //   while (...) {
  //     if (++n >= MAX) {
  //       n = 0;
  //     }
  //   }
  // In this sort of situation taking the branch means that at the very least it
  // won't be taken again in the next iteration of the loop, so we should
  // consider it less likely than a typical branch.
  //
  // We detect this by looking back through the graph of PHI nodes that sets the
  // value that the condition depends on, and seeing if we can reach a successor
  // block which can be determined to make the condition false.
  //
  // FIXME: We currently consider unlikely blocks to be half as likely as other
  // blocks, but if we consider the example above the likelihood is actually
  // 1/MAX. We could therefore be more precise in how unlikely we consider
  // blocks to be, but it would require more careful examination of the form
  // of the comparison expression.
  const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return;

  // Check if the branch is based on an instruction compared with a constant
  CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
  if (!CI || !isa<Instruction>(CI->getOperand(0)) ||
      !isa<Constant>(CI->getOperand(1)))
    return;

  // Either the instruction must be a PHI, or a chain of operations involving
  // constants that ends in a PHI which we can then collapse into a single value
  // if the PHI value is known.
  Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0));
  PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
  Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1));
  // Collect the instructions until we hit a PHI
  SmallVector<BinaryOperator *, 1> InstChain;
  while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
         isa<Constant>(CmpLHS->getOperand(1))) {
    // Stop if the chain extends outside of the loop
    if (!L->contains(CmpLHS))
      return;
    InstChain.push_back(cast<BinaryOperator>(CmpLHS));
    CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0));
    if (CmpLHS)
      CmpPHI = dyn_cast<PHINode>(CmpLHS);
  }
  if (!CmpPHI || !L->contains(CmpPHI))
    return;

  // Trace the phi node to find all values that come from successors of BB
  SmallPtrSet<PHINode*, 8> VisitedInsts;
  SmallVector<PHINode*, 8> WorkList;
  WorkList.push_back(CmpPHI);
  VisitedInsts.insert(CmpPHI);
  while (!WorkList.empty()) {
    PHINode *P = WorkList.pop_back_val();
    for (BasicBlock *B : P->blocks()) {
      // Skip blocks that aren't part of the loop
      if (!L->contains(B))
        continue;
      Value *V = P->getIncomingValueForBlock(B);
      // If the source is a PHI add it to the work list if we haven't
      // already visited it.
      if (PHINode *PN = dyn_cast<PHINode>(V)) {
        if (VisitedInsts.insert(PN).second)
          WorkList.push_back(PN);
        continue;
      }
      // If this incoming value is a constant and B is a successor of BB, then
      // we can constant-evaluate the compare to see if it makes the branch be
      // taken or not.
      Constant *CmpLHSConst = dyn_cast<Constant>(V);
      if (!CmpLHSConst || !llvm::is_contained(successors(BB), B))
        continue;
      // First collapse InstChain
      const DataLayout &DL = BB->getDataLayout();
      for (Instruction *I : llvm::reverse(InstChain)) {
        CmpLHSConst = ConstantFoldBinaryOpOperands(
            I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL);
        if (!CmpLHSConst)
          break;
      }
      if (!CmpLHSConst)
        continue;
      // Now constant-evaluate the compare
      Constant *Result = ConstantFoldCompareInstOperands(
          CI->getPredicate(), CmpLHSConst, CmpConst, DL);
      // If the result means we don't branch to the block then that block is
      // unlikely.
      if (Result &&
          ((Result->isZeroValue() && B == BI->getSuccessor(0)) ||
           (Result->isOneValue() && B == BI->getSuccessor(1))))
        UnlikelyBlocks.insert(B);
    }
  }
}

std::optional<uint32_t>
BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const {
  auto WeightIt = EstimatedBlockWeight.find(BB);
  if (WeightIt == EstimatedBlockWeight.end())
    return std::nullopt;
  return WeightIt->second;
}

std::optional<uint32_t>
BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const {
  auto WeightIt = EstimatedLoopWeight.find(L);
  if (WeightIt == EstimatedLoopWeight.end())
    return std::nullopt;
  return WeightIt->second;
}

std::optional<uint32_t>
BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const {
  // For edges entering a loop take the weight of the loop rather than that of
  // an individual block in the loop.
  return isLoopEnteringEdge(Edge)
             ? getEstimatedLoopWeight(Edge.second.getLoopData())
             : getEstimatedBlockWeight(Edge.second.getBlock());
}

template <class IterT>
std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
    const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const {
  std::optional<uint32_t> MaxWeight;
  for (const BasicBlock *DstBB : Successors) {
    const LoopBlock DstLoopBB = getLoopBlock(DstBB);
    auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});

    if (!Weight)
      return std::nullopt;

    if (!MaxWeight || *MaxWeight < *Weight)
      MaxWeight = Weight;
  }

  return MaxWeight;
}

// Updates \p LoopBB's weight and returns true. If \p LoopBB already has an
// associated weight it is left unchanged and false is returned.
//
// Note that by construction the weight is not expected to change once set,
// so the 'false' status is used to track visited blocks.
bool BranchProbabilityInfo::updateEstimatedBlockWeight(
    LoopBlock &LoopBB, uint32_t BBWeight,
    SmallVectorImpl<BasicBlock *> &BlockWorkList,
    SmallVectorImpl<LoopBlock> &LoopWorkList) {
  BasicBlock *BB = LoopBB.getBlock();

  // In general, weight is assigned to a block when it has final value and
  // can't/shouldn't be changed. However, there are cases when a block
  // inherently has several (possibly "contradicting") weights. For example, an
  // "unwind" block may also contain a "cold" call. In that case the first
  // weight set is favored and all subsequent weights are ignored.
  if (!EstimatedBlockWeight.insert({BB, BBWeight}).second)
    return false;

  for (BasicBlock *PredBlock : predecessors(BB)) {
    LoopBlock PredLoop = getLoopBlock(PredBlock);
    // Add affected block/loop to a working list.
    if (isLoopExitingEdge({PredLoop, LoopBB})) {
      if (!EstimatedLoopWeight.count(PredLoop.getLoopData()))
        LoopWorkList.push_back(PredLoop);
    } else if (!EstimatedBlockWeight.count(PredBlock))
      BlockWorkList.push_back(PredBlock);
  }
  return true;
}

// Starting from \p BB, traverse through its dominator blocks and assign \p
// BBWeight to all blocks that are post-dominated by \p BB, in other words to
// all blocks that are executed if and only if \p BB is executed. Importantly,
// we skip loops here for two reasons. First, weights of blocks in a loop
// should be scaled by the trip count (which is possibly unknown). Second,
// there is no value in doing so because it gives no additional information
// about the distribution of probabilities inside the loop. The exception is
// loop 'enter' and 'exit' edges, which are handled in a special way in
// calcEstimatedHeuristics.
//
// In addition, \p BlockWorkList is populated with basic blocks if at least one
// successor has an updated estimated weight.
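//
// Illustrative (hypothetical) example of the walk below:
//
//          A
//         / \
//        B   D        (C contains a call marked 'cold')
//        |   |
//        C   |
//         \  |
//          Exit
//
// Starting from C with weight COLD, the loop climbs C's dominator chain:
// B is C's immediate dominator and is post-dominated by C (its only path to
// Exit goes through C), so B also gets weight COLD; A is not post-dominated
// by C (it can reach Exit via D), so the walk stops there.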
void BranchProbabilityInfo::propagateEstimatedBlockWeight(
    const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
    uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
    SmallVectorImpl<LoopBlock> &LoopWorkList) {
  const BasicBlock *BB = LoopBB.getBlock();
  const auto *DTStartNode = DT->getNode(BB);
  const auto *PDTStartNode = PDT->getNode(BB);

  // TODO: Consider propagating weight down the domination line as well.
  for (const auto *DTNode = DTStartNode; DTNode != nullptr;
       DTNode = DTNode->getIDom()) {
    auto *DomBB = DTNode->getBlock();
    // Consider blocks which lie on one 'line'.
    if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB)))
      // If BB doesn't post dominate DomBB it will not post dominate dominators
      // of DomBB as well.
      break;

    LoopBlock DomLoopBB = getLoopBlock(DomBB);
    const LoopEdge Edge{DomLoopBB, LoopBB};
    // Don't propagate weight to blocks belonging to different loops.
    if (!isLoopEnteringExitingEdge(Edge)) {
      if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList,
                                      LoopWorkList))
        // If DomBB has a weight set then all its predecessors are already
        // processed (since we propagate weight up to the top of IR each time).
        break;
    } else if (isLoopExitingEdge(Edge)) {
      LoopWorkList.push_back(DomLoopBB);
    }
  }
}

std::optional<uint32_t>
BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock *BB) {
  // Returns true if \p BB has a call marked with the "NoReturn" attribute.
  auto hasNoReturn = [&](const BasicBlock *BB) {
    for (const auto &I : reverse(*BB))
      if (const CallInst *CI = dyn_cast<CallInst>(&I))
        if (CI->hasFnAttr(Attribute::NoReturn))
          return true;

    return false;
  };

  // Important note regarding the order of checks: they are ordered by weight
  // from lowest to highest. Doing so avoids "unstable" results when several
  // heuristics could be applied simultaneously.
  if (isa<UnreachableInst>(BB->getTerminator()) ||
      // If this block is terminated by a call to
      // @llvm.experimental.deoptimize then treat it like an unreachable
      // since it is expected to practically never execute.
      // TODO: Should we actually treat it as a never-returning call?
      BB->getTerminatingDeoptimizeCall())
    return hasNoReturn(BB)
               ? static_cast<uint32_t>(BlockExecWeight::NORETURN)
               : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE);

  // Check if the block is an exception handling block.
  if (BB->isEHPad())
    return static_cast<uint32_t>(BlockExecWeight::UNWIND);

  // Check if the block contains a 'cold' call.
  for (const auto &I : *BB)
    if (const CallInst *CI = dyn_cast<CallInst>(&I))
      if (CI->hasFnAttr(Attribute::Cold))
        return static_cast<uint32_t>(BlockExecWeight::COLD);

  return std::nullopt;
}

// Does an RPO traversal over all blocks in \p F and assigns weights to
// 'unreachable', 'noreturn', 'cold' and 'unwind' blocks. In addition it does
// its best to propagate the weight up/down the IR.
void BranchProbabilityInfo::estimateBlockWeights(const Function &F,
                                                 DominatorTree *DT,
                                                 PostDominatorTree *PDT) {
  SmallVector<BasicBlock *, 8> BlockWorkList;
  SmallVector<LoopBlock, 8> LoopWorkList;
  SmallDenseMap<LoopData, SmallVector<BasicBlock *, 4>> LoopExitBlocks;

  // By doing RPO we make sure that all predecessors already have weights
  // calculated before visiting their successors.
  ReversePostOrderTraversal<const Function *> RPOT(&F);
  for (const auto *BB : RPOT)
    if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
      // If we were able to find an estimated weight for the block, set it and
      // propagate up the IR.
      propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, *BBWeight,
                                    BlockWorkList, LoopWorkList);

  // BlockWorkList/LoopWorkList contains blocks/loops with at least one
  // successor/exit having an estimated weight. Try to propagate weight to such
  // blocks/loops from successors/exits.
  // Process loops and blocks. Order is not important.
  do {
    while (!LoopWorkList.empty()) {
      const LoopBlock LoopBB = LoopWorkList.pop_back_val();
      const LoopData LD = LoopBB.getLoopData();
      if (EstimatedLoopWeight.count(LD))
        continue;

      auto Res = LoopExitBlocks.try_emplace(LD);
      SmallVectorImpl<BasicBlock *> &Exits = Res.first->second;
      if (Res.second)
        getLoopExitBlocks(LoopBB, Exits);
      auto LoopWeight = getMaxEstimatedEdgeWeight(
          LoopBB, make_range(Exits.begin(), Exits.end()));

      if (LoopWeight) {
        // If we never exit the loop then we can enter it once at maximum.
        if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
          LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);

        EstimatedLoopWeight.insert({LD, *LoopWeight});
        // Add all blocks entering the loop into the working list.
        getLoopEnterBlocks(LoopBB, BlockWorkList);
      }
    }

    while (!BlockWorkList.empty()) {
      // We can reach here only if BlockWorkList is not empty.
      const BasicBlock *BB = BlockWorkList.pop_back_val();
      if (EstimatedBlockWeight.count(BB))
        continue;

      // We take the maximum over the weights of all successors, i.e. the
      // weight of the "hot" path. In theory there may be a function that gives
      // more accurate results than taking the maximum, but none comes to mind
      // and it is unlikely to make any difference in practice.
      const LoopBlock LoopBB = getLoopBlock(BB);
      auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));

      if (MaxWeight)
        propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight,
                                      BlockWorkList, LoopWorkList);
    }
  } while (!BlockWorkList.empty() || !LoopWorkList.empty());
}

// Calculate edge probabilities based on the blocks' estimated weights.
// Note that the gathered weights were not scaled for loops, so edges entering
// and exiting loops require special processing.
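//
// Illustrative arithmetic for the trip-count scaling below (hypothetical
// numbers): TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT = 124 / 4 = 31. For a
// latch with one in-loop successor and one exit, if neither target has a
// dedicated weight both fall back to DEFAULT (0xfffff = 1048575); the exiting
// weight is then divided by TC, giving 1048575 / 31 = 33825, so the in-loop
// edge ends up about 31 times more likely than the exit edge.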
bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
  assert(BB->getTerminator()->getNumSuccessors() > 1 &&
         "expected more than one successor!");

  const LoopBlock LoopBB = getLoopBlock(BB);

  SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks;
  uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT;
  if (LoopBB.getLoop())
    computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks);

  // Changed to 'true' if at least one successor has an estimated weight.
  bool FoundEstimatedWeight = false;
  SmallVector<uint32_t, 4> SuccWeights;
  uint64_t TotalWeight = 0;
  // Go over all successors of BB and put their weights into SuccWeights.
  for (const BasicBlock *SuccBB : successors(BB)) {
    std::optional<uint32_t> Weight;
    const LoopBlock SuccLoopBB = getLoopBlock(SuccBB);
    const LoopEdge Edge{LoopBB, SuccLoopBB};

    Weight = getEstimatedEdgeWeight(Edge);

    if (isLoopExitingEdge(Edge) &&
        // Avoid adjustment of ZERO weight since it should remain unchanged.
        Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
      // Scale down loop exiting weight by trip count.
      Weight = std::max(
          static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
          Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
              TC);
    }
    bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
    if (IsUnlikelyEdge &&
        // Avoid adjustment of ZERO weight since it should remain unchanged.
        Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
      // 'Unlikely' blocks get half the weight.
      Weight = std::max(
          static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
          Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
    }

    if (Weight)
      FoundEstimatedWeight = true;

    auto WeightVal =
        Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
    TotalWeight += WeightVal;
    SuccWeights.push_back(WeightVal);
  }

  // If none of the blocks have an estimated weight, bail out.
  // If TotalWeight is 0 then the weight of each successor is 0 as well and all
  // are equally likely. Bail out early to avoid dealing with division by zero.
  if (!FoundEstimatedWeight || TotalWeight == 0)
    return false;

  assert(SuccWeights.size() == succ_size(BB) && "Missed successor?");
  const unsigned SuccCount = SuccWeights.size();

  // If the sum of weights does not fit in 32 bits, scale every weight down
  // accordingly.
  if (TotalWeight > UINT32_MAX) {
    uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
    TotalWeight = 0;
    for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
      SuccWeights[Idx] /= ScalingFactor;
      if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO))
        SuccWeights[Idx] =
            static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
      TotalWeight += SuccWeights[Idx];
    }
    assert(TotalWeight <= UINT32_MAX && "Total weight overflows");
  }

  // Finally set probabilities on edges according to estimated block weights.
  SmallVector<BranchProbability, 4> EdgeProbabilities(
      SuccCount, BranchProbability::getUnknown());

  for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
    EdgeProbabilities[Idx] =
        BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight);
  }
  setEdgeProbability(BB, EdgeProbabilities);
  return true;
}

bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
                                               const TargetLibraryInfo *TLI) {
  const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return false;

  Value *Cond = BI->getCondition();
  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
  if (!CI)
    return false;

  auto GetConstantInt = [](Value *V) {
    if (auto *I = dyn_cast<BitCastInst>(V))
      return dyn_cast<ConstantInt>(I->getOperand(0));
    return dyn_cast<ConstantInt>(V);
  };

  Value *RHS = CI->getOperand(1);
  ConstantInt *CV = GetConstantInt(RHS);
  if (!CV)
    return false;

  // If the LHS is the result of AND'ing a value with a single bit bitmask,
  // we don't have information about probabilities.
  if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
    if (LHS->getOpcode() == Instruction::And)
      if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1)))
        if (AndRHS->getValue().isPowerOf2())
          return false;

  // Check if the LHS is the return value of a library function
  LibFunc Func = NumLibFuncs;
  if (TLI)
    if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
      if (Function *CalledFn = Call->getCalledFunction())
        TLI->getLibFunc(*CalledFn, Func);

  ProbabilityTable::const_iterator Search;
  if (Func == LibFunc_strcasecmp ||
      Func == LibFunc_strcmp ||
      Func == LibFunc_strncasecmp ||
      Func == LibFunc_strncmp ||
      Func == LibFunc_memcmp ||
      Func == LibFunc_bcmp) {
    Search = ICmpWithLibCallTable.find(CI->getPredicate());
    if (Search == ICmpWithLibCallTable.end())
      return false;
  } else if (CV->isZero()) {
    Search = ICmpWithZeroTable.find(CI->getPredicate());
    if (Search == ICmpWithZeroTable.end())
      return false;
  } else if (CV->isOne()) {
    Search = ICmpWithOneTable.find(CI->getPredicate());
    if (Search == ICmpWithOneTable.end())
      return false;
  } else if (CV->isMinusOne()) {
    Search = ICmpWithMinusOneTable.find(CI->getPredicate());
    if (Search == ICmpWithMinusOneTable.end())
      return false;
  } else {
    return false;
  }

  setEdgeProbability(BB, Search->second);
  return true;
}

bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
  const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return false;

  Value *Cond = BI->getCondition();
  FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
  if (!FCmp)
    return false;

  ProbabilityList ProbList;
  if (FCmp->isEquality()) {
    ProbList = !FCmp->isTrueWhenEqual() ?
        // f1 == f2 -> Unlikely
        ProbabilityList({FPTakenProb, FPUntakenProb}) :
        // f1 != f2 -> Likely
        ProbabilityList({FPUntakenProb, FPTakenProb});
  } else {
    auto Search = FCmpTable.find(FCmp->getPredicate());
    if (Search == FCmpTable.end())
      return false;
    ProbList = Search->second;
  }

  setEdgeProbability(BB, ProbList);
  return true;
}

void BranchProbabilityInfo::releaseMemory() {
  Probs.clear();
  Handles.clear();
}

bool BranchProbabilityInfo::invalidate(Function &, const PreservedAnalyses &PA,
                                       FunctionAnalysisManager::Invalidator &) {
  // Check whether the analysis, all analyses on functions, or the function's
  // CFG have been preserved.
  auto PAC = PA.getChecker<BranchProbabilityAnalysis>();
  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
           PAC.preservedSet<CFGAnalyses>());
}

void BranchProbabilityInfo::print(raw_ostream &OS) const {
  OS << "---- Branch Probabilities ----\n";
  // We print the probabilities from the last function the analysis ran over,
  // or the function it is currently running over.
  assert(LastF && "Cannot print prior to running over a function");
  for (const auto &BI : *LastF) {
    for (const BasicBlock *Succ : successors(&BI))
      printEdgeProbability(OS << " ", &BI, Succ);
  }
}

bool BranchProbabilityInfo::
isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
  // Hot probability is at least 4/5 = 80%
  // FIXME: Compare against a static "hot" BranchProbability.
  return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
}

/// Get the raw edge probability for the edge. If we can't find it, return a
/// default probability 1/N where N is the number of successors. Here an edge
/// is specified using PredBlock and an index into its successors.
BranchProbability
BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
                                          unsigned IndexInSuccessors) const {
  auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
  assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) ==
             (Probs.end() == I) &&
         "Probability for I-th successor must always be defined along with the "
         "probability for the first successor");

  if (I != Probs.end())
    return I->second;

  return {1, static_cast<uint32_t>(succ_size(Src))};
}

BranchProbability
BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
                                          const_succ_iterator Dst) const {
  return getEdgeProbability(Src, Dst.getSuccessorIndex());
}

/// Get the raw edge probability calculated for the block pair. This returns the
/// sum of all raw edge probabilities from Src to Dst.
BranchProbability
BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
                                          const BasicBlock *Dst) const {
  if (!Probs.count(std::make_pair(Src, 0)))
    return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src));

  auto Prob = BranchProbability::getZero();
  for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
    if (*I == Dst)
      Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second;

  return Prob;
}
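
// Illustrative (hypothetical) example for the summing behaviour above: in
//
//   switch i32 %x, label %other [ i32 0, label %other
//                                 i32 1, label %rare ]
//
// %other appears twice in the successor list, so getEdgeProbability(Src,
// %other) returns the sum of the two per-successor probabilities; with no
// stored data it would instead return 2/3 (matching successors over
// succ_size).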

/// Set the edge probability for all edges at once.
void BranchProbabilityInfo::setEdgeProbability(
    const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) {
  assert(Src->getTerminator()->getNumSuccessors() == Probs.size());
  eraseBlock(Src); // Erase stale data if any.
  if (Probs.size() == 0)
    return; // Nothing to set.

  Handles.insert(BasicBlockCallbackVH(Src, this));
  uint64_t TotalNumerator = 0;
  for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) {
    this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx];
    LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx
                      << " successor probability to " << Probs[SuccIdx]
                      << "\n");
    TotalNumerator += Probs[SuccIdx].getNumerator();
  }

  // Because of rounding errors the total probability cannot be checked to be
  // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
  // Instead, every single probability in Probs must be as accurate as possible.
  // This results in error 1/denominator at most, thus the total absolute error
  // should be within Probs.size / BranchProbability::getDenominator.
  assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size());
  assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size());
  (void)TotalNumerator;
}

void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src,
                                                  BasicBlock *Dst) {
  eraseBlock(Dst); // Erase stale data if any.
  unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors();
  assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors());
  if (NumSuccessors == 0)
    return; // Nothing to set.
  if (!this->Probs.contains(std::make_pair(Src, 0)))
    return; // No probability is set for edges from Src. Keep the same for Dst.

  Handles.insert(BasicBlockCallbackVH(Dst, this));
  for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) {
    auto Prob = this->Probs[std::make_pair(Src, SuccIdx)];
    this->Probs[std::make_pair(Dst, SuccIdx)] = Prob;
    LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx
                      << " successor probability to " << Prob << "\n");
  }
}

void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock *Src) {
  assert(Src->getTerminator()->getNumSuccessors() == 2);
  auto It0 = Probs.find(std::make_pair(Src, 0));
  if (It0 == Probs.end())
    return; // No probability is set for edges from Src
  auto It1 = Probs.find(std::make_pair(Src, 1));
  assert(It1 != Probs.end());
  std::swap(It0->second, It1->second);
}

raw_ostream &
BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
                                            const BasicBlock *Src,
                                            const BasicBlock *Dst) const {
  const BranchProbability Prob = getEdgeProbability(Src, Dst);
  OS << "edge ";
  Src->printAsOperand(OS, false, Src->getModule());
  OS << " -> ";
  Dst->printAsOperand(OS, false, Dst->getModule());
  OS << " probability is " << Prob
     << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");

  return OS;
}

void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) {
  LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n");

  // Note that we cannot use successors of BB because the terminator of BB may
  // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
  // Instead we remove prob data for the block by iterating successors by their
  // indices from 0 till the last which exists. There could not be prob data for
  // a pair (BB, N) if there is no data for (BB, N-1) because the data is always
  // set for all successors from 0 to M at once by the method
  // setEdgeProbability().
  Handles.erase(BasicBlockCallbackVH(BB, this));
  for (unsigned I = 0;; ++I) {
    auto MapI = Probs.find(std::make_pair(BB, I));
    if (MapI == Probs.end()) {
      assert(Probs.count(std::make_pair(BB, I + 1)) == 0 &&
             "Must be no more successors");
      return;
    }
    Probs.erase(MapI);
  }
}

void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI,
                                      const TargetLibraryInfo *TLI,
                                      DominatorTree *DT,
                                      PostDominatorTree *PDT) {
  LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
                    << " ----\n\n");
  LastF = &F; // Store the last function we ran on for printing.
  LI = &LoopI;

  SccI = std::make_unique<SccInfo>(F);

  assert(EstimatedBlockWeight.empty());
  assert(EstimatedLoopWeight.empty());

  std::unique_ptr<DominatorTree> DTPtr;
  std::unique_ptr<PostDominatorTree> PDTPtr;

  if (!DT) {
    DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F));
    DT = DTPtr.get();
  }

  if (!PDT) {
    PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
    PDT = PDTPtr.get();
  }

  estimateBlockWeights(F, DT, PDT);

  // Walk the basic blocks in post-order so that we can build up state about
  // the successors of a block iteratively.
  for (const auto *BB : post_order(&F.getEntryBlock())) {
    LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
                      << "\n");
    // If there are fewer than two successors, there is no sense in setting
    // probabilities.
    if (BB->getTerminator()->getNumSuccessors() < 2)
      continue;
    if (calcMetadataWeights(BB))
      continue;
    if (calcEstimatedHeuristics(BB))
      continue;
    if (calcPointerHeuristics(BB))
      continue;
    if (calcZeroHeuristics(BB, TLI))
      continue;
    if (calcFloatingPointHeuristics(BB))
      continue;
  }

  EstimatedLoopWeight.clear();
  EstimatedBlockWeight.clear();
  SccI.reset();

  if (PrintBranchProb && (PrintBranchProbFuncName.empty() ||
                          F.getName() == PrintBranchProbFuncName)) {
    print(dbgs());
  }
}

void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  // We require DT so it's available when LI is available. The LI updating code
  // asserts that DT is also present so if we don't make sure that we have DT
  // here, that assert will trigger.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<PostDominatorTreeWrapperPass>();
  AU.setPreservesAll();
}

bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
  const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const TargetLibraryInfo &TLI =
      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  PostDominatorTree &PDT =
      getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
  BPI.calculate(F, LI, &TLI, &DT, &PDT);
  return false;
}

void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); }

void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
                                             const Module *) const {
  BPI.print(OS);
}

AnalysisKey BranchProbabilityAnalysis::Key;
BranchProbabilityInfo
BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  BranchProbabilityInfo BPI;
  BPI.calculate(F, LI, &TLI, &DT, &PDT);
  return BPI;
}

PreservedAnalyses
BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
  OS << "Printing analysis 'Branch Probability Analysis' for function '"
     << F.getName() << "':\n";
  AM.getResult<BranchProbabilityAnalysis>(F).print(OS);
  return PreservedAnalyses::all();
}