| 1 | //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Loops should be simplified before this analysis. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
| 14 | #include "llvm/ADT/PostOrderIterator.h" |
| 15 | #include "llvm/ADT/SCCIterator.h" |
| 16 | #include "llvm/ADT/STLExtras.h" |
| 17 | #include "llvm/ADT/SmallVector.h" |
| 18 | #include "llvm/Analysis/ConstantFolding.h" |
| 19 | #include "llvm/Analysis/LoopInfo.h" |
| 20 | #include "llvm/Analysis/PostDominators.h" |
| 21 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 22 | #include "llvm/IR/Attributes.h" |
| 23 | #include "llvm/IR/BasicBlock.h" |
| 24 | #include "llvm/IR/CFG.h" |
| 25 | #include "llvm/IR/Constants.h" |
| 26 | #include "llvm/IR/Dominators.h" |
| 27 | #include "llvm/IR/Function.h" |
| 28 | #include "llvm/IR/InstrTypes.h" |
| 29 | #include "llvm/IR/Instruction.h" |
| 30 | #include "llvm/IR/Instructions.h" |
| 31 | #include "llvm/IR/LLVMContext.h" |
| 32 | #include "llvm/IR/Metadata.h" |
| 33 | #include "llvm/IR/PassManager.h" |
| 34 | #include "llvm/IR/ProfDataUtils.h" |
| 35 | #include "llvm/IR/Type.h" |
| 36 | #include "llvm/IR/Value.h" |
| 37 | #include "llvm/InitializePasses.h" |
| 38 | #include "llvm/Pass.h" |
| 39 | #include "llvm/Support/BranchProbability.h" |
| 40 | #include "llvm/Support/Casting.h" |
| 41 | #include "llvm/Support/CommandLine.h" |
| 42 | #include "llvm/Support/Debug.h" |
| 43 | #include "llvm/Support/raw_ostream.h" |
| 44 | #include <cassert> |
| 45 | #include <cstdint> |
| 46 | #include <map> |
| 47 | #include <utility> |
| 48 | |
| 49 | using namespace llvm; |
| 50 | |
| 51 | #define DEBUG_TYPE "branch-prob" |
| 52 | |
// Command-line switch to dump the computed branch probabilities (hidden,
// intended for debugging/testing).
static cl::opt<bool> PrintBranchProb(
    "print-bpi" , cl::init(Val: false), cl::Hidden,
    cl::desc("Print the branch probability info." ));

// When non-empty, restricts -print-bpi output to the named function.
static cl::opt<std::string> PrintBranchProbFuncName(
    "print-bpi-func-name" , cl::Hidden,
    cl::desc("The option to specify the name of the function "
             "whose branch probability info is printed." ));

// Legacy pass-manager registration, listing the analyses this pass may
// consume (loop info, TLI, dominators, post-dominators).
INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob" ,
                      "Branch Probability Analysis" , false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob" ,
                    "Branch Probability Analysis" , false, true)

BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass()
    : FunctionPass(ID) {}

// Pass identification token used by the legacy pass manager; its address,
// not its value, identifies the pass.
char BranchProbabilityInfoWrapperPass::ID = 0;
| 75 | |
| 76 | // Weights are for internal use only. They are used by heuristics to help to |
| 77 | // estimate edges' probability. Example: |
| 78 | // |
| 79 | // Using "Loop Branch Heuristics" we predict weights of edges for the |
| 80 | // block BB2. |
| 81 | // ... |
| 82 | // | |
| 83 | // V |
| 84 | // BB1<-+ |
| 85 | // | | |
| 86 | // | | (Weight = 124) |
| 87 | // V | |
| 88 | // BB2--+ |
| 89 | // | |
| 90 | // | (Weight = 4) |
| 91 | // V |
| 92 | // BB3 |
| 93 | // |
| 94 | // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 |
| 95 | // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 |
// Loop Branch Heuristics (LBH): back edges are strongly preferred over loop
// exits (124 : 4, i.e. ~97% taken).
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;

/// Unreachable-terminating branch taken probability.
///
/// This is the probability for a branch being taken to a block that terminates
/// (eventually) in unreachable. These are predicted as unlikely as possible.
/// All reachable probability will proportionally share the remaining part.
/// getRaw(1) is the smallest representable non-zero probability.
static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(N: 1);

/// Heuristics and lookup tables for non-loop branches:
/// Pointer Heuristics (PH)
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
static const BranchProbability
    PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
static const BranchProbability
    PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);

// Each table maps a comparison predicate to the {taken, not-taken}
// probabilities of the conditional branch guarded by that comparison.
using ProbabilityList = SmallVector<BranchProbability>;
using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;

/// Pointer comparisons:
static const ProbabilityTable PointerTable{
    {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
    {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
};

/// Zero Heuristics (ZH)
static const uint32_t ZH_TAKEN_WEIGHT = 20;
static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
static const BranchProbability
    ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
static const BranchProbability
    ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);

/// Integer compares with 0:
static const ProbabilityTable ICmpWithZeroTable{
    {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely
    {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely
    {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
    {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
};

/// Integer compares with -1:
static const ProbabilityTable ICmpWithMinusOneTable{
    {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
    {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
    // InstCombine canonicalizes X >= 0 into X > -1
    {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
};

/// Integer compares with 1:
static const ProbabilityTable ICmpWithOneTable{
    // InstCombine canonicalizes X <= 0 into X < 1
    {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
};

/// strcmp and similar functions return zero, negative, or positive, if the
/// first string is equal, less, or greater than the second. We consider it
/// likely that the strings are not equal, so a comparison with zero is
/// probably false, but also a comparison with any other number is also
/// probably false given that what exactly is returned for nonzero values is
/// not specified. Any kind of comparison other than equality we know
/// nothing about.
static const ProbabilityTable ICmpWithLibCallTable{
    {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}},
    {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}},
};

// Floating-Point Heuristics (FPH)
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;

/// This is the probability for an ordered floating point comparison.
static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
/// This is the probability for an unordered floating point comparison, it means
/// one or two of the operands are NaN. Usually it is used to test for an
/// exceptional case, so the result is unlikely.
static const uint32_t FPH_UNO_WEIGHT = 1;

static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT,
                                              FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
static const BranchProbability
    FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
static const BranchProbability
    FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
static const BranchProbability
    FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);

/// Floating-Point compares:
static const ProbabilityTable FCmpTable{
    {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
    {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
};

/// Set of dedicated "absolute" execution weights for a block. These weights are
/// meaningful relative to each other and their derivatives only.
enum class BlockExecWeight : std::uint32_t {
  /// Special weight used for cases with exact zero probability.
  ZERO = 0x0,
  /// Minimal possible non zero weight.
  LOWEST_NON_ZERO = 0x1,
  /// Weight to an 'unreachable' block.
  UNREACHABLE = ZERO,
  /// Weight to a block containing non returning call.
  NORETURN = LOWEST_NON_ZERO,
  /// Weight to 'unwind' block of an invoke instruction.
  UNWIND = LOWEST_NON_ZERO,
  /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
  /// with attribute 'cold'.
  COLD = 0xffff,
  /// Default weight is used in cases when there is no dedicated execution
  /// weight set. It is not propagated through the domination line either.
  DEFAULT = 0xfffff
};
| 212 | |
| 213 | namespace { |
| 214 | class BPIConstruction { |
| 215 | public: |
| 216 | BPIConstruction(BranchProbabilityInfo &BPI) : BPI(BPI) {} |
| 217 | void calculate(const Function &F, const LoopInfo &LI, |
| 218 | const TargetLibraryInfo *TLI, DominatorTree *DT, |
| 219 | PostDominatorTree *PDT); |
| 220 | |
| 221 | private: |
| 222 | // Data structure to track SCCs for handling irreducible loops. |
| 223 | class SccInfo { |
| 224 | // Enum of types to classify basic blocks in SCC. Basic block belonging to |
| 225 | // SCC is 'Inner' until it is either 'Header' or 'Exiting'. Note that a |
| 226 | // basic block can be 'Header' and 'Exiting' at the same time. |
| 227 | enum SccBlockType { |
| 228 | Inner = 0x0, |
| 229 | = 0x1, |
| 230 | Exiting = 0x2, |
| 231 | }; |
| 232 | // Map of basic blocks to SCC IDs they belong to. If basic block doesn't |
| 233 | // belong to any SCC it is not in the map. |
| 234 | using SccMap = DenseMap<const BasicBlock *, int>; |
| 235 | // Each basic block in SCC is attributed with one or several types from |
| 236 | // SccBlockType. Map value has uint32_t type (instead of SccBlockType) |
| 237 | // since basic block may be for example "Header" and "Exiting" at the same |
| 238 | // time and we need to be able to keep more than one value from |
| 239 | // SccBlockType. |
| 240 | using SccBlockTypeMap = DenseMap<const BasicBlock *, uint32_t>; |
| 241 | // Vector containing classification of basic blocks for all SCCs where i'th |
| 242 | // vector element corresponds to SCC with ID equal to i. |
| 243 | using SccBlockTypeMaps = std::vector<SccBlockTypeMap>; |
| 244 | |
| 245 | SccMap SccNums; |
| 246 | SccBlockTypeMaps SccBlocks; |
| 247 | |
| 248 | public: |
| 249 | LLVM_ABI explicit SccInfo(const Function &F); |
| 250 | |
| 251 | /// If \p BB belongs to some SCC then ID of that SCC is returned, otherwise |
| 252 | /// -1 is returned. If \p BB belongs to more than one SCC at the same time |
| 253 | /// result is undefined. |
| 254 | LLVM_ABI int getSCCNum(const BasicBlock *BB) const; |
| 255 | /// Returns true if \p BB is a 'header' block in SCC with \p SccNum ID, |
| 256 | /// false otherwise. |
| 257 | bool (const BasicBlock *BB, int SccNum) const { |
| 258 | return getSccBlockType(BB, SccNum) & Header; |
| 259 | } |
| 260 | /// Returns true if \p BB is an 'exiting' block in SCC with \p SccNum ID, |
| 261 | /// false otherwise. |
| 262 | bool isSCCExitingBlock(const BasicBlock *BB, int SccNum) const { |
| 263 | return getSccBlockType(BB, SccNum) & Exiting; |
| 264 | } |
| 265 | /// Fills in \p Enters vector with all such blocks that don't belong to |
| 266 | /// SCC with \p SccNum ID but there is an edge to a block belonging to the |
| 267 | /// SCC. |
| 268 | LLVM_ABI void |
| 269 | getSccEnterBlocks(int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const; |
| 270 | /// Fills in \p Exits vector with all such blocks that don't belong to |
| 271 | /// SCC with \p SccNum ID but there is an edge from a block belonging to the |
| 272 | /// SCC. |
| 273 | LLVM_ABI void getSccExitBlocks(int SccNum, |
| 274 | SmallVectorImpl<BasicBlock *> &Exits) const; |
| 275 | |
| 276 | private: |
| 277 | /// Returns \p BB's type according to classification given by SccBlockType |
| 278 | /// enum. Please note that \p BB must belong to SSC with \p SccNum ID. |
| 279 | LLVM_ABI uint32_t getSccBlockType(const BasicBlock *BB, int SccNum) const; |
| 280 | /// Calculates \p BB's type and stores it in internal data structures for |
| 281 | /// future use. Please note that \p BB must belong to SSC with \p SccNum ID. |
| 282 | void calculateSccBlockType(const BasicBlock *BB, int SccNum); |
| 283 | }; |
| 284 | |
| 285 | /// Pair of Loop and SCC ID number. Used to unify handling of normal and |
| 286 | /// SCC based loop representations. |
| 287 | using LoopData = std::pair<Loop *, int>; |
| 288 | /// Helper class to keep basic block along with its loop data information. |
| 289 | class LoopBlock { |
| 290 | public: |
| 291 | LLVM_ABI explicit LoopBlock(const BasicBlock *BB, const LoopInfo &LI, |
| 292 | const SccInfo &SccI); |
| 293 | |
| 294 | const BasicBlock *getBlock() const { return BB; } |
| 295 | BasicBlock *getBlock() { return const_cast<BasicBlock *>(BB); } |
| 296 | LoopData getLoopData() const { return LD; } |
| 297 | Loop *getLoop() const { return LD.first; } |
| 298 | int getSccNum() const { return LD.second; } |
| 299 | |
| 300 | bool belongsToLoop() const { return getLoop() || getSccNum() != -1; } |
| 301 | bool belongsToSameLoop(const LoopBlock &LB) const { |
| 302 | return (LB.getLoop() && getLoop() == LB.getLoop()) || |
| 303 | (LB.getSccNum() != -1 && getSccNum() == LB.getSccNum()); |
| 304 | } |
| 305 | |
| 306 | private: |
| 307 | const BasicBlock *const BB = nullptr; |
| 308 | LoopData LD = {nullptr, -1}; |
| 309 | }; |
| 310 | |
| 311 | // Pair of LoopBlocks representing an edge from first to second block. |
| 312 | using LoopEdge = std::pair<const LoopBlock &, const LoopBlock &>; |
| 313 | |
| 314 | /// Helper to construct LoopBlock for \p BB. |
| 315 | LoopBlock getLoopBlock(const BasicBlock *BB) const { |
| 316 | return LoopBlock(BB, *LI, *SccI); |
| 317 | } |
| 318 | |
| 319 | /// Returns true if destination block belongs to some loop and source block is |
| 320 | /// either doesn't belong to any loop or belongs to a loop which is not inner |
| 321 | /// relative to the destination block. |
| 322 | bool isLoopEnteringEdge(const LoopEdge &Edge) const; |
| 323 | /// Returns true if source block belongs to some loop and destination block is |
| 324 | /// either doesn't belong to any loop or belongs to a loop which is not inner |
| 325 | /// relative to the source block. |
| 326 | bool isLoopExitingEdge(const LoopEdge &Edge) const; |
| 327 | /// Returns true if \p Edge is either enters to or exits from some loop, false |
| 328 | /// in all other cases. |
| 329 | bool isLoopEnteringExitingEdge(const LoopEdge &Edge) const; |
| 330 | /// Returns true if source and destination blocks belongs to the same loop and |
| 331 | /// destination block is loop header. |
| 332 | bool isLoopBackEdge(const LoopEdge &Edge) const; |
| 333 | // Fills in \p Enters vector with all "enter" blocks to a loop \LB belongs to. |
| 334 | void getLoopEnterBlocks(const LoopBlock &LB, |
| 335 | SmallVectorImpl<BasicBlock *> &Enters) const; |
| 336 | // Fills in \p Exits vector with all "exit" blocks from a loop \LB belongs to. |
| 337 | void getLoopExitBlocks(const LoopBlock &LB, |
| 338 | SmallVectorImpl<BasicBlock *> &Exits) const; |
| 339 | |
| 340 | /// Returns estimated weight for \p BB. std::nullopt if \p BB has no estimated |
| 341 | /// weight. |
| 342 | std::optional<uint32_t> getEstimatedBlockWeight(const BasicBlock *BB) const; |
| 343 | |
| 344 | /// Returns estimated weight to enter \p L. In other words it is weight of |
| 345 | /// loop's header block not scaled by trip count. Returns std::nullopt if \p L |
| 346 | /// has no no estimated weight. |
| 347 | std::optional<uint32_t> getEstimatedLoopWeight(const LoopData &L) const; |
| 348 | |
| 349 | /// Return estimated weight for \p Edge. Returns std::nullopt if estimated |
| 350 | /// weight is unknown. |
| 351 | std::optional<uint32_t> getEstimatedEdgeWeight(const LoopEdge &Edge) const; |
| 352 | |
| 353 | /// Iterates over all edges leading from \p SrcBB to \p Successors and |
| 354 | /// returns maximum of all estimated weights. If at least one edge has unknown |
| 355 | /// estimated weight std::nullopt is returned. |
| 356 | template <class IterT> |
| 357 | std::optional<uint32_t> |
| 358 | getMaxEstimatedEdgeWeight(const LoopBlock &SrcBB, |
| 359 | iterator_range<IterT> Successors) const; |
| 360 | |
| 361 | /// If \p LoopBB has no estimated weight then set it to \p BBWeight and |
| 362 | /// return true. Otherwise \p BB's weight remains unchanged and false is |
| 363 | /// returned. In addition all blocks/loops that might need their weight to be |
| 364 | /// re-estimated are put into BlockWorkList/LoopWorkList. |
| 365 | bool updateEstimatedBlockWeight(LoopBlock &LoopBB, uint32_t BBWeight, |
| 366 | SmallVectorImpl<BasicBlock *> &BlockWorkList, |
| 367 | SmallVectorImpl<LoopBlock> &LoopWorkList); |
| 368 | |
| 369 | /// Starting from \p LoopBB (including \p LoopBB itself) propagate \p BBWeight |
| 370 | /// up the domination tree. |
| 371 | void propagateEstimatedBlockWeight(const LoopBlock &LoopBB, DominatorTree *DT, |
| 372 | PostDominatorTree *PDT, uint32_t BBWeight, |
| 373 | SmallVectorImpl<BasicBlock *> &WorkList, |
| 374 | SmallVectorImpl<LoopBlock> &LoopWorkList); |
| 375 | |
| 376 | /// Returns block's weight encoded in the IR. |
| 377 | std::optional<uint32_t> getInitialEstimatedBlockWeight(const BasicBlock *BB); |
| 378 | |
| 379 | // Computes estimated weights for all blocks in \p F. |
| 380 | void estimateBlockWeights(const Function &F, DominatorTree *DT, |
| 381 | PostDominatorTree *PDT); |
| 382 | |
| 383 | /// Based on computed weights by \p computeEstimatedBlockWeight set |
| 384 | /// probabilities on branches. |
| 385 | bool calcEstimatedHeuristics(const BasicBlock *BB); |
| 386 | bool calcMetadataWeights(const BasicBlock *BB); |
| 387 | bool calcPointerHeuristics(const BasicBlock *BB); |
| 388 | bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); |
| 389 | bool calcFloatingPointHeuristics(const BasicBlock *BB); |
| 390 | |
| 391 | BranchProbabilityInfo &BPI; |
| 392 | |
| 393 | const LoopInfo *LI = nullptr; |
| 394 | |
| 395 | /// Keeps information about all SCCs in a function. |
| 396 | std::unique_ptr<const SccInfo> SccI; |
| 397 | |
| 398 | /// Keeps mapping of a basic block to its estimated weight. |
| 399 | SmallDenseMap<const BasicBlock *, uint32_t> EstimatedBlockWeight; |
| 400 | |
| 401 | /// Keeps mapping of a loop to estimated weight to enter the loop. |
| 402 | SmallDenseMap<LoopData, uint32_t> EstimatedLoopWeight; |
| 403 | }; |
| 404 | |
| 405 | BPIConstruction::SccInfo::SccInfo(const Function &F) { |
| 406 | // Record SCC numbers of blocks in the CFG to identify irreducible loops. |
| 407 | // FIXME: We could only calculate this if the CFG is known to be irreducible |
| 408 | // (perhaps cache this info in LoopInfo if we can easily calculate it there?). |
| 409 | int SccNum = 0; |
| 410 | for (scc_iterator<const Function *> It = scc_begin(G: &F); !It.isAtEnd(); |
| 411 | ++It, ++SccNum) { |
| 412 | // Ignore single-block SCCs since they either aren't loops or LoopInfo will |
| 413 | // catch them. |
| 414 | const std::vector<const BasicBlock *> &Scc = *It; |
| 415 | if (Scc.size() == 1) |
| 416 | continue; |
| 417 | |
| 418 | LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":" ); |
| 419 | for (const auto *BB : Scc) { |
| 420 | LLVM_DEBUG(dbgs() << " " << BB->getName()); |
| 421 | SccNums[BB] = SccNum; |
| 422 | calculateSccBlockType(BB, SccNum); |
| 423 | } |
| 424 | LLVM_DEBUG(dbgs() << "\n" ); |
| 425 | } |
| 426 | } |
| 427 | |
| 428 | int BPIConstruction::SccInfo::getSCCNum(const BasicBlock *BB) const { |
| 429 | auto SccIt = SccNums.find(Val: BB); |
| 430 | if (SccIt == SccNums.end()) |
| 431 | return -1; |
| 432 | return SccIt->second; |
| 433 | } |
| 434 | |
| 435 | void BPIConstruction::SccInfo::getSccEnterBlocks( |
| 436 | int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const { |
| 437 | |
| 438 | for (auto MapIt : SccBlocks[SccNum]) { |
| 439 | const auto *BB = MapIt.first; |
| 440 | if (isSCCHeader(BB, SccNum)) |
| 441 | for (const auto *Pred : predecessors(BB)) |
| 442 | if (getSCCNum(BB: Pred) != SccNum) |
| 443 | Enters.push_back(Elt: const_cast<BasicBlock *>(BB)); |
| 444 | } |
| 445 | } |
| 446 | |
| 447 | void BPIConstruction::SccInfo::getSccExitBlocks( |
| 448 | int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const { |
| 449 | for (auto MapIt : SccBlocks[SccNum]) { |
| 450 | const auto *BB = MapIt.first; |
| 451 | if (isSCCExitingBlock(BB, SccNum)) |
| 452 | for (const auto *Succ : successors(BB)) |
| 453 | if (getSCCNum(BB: Succ) != SccNum) |
| 454 | Exits.push_back(Elt: const_cast<BasicBlock *>(Succ)); |
| 455 | } |
| 456 | } |
| 457 | |
| 458 | uint32_t BPIConstruction::SccInfo::getSccBlockType(const BasicBlock *BB, |
| 459 | int SccNum) const { |
| 460 | assert(getSCCNum(BB) == SccNum); |
| 461 | |
| 462 | assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC" ); |
| 463 | const auto &SccBlockTypes = SccBlocks[SccNum]; |
| 464 | |
| 465 | auto It = SccBlockTypes.find(Val: BB); |
| 466 | if (It != SccBlockTypes.end()) { |
| 467 | return It->second; |
| 468 | } |
| 469 | return Inner; |
| 470 | } |
| 471 | |
| 472 | void BPIConstruction::SccInfo::calculateSccBlockType(const BasicBlock *BB, |
| 473 | int SccNum) { |
| 474 | assert(getSCCNum(BB) == SccNum); |
| 475 | uint32_t BlockType = Inner; |
| 476 | |
| 477 | if (llvm::any_of(Range: predecessors(BB), P: [&](const BasicBlock *Pred) { |
| 478 | // Consider any block that is an entry point to the SCC as |
| 479 | // a header. |
| 480 | return getSCCNum(BB: Pred) != SccNum; |
| 481 | })) |
| 482 | BlockType |= Header; |
| 483 | |
| 484 | if (llvm::any_of(Range: successors(BB), P: [&](const BasicBlock *Succ) { |
| 485 | return getSCCNum(BB: Succ) != SccNum; |
| 486 | })) |
| 487 | BlockType |= Exiting; |
| 488 | |
| 489 | // Lazily compute the set of headers for a given SCC and cache the results |
| 490 | // in the SccHeaderMap. |
| 491 | if (SccBlocks.size() <= static_cast<unsigned>(SccNum)) |
| 492 | SccBlocks.resize(new_size: SccNum + 1); |
| 493 | auto &SccBlockTypes = SccBlocks[SccNum]; |
| 494 | |
| 495 | if (BlockType != Inner) { |
| 496 | bool IsInserted; |
| 497 | std::tie(args: std::ignore, args&: IsInserted) = |
| 498 | SccBlockTypes.insert(KV: std::make_pair(x&: BB, y&: BlockType)); |
| 499 | assert(IsInserted && "Duplicated block in SCC" ); |
| 500 | } |
| 501 | } |
| 502 | |
| 503 | BPIConstruction::LoopBlock::LoopBlock(const BasicBlock *BB, const LoopInfo &LI, |
| 504 | const SccInfo &SccI) |
| 505 | : BB(BB) { |
| 506 | LD.first = LI.getLoopFor(BB); |
| 507 | if (!LD.first) { |
| 508 | LD.second = SccI.getSCCNum(BB); |
| 509 | } |
| 510 | } |
| 511 | |
| 512 | bool BPIConstruction::isLoopEnteringEdge(const LoopEdge &Edge) const { |
| 513 | const auto &SrcBlock = Edge.first; |
| 514 | const auto &DstBlock = Edge.second; |
| 515 | return (DstBlock.getLoop() && |
| 516 | !DstBlock.getLoop()->contains(L: SrcBlock.getLoop())) || |
| 517 | // Assume that SCCs can't be nested. |
| 518 | (DstBlock.getSccNum() != -1 && |
| 519 | SrcBlock.getSccNum() != DstBlock.getSccNum()); |
| 520 | } |
| 521 | |
| 522 | bool BPIConstruction::isLoopExitingEdge(const LoopEdge &Edge) const { |
| 523 | return isLoopEnteringEdge(Edge: {Edge.second, Edge.first}); |
| 524 | } |
| 525 | |
| 526 | bool BPIConstruction::isLoopEnteringExitingEdge(const LoopEdge &Edge) const { |
| 527 | return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge); |
| 528 | } |
| 529 | |
| 530 | bool BPIConstruction::isLoopBackEdge(const LoopEdge &Edge) const { |
| 531 | const auto &SrcBlock = Edge.first; |
| 532 | const auto &DstBlock = Edge.second; |
| 533 | return SrcBlock.belongsToSameLoop(LB: DstBlock) && |
| 534 | ((DstBlock.getLoop() && |
| 535 | DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) || |
| 536 | (DstBlock.getSccNum() != -1 && |
| 537 | SccI->isSCCHeader(BB: DstBlock.getBlock(), SccNum: DstBlock.getSccNum()))); |
| 538 | } |
| 539 | |
| 540 | void BPIConstruction::getLoopEnterBlocks( |
| 541 | const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const { |
| 542 | if (LB.getLoop()) { |
| 543 | auto * = LB.getLoop()->getHeader(); |
| 544 | Enters.append(in_start: pred_begin(BB: Header), in_end: pred_end(BB: Header)); |
| 545 | } else { |
| 546 | assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?" ); |
| 547 | SccI->getSccEnterBlocks(SccNum: LB.getSccNum(), Enters); |
| 548 | } |
| 549 | } |
| 550 | |
| 551 | void BPIConstruction::getLoopExitBlocks( |
| 552 | const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const { |
| 553 | if (LB.getLoop()) { |
| 554 | LB.getLoop()->getExitBlocks(ExitBlocks&: Exits); |
| 555 | } else { |
| 556 | assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?" ); |
| 557 | SccI->getSccExitBlocks(SccNum: LB.getSccNum(), Exits); |
| 558 | } |
| 559 | } |
| 560 | |
// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing. Examine metadata against unreachable
// heuristic. The probability of the edge coming to unreachable block is
// set to min of metadata and unreachable heuristic.
//
// Returns true iff branch_weights metadata was found and probabilities were
// set from it.
bool BPIConstruction::calcMetadataWeights(const BasicBlock *BB) {
  const Instruction *TI = BB->getTerminator();
  assert(TI->getNumSuccessors() > 1 && "expected more than one successor!" );
  // Only terminators that can carry meaningful branch_weights are handled.
  if (!(isa<CondBrInst>(Val: TI) || isa<SwitchInst>(Val: TI) || isa<IndirectBrInst>(Val: TI) ||
        isa<InvokeInst>(Val: TI) || isa<CallBrInst>(Val: TI)))
    return false;

  MDNode *WeightsNode = getValidBranchWeightMDNode(I: *TI);
  if (!WeightsNode)
    return false;

  // Check that the number of successors is manageable.
  assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors" );

  // Build up the final weights that will be used in a temporary buffer.
  // Compute the sum of all weights to later decide whether they need to
  // be scaled to fit in 32 bits.
  uint64_t WeightSum = 0;
  SmallVector<uint32_t, 2> Weights;
  SmallVector<unsigned, 2> UnreachableIdxs;
  SmallVector<unsigned, 2> ReachableIdxs;

  extractBranchWeights(ProfileData: WeightsNode, Weights);
  auto Succs = succ_begin(I: TI);
  // Partition successor indices into those whose estimated weight marks them
  // as (eventually) unreachable and the rest.
  for (unsigned I = 0, E = Weights.size(); I != E; ++I) {
    WeightSum += Weights[I];
    const LoopBlock SrcLoopBB = getLoopBlock(BB);
    const LoopBlock DstLoopBB = getLoopBlock(BB: *Succs++);
    auto EstimatedWeight = getEstimatedEdgeWeight(Edge: {SrcLoopBB, DstLoopBB});
    if (EstimatedWeight &&
        *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
      UnreachableIdxs.push_back(Elt: I);
    else
      ReachableIdxs.push_back(Elt: I);
  }
  assert(Weights.size() == TI->getNumSuccessors() && "Checked above" );

  // If the sum of weights does not fit in 32 bits, scale every weight down
  // accordingly.
  uint64_t ScalingFactor =
      (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;

  if (ScalingFactor > 1) {
    WeightSum = 0;
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
      Weights[I] /= ScalingFactor;
      WeightSum += Weights[I];
    }
  }
  assert(WeightSum <= UINT32_MAX &&
         "Expected weights to scale down to 32 bits" );

  // Degenerate metadata (all-zero weights, or every successor unreachable):
  // fall back to a uniform distribution.
  if (WeightSum == 0 || ReachableIdxs.size() == 0) {
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
      Weights[I] = 1;
    WeightSum = TI->getNumSuccessors();
  }

  // Set the probability.
  SmallVector<BranchProbability, 2> BP;
  for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
    BP.push_back(Elt: { Weights[I], static_cast<uint32_t>(WeightSum) });

  // Examine the metadata against unreachable heuristic.
  // If the unreachable heuristic is stronger then we use it for this edge.
  if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) {
    BPI.setEdgeProbability(Src: BB, Probs: BP);
    return true;
  }

  // Cap each unreachable edge at the unreachable heuristic's probability.
  auto UnreachableProb = UR_TAKEN_PROB;
  for (auto I : UnreachableIdxs)
    if (UnreachableProb < BP[I]) {
      BP[I] = UnreachableProb;
    }

  // Sum of all edge probabilities must be 1.0. If we modified the probability
  // of some edges then we must distribute the introduced difference over the
  // reachable blocks.
  //
  // Proportional distribution: the relation between probabilities of the
  // reachable edges is kept unchanged. That is for any reachable edges i and j:
  //   newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
  //   newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
  // Where K is independent of i,j.
  //   newBP[i] == oldBP[i] * K
  // We need to find K.
  // Make sum of all reachables of the left and right parts:
  //   sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
  // Sum of newBP must be equal to 1.0:
  //   sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
  //   sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
  // Where sum_of_unreachable(newBP) is what has been just changed.
  // Finally:
  //   K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
  //   K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
  BranchProbability NewUnreachableSum = BranchProbability::getZero();
  for (auto I : UnreachableIdxs)
    NewUnreachableSum += BP[I];

  BranchProbability NewReachableSum =
      BranchProbability::getOne() - NewUnreachableSum;

  BranchProbability OldReachableSum = BranchProbability::getZero();
  for (auto I : ReachableIdxs)
    OldReachableSum += BP[I];

  if (OldReachableSum != NewReachableSum) { // Anything to distribute?
    if (OldReachableSum.isZero()) {
      // If all oldBP[i] are zeroes then the proportional distribution results
      // in all zero probabilities and the error stays big. In this case we
      // evenly spread NewReachableSum over the reachable edges.
      BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size();
      for (auto I : ReachableIdxs)
        BP[I] = PerEdge;
    } else {
      for (auto I : ReachableIdxs) {
        // We use uint64_t to avoid double rounding error of the following
        // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
        // The formula is taken from the private constructor
        // BranchProbability(uint32_t Numerator, uint32_t Denominator)
        uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) *
                       BP[I].getNumerator();
        uint32_t Div = static_cast<uint32_t>(
            divideNearest(Numerator: Mul, Denominator: OldReachableSum.getNumerator()));
        BP[I] = BranchProbability::getRaw(N: Div);
      }
    }
  }

  BPI.setEdgeProbability(Src: BB, Probs: BP);

  return true;
}
| 699 | |
| 700 | // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison |
| 701 | // between two pointer or pointer and NULL will fail. |
| 702 | bool BPIConstruction::calcPointerHeuristics(const BasicBlock *BB) { |
| 703 | const CondBrInst *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator()); |
| 704 | if (!BI) |
| 705 | return false; |
| 706 | |
| 707 | Value *Cond = BI->getCondition(); |
| 708 | ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond); |
| 709 | if (!CI || !CI->isEquality()) |
| 710 | return false; |
| 711 | |
| 712 | Value *LHS = CI->getOperand(i_nocapture: 0); |
| 713 | |
| 714 | if (!LHS->getType()->isPointerTy()) |
| 715 | return false; |
| 716 | |
| 717 | assert(CI->getOperand(1)->getType()->isPointerTy()); |
| 718 | |
| 719 | auto Search = PointerTable.find(x: CI->getPredicate()); |
| 720 | if (Search == PointerTable.end()) |
| 721 | return false; |
| 722 | BPI.setEdgeProbability(Src: BB, Probs: Search->second); |
| 723 | return true; |
| 724 | } |
| 725 | |
// Compute the unlikely successors to the block BB in the loop L, specifically
// those that are unlikely because this is a loop, and add them to the
// UnlikelyBlocks set.
static void
computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
                          SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
  // Sometimes in a loop we have a branch whose condition is made false by
  // taking it. This is typically something like
  //  int n = 0;
  //  while (...) {
  //    if (++n >= MAX) {
  //      n = 0;
  //    }
  //  }
  // In this sort of situation taking the branch means that at the very least it
  // won't be taken again in the next iteration of the loop, so we should
  // consider it less likely than a typical branch.
  //
  // We detect this by looking back through the graph of PHI nodes that sets the
  // value that the condition depends on, and seeing if we can reach a successor
  // block which can be determined to make the condition false.
  //
  // FIXME: We currently consider unlikely blocks to be half as likely as other
  // blocks, but if we consider the example above the likelihood is actually
  // 1/MAX. We could therefore be more precise in how unlikely we consider
  // blocks to be, but it would require more careful examination of the form
  // of the comparison expression.
  const CondBrInst *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator());
  if (!BI)
    return;

  // Check if the branch is based on an instruction compared with a constant.
  CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition());
  if (!CI || !isa<Instruction>(Val: CI->getOperand(i_nocapture: 0)) ||
      !isa<Constant>(Val: CI->getOperand(i_nocapture: 1)))
    return;

  // Either the instruction must be a PHI, or a chain of operations involving
  // constants that ends in a PHI which we can then collapse into a single value
  // if the PHI value is known.
  Instruction *CmpLHS = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: 0));
  PHINode *CmpPHI = dyn_cast<PHINode>(Val: CmpLHS);
  Constant *CmpConst = dyn_cast<Constant>(Val: CI->getOperand(i_nocapture: 1));
  // Walk binary operators with a constant RHS (e.g. "n + 1") until we hit a
  // PHI, recording them so they can be folded later with a concrete PHI value.
  SmallVector<BinaryOperator *, 1> InstChain;
  while (!CmpPHI && CmpLHS && isa<BinaryOperator>(Val: CmpLHS) &&
         isa<Constant>(Val: CmpLHS->getOperand(i: 1))) {
    // Stop if the chain extends outside of the loop.
    if (!L->contains(Inst: CmpLHS))
      return;
    InstChain.push_back(Elt: cast<BinaryOperator>(Val: CmpLHS));
    CmpLHS = dyn_cast<Instruction>(Val: CmpLHS->getOperand(i: 0));
    if (CmpLHS)
      CmpPHI = dyn_cast<PHINode>(Val: CmpLHS);
  }
  // Bail out unless the chain ended in a PHI that itself lives in the loop.
  if (!CmpPHI || !L->contains(Inst: CmpPHI))
    return;

  // Trace the phi node (transitively, through nested PHIs) to find all values
  // that come from successors of BB.
  SmallPtrSet<PHINode*, 8> VisitedInsts;
  SmallVector<PHINode*, 8> WorkList;
  WorkList.push_back(Elt: CmpPHI);
  VisitedInsts.insert(Ptr: CmpPHI);
  while (!WorkList.empty()) {
    PHINode *P = WorkList.pop_back_val();
    for (BasicBlock *B : P->blocks()) {
      // Skip blocks that aren't part of the loop.
      if (!L->contains(BB: B))
        continue;
      Value *V = P->getIncomingValueForBlock(BB: B);
      // If the source is a PHI add it to the work list if we haven't
      // already visited it.
      if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
        if (VisitedInsts.insert(Ptr: PN).second)
          WorkList.push_back(Elt: PN);
        continue;
      }
      // If this incoming value is a constant and B is a successor of BB, then
      // we can constant-evaluate the compare to see if it makes the branch be
      // taken or not.
      Constant *CmpLHSConst = dyn_cast<Constant>(Val: V);
      if (!CmpLHSConst || !llvm::is_contained(Range: successors(BB), Element: B))
        continue;
      // First collapse InstChain: replay the recorded operations (in original
      // program order, hence the reverse) on the known constant.
      const DataLayout &DL = BB->getDataLayout();
      for (Instruction *I : llvm::reverse(C&: InstChain)) {
        CmpLHSConst = ConstantFoldBinaryOpOperands(
            Opcode: I->getOpcode(), LHS: CmpLHSConst, RHS: cast<Constant>(Val: I->getOperand(i: 1)), DL);
        // Folding can fail; if it does, this incoming value tells us nothing.
        if (!CmpLHSConst)
          break;
      }
      if (!CmpLHSConst)
        continue;
      // Now constant-evaluate the compare itself.
      Constant *Result = ConstantFoldCompareInstOperands(
          Predicate: CI->getPredicate(), LHS: CmpLHSConst, RHS: CmpConst, DL);
      // If the result means we don't branch to the block then that block is
      // unlikely.
      if (Result && ((Result->isNullValue() && B == BI->getSuccessor(i: 0)) ||
                     (Result->isOneValue() && B == BI->getSuccessor(i: 1))))
        UnlikelyBlocks.insert(Ptr: B);
    }
  }
}
| 830 | |
| 831 | std::optional<uint32_t> |
| 832 | BPIConstruction::getEstimatedBlockWeight(const BasicBlock *BB) const { |
| 833 | auto WeightIt = EstimatedBlockWeight.find(Val: BB); |
| 834 | if (WeightIt == EstimatedBlockWeight.end()) |
| 835 | return std::nullopt; |
| 836 | return WeightIt->second; |
| 837 | } |
| 838 | |
| 839 | std::optional<uint32_t> |
| 840 | BPIConstruction::getEstimatedLoopWeight(const LoopData &L) const { |
| 841 | auto WeightIt = EstimatedLoopWeight.find(Val: L); |
| 842 | if (WeightIt == EstimatedLoopWeight.end()) |
| 843 | return std::nullopt; |
| 844 | return WeightIt->second; |
| 845 | } |
| 846 | |
| 847 | std::optional<uint32_t> |
| 848 | BPIConstruction::getEstimatedEdgeWeight(const LoopEdge &Edge) const { |
| 849 | // For edges entering a loop take weight of a loop rather than an individual |
| 850 | // block in the loop. |
| 851 | return isLoopEnteringEdge(Edge) |
| 852 | ? getEstimatedLoopWeight(L: Edge.second.getLoopData()) |
| 853 | : getEstimatedBlockWeight(BB: Edge.second.getBlock()); |
| 854 | } |
| 855 | |
| 856 | template <class IterT> |
| 857 | std::optional<uint32_t> BPIConstruction::getMaxEstimatedEdgeWeight( |
| 858 | const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const { |
| 859 | std::optional<uint32_t> MaxWeight; |
| 860 | for (const BasicBlock *DstBB : Successors) { |
| 861 | const LoopBlock DstLoopBB = getLoopBlock(BB: DstBB); |
| 862 | auto Weight = getEstimatedEdgeWeight(Edge: {SrcLoopBB, DstLoopBB}); |
| 863 | |
| 864 | if (!Weight) |
| 865 | return std::nullopt; |
| 866 | |
| 867 | if (!MaxWeight || *MaxWeight < *Weight) |
| 868 | MaxWeight = Weight; |
| 869 | } |
| 870 | |
| 871 | return MaxWeight; |
| 872 | } |
| 873 | |
// Updates \p LoopBB's weight and returns true. If \p LoopBB already has an
// associated weight it is left unchanged and false is returned.
//
// Please note by the algorithm the weight is not expected to change once set
// thus 'false' status is used to track visited blocks.
bool BPIConstruction::updateEstimatedBlockWeight(
    LoopBlock &LoopBB, uint32_t BBWeight,
    SmallVectorImpl<BasicBlock *> &BlockWorkList,
    SmallVectorImpl<LoopBlock> &LoopWorkList) {
  BasicBlock *BB = LoopBB.getBlock();

  // In general, weight is assigned to a block when it has final value and
  // can't/shouldn't be changed. However, there are cases when a block
  // inherently has several (possibly "contradicting") weights. For example,
  // "unwind" block may also contain "cold" call. In that case the first
  // set weight is favored and all consequent weights are ignored.
  if (!EstimatedBlockWeight.insert(KV: {BB, BBWeight}).second)
    return false;

  // Now that BB has a weight, predecessors without one may become computable;
  // queue them for re-examination.
  for (BasicBlock *PredBlock : predecessors(BB)) {
    LoopBlock PredLoop = getLoopBlock(BB: PredBlock);
    // Add affected block/loop to a working list. If the Pred->BB edge exits
    // Pred's loop, the new weight contributes to the loop estimate rather
    // than to the predecessor block's own one.
    if (isLoopExitingEdge(Edge: {PredLoop, LoopBB})) {
      if (!EstimatedLoopWeight.count(Val: PredLoop.getLoopData()))
        LoopWorkList.push_back(Elt: PredLoop);
    } else if (!EstimatedBlockWeight.count(Val: PredBlock))
      BlockWorkList.push_back(Elt: PredBlock);
  }
  return true;
}
| 904 | |
// Starting from \p BB traverse through dominator blocks and assign \p BBWeight
// to all such blocks that are post dominated by \p BB. In other words to all
// blocks that the one is executed if and only if another one is executed.
// Importantly, we skip loops here for two reasons. First weights of blocks in
// a loop should be scaled by trip count (yet possibly unknown). Second there is
// no value in doing that because that doesn't give any additional information
// regarding distribution of probabilities inside the loop. The exception is
// loop 'enter' and 'exit' edges that are handled in a special way at
// calcEstimatedHeuristics.
//
// In addition, \p BlockWorkList and \p LoopWorkList are populated with basic
// blocks/loops if at least one successor has updated estimated weight.
void BPIConstruction::propagateEstimatedBlockWeight(
    const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
    uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
    SmallVectorImpl<LoopBlock> &LoopWorkList) {
  const BasicBlock *BB = LoopBB.getBlock();
  const auto *DTStartNode = DT->getNode(BB);
  const auto *PDTStartNode = PDT->getNode(BB);

  // TODO: Consider propagating weight down the domination line as well.
  // Walk up the immediate-dominator chain starting at BB itself.
  for (const auto *DTNode = DTStartNode; DTNode != nullptr;
       DTNode = DTNode->getIDom()) {
    auto *DomBB = DTNode->getBlock();
    // Consider blocks which lie on one 'line': DomBB dominates BB and BB post
    // dominates DomBB, i.e. they always execute together.
    if (!PDT->dominates(A: PDTStartNode, B: PDT->getNode(BB: DomBB)))
      // If BB doesn't post dominate DomBB it will not post dominate dominators
      // of DomBB as well.
      break;

    LoopBlock DomLoopBB = getLoopBlock(BB: DomBB);
    const LoopEdge Edge{DomLoopBB, LoopBB};
    // Don't propagate weight to blocks belonging to different loops.
    if (!isLoopEnteringExitingEdge(Edge)) {
      if (!updateEstimatedBlockWeight(LoopBB&: DomLoopBB, BBWeight, BlockWorkList,
                                      LoopWorkList))
        // If DomBB has weight set then all it's predecessors are already
        // processed (since we propagate weight up to the top of IR each time).
        break;
    } else if (isLoopExitingEdge(Edge)) {
      // The weight may contribute to DomBB's loop estimate instead.
      LoopWorkList.push_back(Elt: DomLoopBB);
    }
  }
}
| 949 | |
| 950 | std::optional<uint32_t> |
| 951 | BPIConstruction::getInitialEstimatedBlockWeight(const BasicBlock *BB) { |
| 952 | // Returns true if \p BB has call marked with "NoReturn" attribute. |
| 953 | auto hasNoReturn = [&](const BasicBlock *BB) { |
| 954 | for (const auto &I : reverse(C: *BB)) |
| 955 | if (const CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
| 956 | if (CI->hasFnAttr(Kind: Attribute::NoReturn)) |
| 957 | return true; |
| 958 | |
| 959 | return false; |
| 960 | }; |
| 961 | |
| 962 | // Important note regarding the order of checks. They are ordered by weight |
| 963 | // from lowest to highest. Doing that allows to avoid "unstable" results |
| 964 | // when several conditions heuristics can be applied simultaneously. |
| 965 | if (isa<UnreachableInst>(Val: BB->getTerminator()) || |
| 966 | // If this block is terminated by a call to |
| 967 | // @llvm.experimental.deoptimize then treat it like an unreachable |
| 968 | // since it is expected to practically never execute. |
| 969 | // TODO: Should we actually treat as never returning call? |
| 970 | BB->getTerminatingDeoptimizeCall()) |
| 971 | return hasNoReturn(BB) |
| 972 | ? static_cast<uint32_t>(BlockExecWeight::NORETURN) |
| 973 | : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE); |
| 974 | |
| 975 | // Check if the block is an exception handling block. |
| 976 | if (BB->isEHPad()) |
| 977 | return static_cast<uint32_t>(BlockExecWeight::UNWIND); |
| 978 | |
| 979 | // Check if the block contains 'cold' call. |
| 980 | for (const auto &I : *BB) |
| 981 | if (const CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
| 982 | if (CI->hasFnAttr(Kind: Attribute::Cold)) |
| 983 | return static_cast<uint32_t>(BlockExecWeight::COLD); |
| 984 | |
| 985 | return std::nullopt; |
| 986 | } |
| 987 | |
// Does RPO traversal over all blocks in \p F and assigns weights to
// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
// best to propagate the weight up/down the IR.
void BPIConstruction::estimateBlockWeights(const Function &F, DominatorTree *DT,
                                           PostDominatorTree *PDT) {
  SmallVector<BasicBlock *, 8> BlockWorkList;
  SmallVector<LoopBlock, 8> LoopWorkList;
  // Cache of per-loop exit blocks, computed lazily on first use of each loop.
  SmallDenseMap<LoopData, SmallVector<BasicBlock *, 4>> LoopExitBlocks;

  // By doing RPO we make sure that all predecessors already have weights
  // calculated before visiting their successors.
  ReversePostOrderTraversal<const Function *> RPOT(&F);
  for (const auto *BB : RPOT)
    if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
      // If we were able to find estimated weight for the block set it to this
      // block and propagate up the IR.
      propagateEstimatedBlockWeight(LoopBB: getLoopBlock(BB), DT, PDT, BBWeight: *BBWeight,
                                    BlockWorkList, LoopWorkList);

  // BlockWorkList/LoopWorkList contains blocks/loops with at least one
  // successor/exit having estimated weight. Try to propagate weight to such
  // blocks/loops from successors/exits.
  // Process loops and blocks. Order is not important.
  do {
    while (!LoopWorkList.empty()) {
      const LoopBlock LoopBB = LoopWorkList.pop_back_val();
      const LoopData LD = LoopBB.getLoopData();
      // Loop weights, like block weights, are final once set.
      if (EstimatedLoopWeight.count(Val: LD))
        continue;

      auto Res = LoopExitBlocks.try_emplace(Key: LD);
      SmallVectorImpl<BasicBlock *> &Exits = Res.first->second;
      if (Res.second)
        getLoopExitBlocks(LB: LoopBB, Exits);
      // The loop's weight is the maximum estimated weight of its exit edges.
      auto LoopWeight = getMaxEstimatedEdgeWeight(
          SrcLoopBB: LoopBB, Successors: make_range(x: Exits.begin(), y: Exits.end()));

      if (LoopWeight) {
        // If we never exit the loop then we can enter it once at maximum.
        if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
          LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);

        EstimatedLoopWeight.insert(KV: {LD, *LoopWeight});
        // Add all blocks entering the loop into working list.
        getLoopEnterBlocks(LB: LoopBB, Enters&: BlockWorkList);
      }
    }

    while (!BlockWorkList.empty()) {
      const BasicBlock *BB = BlockWorkList.pop_back_val();
      // Block weights are final once set (see updateEstimatedBlockWeight).
      if (EstimatedBlockWeight.count(Val: BB))
        continue;

      // We take maximum over all weights of successors. In other words we take
      // weight of "hot" path. In theory we can probably find a better function
      // which gives higher accuracy results (comparing to "maximum") but it is
      // doubtful it would make any difference in practice.
      const LoopBlock LoopBB = getLoopBlock(BB);
      auto MaxWeight = getMaxEstimatedEdgeWeight(SrcLoopBB: LoopBB, Successors: successors(BB));

      if (MaxWeight)
        propagateEstimatedBlockWeight(LoopBB, DT, PDT, BBWeight: *MaxWeight,
                                      BlockWorkList, LoopWorkList);
    }
  } while (!BlockWorkList.empty() || !LoopWorkList.empty());
}
| 1057 | |
| 1058 | // Calculate edge probabilities based on block's estimated weight. |
| 1059 | // Note that gathered weights were not scaled for loops. Thus edges entering |
| 1060 | // and exiting loops requires special processing. |
| 1061 | bool BPIConstruction::calcEstimatedHeuristics(const BasicBlock *BB) { |
| 1062 | assert(BB->getTerminator()->getNumSuccessors() > 1 && |
| 1063 | "expected more than one successor!" ); |
| 1064 | |
| 1065 | const LoopBlock LoopBB = getLoopBlock(BB); |
| 1066 | |
| 1067 | SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks; |
| 1068 | uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT; |
| 1069 | if (LoopBB.getLoop()) |
| 1070 | computeUnlikelySuccessors(BB, L: LoopBB.getLoop(), UnlikelyBlocks); |
| 1071 | |
| 1072 | // Changed to 'true' if at least one successor has estimated weight. |
| 1073 | bool FoundEstimatedWeight = false; |
| 1074 | SmallVector<uint32_t, 4> SuccWeights; |
| 1075 | uint64_t TotalWeight = 0; |
| 1076 | // Go over all successors of BB and put their weights into SuccWeights. |
| 1077 | for (const BasicBlock *SuccBB : successors(BB)) { |
| 1078 | std::optional<uint32_t> Weight; |
| 1079 | const LoopBlock SuccLoopBB = getLoopBlock(BB: SuccBB); |
| 1080 | const LoopEdge Edge{LoopBB, SuccLoopBB}; |
| 1081 | |
| 1082 | Weight = getEstimatedEdgeWeight(Edge); |
| 1083 | |
| 1084 | if (isLoopExitingEdge(Edge) && |
| 1085 | // Avoid adjustment of ZERO weight since it should remain unchanged. |
| 1086 | Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { |
| 1087 | // Scale down loop exiting weight by trip count. |
| 1088 | Weight = std::max( |
| 1089 | a: static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), |
| 1090 | b: Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / |
| 1091 | TC); |
| 1092 | } |
| 1093 | bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(Ptr: SuccBB); |
| 1094 | if (IsUnlikelyEdge && |
| 1095 | // Avoid adjustment of ZERO weight since it should remain unchanged. |
| 1096 | Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { |
| 1097 | // 'Unlikely' blocks have twice lower weight. |
| 1098 | Weight = std::max( |
| 1099 | a: static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), |
| 1100 | b: Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2); |
| 1101 | } |
| 1102 | |
| 1103 | if (Weight) |
| 1104 | FoundEstimatedWeight = true; |
| 1105 | |
| 1106 | auto WeightVal = |
| 1107 | Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)); |
| 1108 | TotalWeight += WeightVal; |
| 1109 | SuccWeights.push_back(Elt: WeightVal); |
| 1110 | } |
| 1111 | |
| 1112 | // If non of blocks have estimated weight bail out. |
| 1113 | // If TotalWeight is 0 that means weight of each successor is 0 as well and |
| 1114 | // equally likely. Bail out early to not deal with devision by zero. |
| 1115 | if (!FoundEstimatedWeight || TotalWeight == 0) |
| 1116 | return false; |
| 1117 | |
| 1118 | assert(SuccWeights.size() == succ_size(BB) && "Missed successor?" ); |
| 1119 | const unsigned SuccCount = SuccWeights.size(); |
| 1120 | |
| 1121 | // If the sum of weights does not fit in 32 bits, scale every weight down |
| 1122 | // accordingly. |
| 1123 | if (TotalWeight > UINT32_MAX) { |
| 1124 | uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1; |
| 1125 | TotalWeight = 0; |
| 1126 | for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { |
| 1127 | SuccWeights[Idx] /= ScalingFactor; |
| 1128 | if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO)) |
| 1129 | SuccWeights[Idx] = |
| 1130 | static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); |
| 1131 | TotalWeight += SuccWeights[Idx]; |
| 1132 | } |
| 1133 | assert(TotalWeight <= UINT32_MAX && "Total weight overflows" ); |
| 1134 | } |
| 1135 | |
| 1136 | // Finally set probabilities to edges according to estimated block weights. |
| 1137 | SmallVector<BranchProbability, 4> EdgeProbabilities( |
| 1138 | SuccCount, BranchProbability::getUnknown()); |
| 1139 | |
| 1140 | for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { |
| 1141 | EdgeProbabilities[Idx] = |
| 1142 | BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight); |
| 1143 | } |
| 1144 | BPI.setEdgeProbability(Src: BB, Probs: EdgeProbabilities); |
| 1145 | return true; |
| 1146 | } |
| 1147 | |
// "Zero Heuristics": predict the outcome of integer comparisons against the
// special constants 0, 1 and -1 (and of comparisons of the results of string/
// memory comparison library calls) using the precomputed probability tables.
// \returns true iff a probability was applied to \p BB.
bool BPIConstruction::calcZeroHeuristics(const BasicBlock *BB,
                                         const TargetLibraryInfo *TLI) {
  const CondBrInst *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator());
  if (!BI)
    return false;

  Value *Cond = BI->getCondition();
  ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond);
  if (!CI)
    return false;

  // Look through a bitcast when extracting the constant operand.
  auto GetConstantInt = [](Value *V) {
    if (auto *I = dyn_cast<BitCastInst>(Val: V))
      return dyn_cast<ConstantInt>(Val: I->getOperand(i_nocapture: 0));
    return dyn_cast<ConstantInt>(Val: V);
  };

  Value *RHS = CI->getOperand(i_nocapture: 1);
  ConstantInt *CV = GetConstantInt(RHS);
  if (!CV)
    return false;

  // If the LHS is the result of AND'ing a value with a single bit bitmask,
  // we don't have information about probabilities.
  if (Instruction *LHS = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: 0)))
    if (LHS->getOpcode() == Instruction::And)
      if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(i: 1)))
        if (AndRHS->getValue().isPowerOf2())
          return false;

  // Check if the LHS is the return value of a library function.
  LibFunc Func = LibFunc::NotLibFunc;
  if (TLI)
    if (CallInst *Call = dyn_cast<CallInst>(Val: CI->getOperand(i_nocapture: 0)))
      if (Function *CalledFn = Call->getCalledFunction())
        TLI->getLibFunc(FDecl: *CalledFn, F&: Func);

  // Select the probability table matching the compared constant (or the
  // library-call table, which takes precedence over the constant's value).
  ProbabilityTable::const_iterator Search;
  if (Func == LibFunc_strcasecmp ||
      Func == LibFunc_strcmp ||
      Func == LibFunc_strncasecmp ||
      Func == LibFunc_strncmp ||
      Func == LibFunc_memcmp ||
      Func == LibFunc_bcmp) {
    Search = ICmpWithLibCallTable.find(x: CI->getPredicate());
    if (Search == ICmpWithLibCallTable.end())
      return false;
  } else if (CV->isZero()) {
    Search = ICmpWithZeroTable.find(x: CI->getPredicate());
    if (Search == ICmpWithZeroTable.end())
      return false;
  } else if (CV->isOne()) {
    Search = ICmpWithOneTable.find(x: CI->getPredicate());
    if (Search == ICmpWithOneTable.end())
      return false;
  } else if (CV->isMinusOne()) {
    Search = ICmpWithMinusOneTable.find(x: CI->getPredicate());
    if (Search == ICmpWithMinusOneTable.end())
      return false;
  } else {
    // Comparisons against other constants carry no known bias.
    return false;
  }

  BPI.setEdgeProbability(Src: BB, Probs: Search->second);
  return true;
}
| 1214 | |
| 1215 | bool BPIConstruction::calcFloatingPointHeuristics(const BasicBlock *BB) { |
| 1216 | const CondBrInst *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator()); |
| 1217 | if (!BI) |
| 1218 | return false; |
| 1219 | |
| 1220 | Value *Cond = BI->getCondition(); |
| 1221 | FCmpInst *FCmp = dyn_cast<FCmpInst>(Val: Cond); |
| 1222 | if (!FCmp) |
| 1223 | return false; |
| 1224 | |
| 1225 | ProbabilityList ProbList; |
| 1226 | if (FCmp->isEquality()) { |
| 1227 | ProbList = !FCmp->isTrueWhenEqual() ? |
| 1228 | // f1 == f2 -> Unlikely |
| 1229 | ProbabilityList({FPTakenProb, FPUntakenProb}) : |
| 1230 | // f1 != f2 -> Likely |
| 1231 | ProbabilityList({FPUntakenProb, FPTakenProb}); |
| 1232 | } else { |
| 1233 | auto Search = FCmpTable.find(x: FCmp->getPredicate()); |
| 1234 | if (Search == FCmpTable.end()) |
| 1235 | return false; |
| 1236 | ProbList = Search->second; |
| 1237 | } |
| 1238 | |
| 1239 | BPI.setEdgeProbability(Src: BB, Probs: ProbList); |
| 1240 | return true; |
| 1241 | } |
// Entry point of the construction: sets up the supporting analyses (building
// the (post)dominator trees on demand when the caller did not supply them),
// estimates block weights, and then assigns edge probabilities to every
// multi-successor block of \p F using the first heuristic that applies.
void BPIConstruction::calculate(const Function &F, const LoopInfo &LoopI,
                                const TargetLibraryInfo *TLI, DominatorTree *DT,
                                PostDominatorTree *PDT) {
  LI = &LoopI;

  SccI = std::make_unique<SccInfo>(args: F);

  // Local storage for analyses built on demand; keeps DT/PDT valid for the
  // rest of this function either way.
  std::unique_ptr<DominatorTree> DTPtr;
  std::unique_ptr<PostDominatorTree> PDTPtr;

  if (!DT) {
    DTPtr = std::make_unique<DominatorTree>(args&: const_cast<Function &>(F));
    DT = DTPtr.get();
  }

  if (!PDT) {
    PDTPtr = std::make_unique<PostDominatorTree>(args&: const_cast<Function &>(F));
    PDT = PDTPtr.get();
  }

  estimateBlockWeights(F, DT, PDT);

  // Walk the basic blocks in post-order so that we can build up state about
  // the successors of a block iteratively.
  for (const auto *BB : post_order(G: &F.getEntryBlock())) {
    LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
                      << "\n" );
    // Blocks with fewer than two successors need no probabilities.
    if (BB->getTerminator()->getNumSuccessors() < 2)
      continue;
    // Heuristics are tried in decreasing order of reliability; the first one
    // that succeeds sets the probabilities for this block.
    if (calcMetadataWeights(BB))
      continue;
    if (calcEstimatedHeuristics(BB))
      continue;
    if (calcPointerHeuristics(BB))
      continue;
    if (calcZeroHeuristics(BB, TLI))
      continue;
    if (calcFloatingPointHeuristics(BB))
      continue;
  }
}
| 1284 | |
| 1285 | } // end anonymous namespace |
| 1286 | |
// Allocates a fresh span of succ_size(BB) probability slots at the end of the
// shared Probs vector, records its position for \p BB, and returns it for the
// caller to fill in.
MutableArrayRef<BranchProbability>
BranchProbabilityInfo::allocEdges(const BasicBlock *BB) {
  assert(BB->getParent() == LastF);
  assert(BlockNumberEpoch == LastF->getBlockNumberEpoch());
  unsigned NumSuccs = succ_size(BB);
  if (NumSuccs == 0) {
    // Nothing to allocate; drop any previously recorded edges for this block.
    eraseBlock(BB);
    return {};
  }
  if (EdgeStarts.size() <= BB->getNumber())
    EdgeStarts.resize(N: LastF->getMaxBlockNumber(), NV: 0);
  unsigned EdgeStart = Probs.size();
  // Starts are stored off-by-one so that 0 can serve as the "no edges"
  // sentinel (see getEdges).
  EdgeStarts[BB->getNumber()] = EdgeStart + 1; // 0 = no edges.
  Probs.append(NumInputs: NumSuccs, Elt: {});
  return MutableArrayRef(&Probs[EdgeStart], NumSuccs);
}
| 1303 | |
// Returns the probability span previously allocated for \p BB via allocEdges,
// or an empty ArrayRef if none exists. Note: in release builds the returned
// length is SIZE_MAX rather than the real successor count, so callers must
// only index into the ArrayRef, not iterate over it.
ArrayRef<BranchProbability>
BranchProbabilityInfo::getEdges(const BasicBlock *BB) const {
  assert(BB->getParent() == LastF);
  assert(BlockNumberEpoch == LastF->getBlockNumberEpoch());
  if (EdgeStarts.size() <= BB->getNumber())
    return {};
  if (unsigned EdgeStart = EdgeStarts[BB->getNumber()]) {
    // Undo the off-by-one encoding used by allocEdges.
    const BranchProbability *Start = &Probs[EdgeStart - 1]; // 0 = no edges.
    size_t Count = SIZE_MAX; // Avoid querying num successors in release builds.
#ifndef NDEBUG
    Count = succ_size(BB);
#endif
    return ArrayRef(Start, Count);
  }
  return {};
}
| 1320 | |
| 1321 | bool BranchProbabilityInfo::invalidate(Function &, const PreservedAnalyses &PA, |
| 1322 | FunctionAnalysisManager::Invalidator &) { |
| 1323 | // Check whether the analysis, all analyses on functions, or the function's |
| 1324 | // CFG have been preserved. |
| 1325 | auto PAC = PA.getChecker<BranchProbabilityAnalysis>(); |
| 1326 | return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || |
| 1327 | PAC.preservedSet<CFGAnalyses>()); |
| 1328 | } |
| 1329 | |
| 1330 | void BranchProbabilityInfo::print(raw_ostream &OS) const { |
| 1331 | OS << "---- Branch Probabilities ----\n" ; |
| 1332 | // We print the probabilities from the last function the analysis ran over, |
| 1333 | // or the function it is currently running over. |
| 1334 | assert(LastF && "Cannot print prior to running over a function" ); |
| 1335 | for (const auto &BI : *LastF) { |
| 1336 | for (const BasicBlock *Succ : successors(BB: &BI)) |
| 1337 | printEdgeProbability(OS&: OS << " " , Src: &BI, Dst: Succ); |
| 1338 | } |
| 1339 | } |
| 1340 | |
| 1341 | bool BranchProbabilityInfo:: |
| 1342 | isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { |
| 1343 | // Hot probability is at least 4/5 = 80% |
| 1344 | // FIXME: Compare against a static "hot" BranchProbability. |
| 1345 | return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); |
| 1346 | } |
| 1347 | |
| 1348 | /// Get the raw edge probability for the edge. If can't find it, return a |
| 1349 | /// default probability 1/N where N is the number of successors. Here an edge is |
| 1350 | /// specified using PredBlock and an |
| 1351 | /// index to the successors. |
| 1352 | BranchProbability |
| 1353 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
| 1354 | unsigned IndexInSuccessors) const { |
| 1355 | if (ArrayRef<BranchProbability> P = getEdges(BB: Src); !P.empty()) |
| 1356 | return P[IndexInSuccessors]; |
| 1357 | return {1, static_cast<uint32_t>(succ_size(BB: Src))}; |
| 1358 | } |
| 1359 | |
| 1360 | BranchProbability |
| 1361 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
| 1362 | const_succ_iterator Dst) const { |
| 1363 | return getEdgeProbability(Src, IndexInSuccessors: std::distance(first: succ_begin(BB: Src), last: Dst)); |
| 1364 | } |
| 1365 | |
| 1366 | /// Get the raw edge probability calculated for the block pair. This returns the |
| 1367 | /// sum of all raw edge probabilities from Src to Dst. |
| 1368 | BranchProbability |
| 1369 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
| 1370 | const BasicBlock *Dst) const { |
| 1371 | ArrayRef<BranchProbability> P = getEdges(BB: Src); |
| 1372 | if (P.empty()) |
| 1373 | return BranchProbability(llvm::count(Range: successors(BB: Src), Element: Dst), succ_size(BB: Src)); |
| 1374 | |
| 1375 | auto Prob = BranchProbability::getZero(); |
| 1376 | for (auto It : enumerate(First: successors(BB: Src))) |
| 1377 | if (It.value() == Dst) |
| 1378 | Prob += P[It.index()]; |
| 1379 | |
| 1380 | return Prob; |
| 1381 | } |
| 1382 | |
| 1383 | /// Set the edge probability for all edges at once. |
| 1384 | void BranchProbabilityInfo::setEdgeProbability( |
| 1385 | const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) { |
| 1386 | assert(Src->getTerminator()->getNumSuccessors() == Probs.size()); |
| 1387 | MutableArrayRef<BranchProbability> P = allocEdges(BB: Src); |
| 1388 | uint64_t TotalNumerator = 0; |
| 1389 | for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) { |
| 1390 | P[SuccIdx] = Probs[SuccIdx]; |
| 1391 | LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx |
| 1392 | << " successor probability to " << Probs[SuccIdx] |
| 1393 | << "\n" ); |
| 1394 | TotalNumerator += Probs[SuccIdx].getNumerator(); |
| 1395 | } |
| 1396 | |
| 1397 | // Because of rounding errors the total probability cannot be checked to be |
| 1398 | // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator. |
| 1399 | // Instead, every single probability in Probs must be as accurate as possible. |
| 1400 | // This results in error 1/denominator at most, thus the total absolute error |
| 1401 | // should be within Probs.size / BranchProbability::getDenominator. |
| 1402 | if (P.empty()) |
| 1403 | return; // If we store no probabilities, TotalNumerator is zero. |
| 1404 | assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size()); |
| 1405 | assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size()); |
| 1406 | (void)TotalNumerator; |
| 1407 | } |
| 1408 | |
| 1409 | void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src, |
| 1410 | BasicBlock *Dst) { |
| 1411 | assert(succ_size(Src) == succ_size(Dst)); |
| 1412 | // allocEdges can reallocate and must be called first. |
| 1413 | MutableArrayRef<BranchProbability> DstP = allocEdges(BB: Dst); |
| 1414 | ArrayRef<BranchProbability> SrcP = getEdges(BB: Src); |
| 1415 | if (SrcP.empty()) { |
| 1416 | // Nothing to copy from, erase again. |
| 1417 | eraseBlock(BB: Dst); |
| 1418 | return; |
| 1419 | } |
| 1420 | for (unsigned i = 0; i != DstP.size(); ++i) { |
| 1421 | DstP[i] = SrcP[i]; |
| 1422 | LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << i |
| 1423 | << " successor probability to " << SrcP[i] << "\n" ); |
| 1424 | } |
| 1425 | } |
| 1426 | |
| 1427 | void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock *Src) { |
| 1428 | assert(Src->getTerminator()->getNumSuccessors() == 2); |
| 1429 | ArrayRef<BranchProbability> P = getEdges(BB: Src); |
| 1430 | if (P.empty()) |
| 1431 | return; |
| 1432 | MutableArrayRef<BranchProbability> MP( |
| 1433 | const_cast<BranchProbability *>(P.data()), P.size()); |
| 1434 | std::swap(a&: MP[0], b&: MP[1]); |
| 1435 | } |
| 1436 | |
| 1437 | raw_ostream & |
| 1438 | BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, |
| 1439 | const BasicBlock *Src, |
| 1440 | const BasicBlock *Dst) const { |
| 1441 | const BranchProbability Prob = getEdgeProbability(Src, Dst); |
| 1442 | OS << "edge " ; |
| 1443 | Src->printAsOperand(O&: OS, PrintType: false, M: Src->getModule()); |
| 1444 | OS << " -> " ; |
| 1445 | Dst->printAsOperand(O&: OS, PrintType: false, M: Dst->getModule()); |
| 1446 | OS << " probability is " << Prob |
| 1447 | << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n" ); |
| 1448 | |
| 1449 | return OS; |
| 1450 | } |
| 1451 | |
| 1452 | void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { |
| 1453 | LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n" ); |
| 1454 | assert(BB->getParent() == LastF); |
| 1455 | assert(BlockNumberEpoch == LastF->getBlockNumberEpoch()); |
| 1456 | if (EdgeStarts.size() > BB->getNumber()) |
| 1457 | EdgeStarts[BB->getNumber()] = 0; |
| 1458 | } |
| 1459 | |
| 1460 | void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, |
| 1461 | const TargetLibraryInfo *TLI, |
| 1462 | DominatorTree *DT, |
| 1463 | PostDominatorTree *PDT) { |
| 1464 | LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() |
| 1465 | << " ----\n\n" ); |
| 1466 | LastF = &F; // Store the last function we ran on for printing. |
| 1467 | BlockNumberEpoch = F.getBlockNumberEpoch(); |
| 1468 | Probs.clear(); |
| 1469 | EdgeStarts.clear(); |
| 1470 | BPIConstruction(*this).calculate(F, LoopI, TLI, DT, PDT); |
| 1471 | |
| 1472 | if (PrintBranchProb && (PrintBranchProbFuncName.empty() || |
| 1473 | F.getName() == PrintBranchProbFuncName)) { |
| 1474 | print(OS&: dbgs()); |
| 1475 | } |
| 1476 | } |
| 1477 | |
| 1478 | void BranchProbabilityInfoWrapperPass::getAnalysisUsage( |
| 1479 | AnalysisUsage &AU) const { |
| 1480 | // We require DT so it's available when LI is available. The LI updating code |
| 1481 | // asserts that DT is also present so if we don't make sure that we have DT |
| 1482 | // here, that assert will trigger. |
| 1483 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 1484 | AU.addRequired<LoopInfoWrapperPass>(); |
| 1485 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
| 1486 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 1487 | AU.addRequired<PostDominatorTreeWrapperPass>(); |
| 1488 | AU.setPreservesAll(); |
| 1489 | } |
| 1490 | |
| 1491 | bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { |
| 1492 | const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
| 1493 | const TargetLibraryInfo &TLI = |
| 1494 | getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
| 1495 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
| 1496 | PostDominatorTree &PDT = |
| 1497 | getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); |
| 1498 | BPI.calculate(F, LoopI: LI, TLI: &TLI, DT: &DT, PDT: &PDT); |
| 1499 | return false; |
| 1500 | } |
| 1501 | |
| 1502 | void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS, |
| 1503 | const Module *) const { |
| 1504 | BPI.print(OS); |
| 1505 | } |
| 1506 | |
// Unique key identifying BranchProbabilityAnalysis in the new pass manager's
// analysis registry (its address, not value, is the identity).
AnalysisKey BranchProbabilityAnalysis::Key;
| 1508 | BranchProbabilityInfo |
| 1509 | BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { |
| 1510 | auto &LI = AM.getResult<LoopAnalysis>(IR&: F); |
| 1511 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
| 1512 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 1513 | auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F); |
| 1514 | BranchProbabilityInfo BPI; |
| 1515 | BPI.calculate(F, LoopI: LI, TLI: &TLI, DT: &DT, PDT: &PDT); |
| 1516 | return BPI; |
| 1517 | } |
| 1518 | |
| 1519 | PreservedAnalyses |
| 1520 | BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { |
| 1521 | OS << "Printing analysis 'Branch Probability Analysis' for function '" |
| 1522 | << F.getName() << "':\n" ; |
| 1523 | AM.getResult<BranchProbabilityAnalysis>(IR&: F).print(OS); |
| 1524 | return PreservedAnalyses::all(); |
| 1525 | } |
| 1526 | |