1 | //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Loops should be simplified before this analysis. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
14 | #include "llvm/ADT/PostOrderIterator.h" |
15 | #include "llvm/ADT/SCCIterator.h" |
16 | #include "llvm/ADT/STLExtras.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/Analysis/ConstantFolding.h" |
19 | #include "llvm/Analysis/LoopInfo.h" |
20 | #include "llvm/Analysis/PostDominators.h" |
21 | #include "llvm/Analysis/TargetLibraryInfo.h" |
22 | #include "llvm/IR/Attributes.h" |
23 | #include "llvm/IR/BasicBlock.h" |
24 | #include "llvm/IR/CFG.h" |
25 | #include "llvm/IR/Constants.h" |
26 | #include "llvm/IR/Dominators.h" |
27 | #include "llvm/IR/Function.h" |
28 | #include "llvm/IR/InstrTypes.h" |
29 | #include "llvm/IR/Instruction.h" |
30 | #include "llvm/IR/Instructions.h" |
31 | #include "llvm/IR/LLVMContext.h" |
32 | #include "llvm/IR/Metadata.h" |
33 | #include "llvm/IR/PassManager.h" |
34 | #include "llvm/IR/ProfDataUtils.h" |
35 | #include "llvm/IR/Type.h" |
36 | #include "llvm/IR/Value.h" |
37 | #include "llvm/InitializePasses.h" |
38 | #include "llvm/Pass.h" |
39 | #include "llvm/Support/BranchProbability.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/CommandLine.h" |
42 | #include "llvm/Support/Debug.h" |
43 | #include "llvm/Support/raw_ostream.h" |
44 | #include <cassert> |
45 | #include <cstdint> |
46 | #include <iterator> |
47 | #include <map> |
48 | #include <utility> |
49 | |
50 | using namespace llvm; |
51 | |
52 | #define DEBUG_TYPE "branch-prob" |
53 | |
54 | static cl::opt<bool> PrintBranchProb( |
55 | "print-bpi" , cl::init(Val: false), cl::Hidden, |
56 | cl::desc("Print the branch probability info." )); |
57 | |
58 | cl::opt<std::string> PrintBranchProbFuncName( |
59 | "print-bpi-func-name" , cl::Hidden, |
60 | cl::desc("The option to specify the name of the function " |
61 | "whose branch probability info is printed." )); |
62 | |
63 | INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob" , |
64 | "Branch Probability Analysis" , false, true) |
65 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
66 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
67 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
68 | INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) |
69 | INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob" , |
70 | "Branch Probability Analysis" , false, true) |
71 | |
72 | BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass() |
73 | : FunctionPass(ID) { |
74 | initializeBranchProbabilityInfoWrapperPassPass( |
75 | Registry&: *PassRegistry::getPassRegistry()); |
76 | } |
77 | |
78 | char BranchProbabilityInfoWrapperPass::ID = 0; |
79 | |
80 | // Weights are for internal use only. They are used by heuristics to help to |
81 | // estimate edges' probability. Example: |
82 | // |
83 | // Using "Loop Branch Heuristics" we predict weights of edges for the |
84 | // block BB2. |
85 | // ... |
86 | // | |
87 | // V |
88 | // BB1<-+ |
89 | // | | |
90 | // | | (Weight = 124) |
91 | // V | |
92 | // BB2--+ |
93 | // | |
94 | // | (Weight = 4) |
95 | // V |
96 | // BB3 |
97 | // |
98 | // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 |
99 | // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 |
100 | static const uint32_t LBH_TAKEN_WEIGHT = 124; |
101 | static const uint32_t LBH_NONTAKEN_WEIGHT = 4; |
102 | |
103 | /// Unreachable-terminating branch taken probability. |
104 | /// |
105 | /// This is the probability for a branch being taken to a block that terminates |
106 | /// (eventually) in unreachable. These are predicted as unlikely as possible. |
107 | /// All reachable probability will proportionally share the remaining part. |
108 | static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(N: 1); |
109 | |
110 | /// Heuristics and lookup tables for non-loop branches: |
111 | /// Pointer Heuristics (PH) |
112 | static const uint32_t PH_TAKEN_WEIGHT = 20; |
113 | static const uint32_t PH_NONTAKEN_WEIGHT = 12; |
114 | static const BranchProbability |
115 | PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); |
116 | static const BranchProbability |
117 | PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); |
118 | |
119 | using ProbabilityList = SmallVector<BranchProbability>; |
120 | using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>; |
121 | |
122 | /// Pointer comparisons: |
123 | static const ProbabilityTable PointerTable{ |
124 | {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely |
125 | {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely |
126 | }; |
127 | |
128 | /// Zero Heuristics (ZH) |
129 | static const uint32_t ZH_TAKEN_WEIGHT = 20; |
130 | static const uint32_t ZH_NONTAKEN_WEIGHT = 12; |
131 | static const BranchProbability |
132 | ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); |
133 | static const BranchProbability |
134 | ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); |
135 | |
136 | /// Integer compares with 0: |
137 | static const ProbabilityTable ICmpWithZeroTable{ |
138 | {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely |
139 | {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely |
140 | {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely |
141 | {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely |
142 | }; |
143 | |
144 | /// Integer compares with -1: |
145 | static const ProbabilityTable ICmpWithMinusOneTable{ |
146 | {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely |
147 | {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely |
148 | // InstCombine canonicalizes X >= 0 into X > -1 |
149 | {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely |
150 | }; |
151 | |
152 | /// Integer compares with 1: |
153 | static const ProbabilityTable ICmpWithOneTable{ |
154 | // InstCombine canonicalizes X <= 0 into X < 1 |
155 | {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely |
156 | }; |
157 | |
158 | /// strcmp and similar functions return zero, negative, or positive, if the |
159 | /// first string is equal, less, or greater than the second. We consider it |
160 | /// likely that the strings are not equal, so a comparison with zero is |
161 | /// probably false, but also a comparison with any other number is also |
162 | /// probably false given that what exactly is returned for nonzero values is |
163 | /// not specified. Any kind of comparison other than equality we know |
164 | /// nothing about. |
165 | static const ProbabilityTable ICmpWithLibCallTable{ |
166 | {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, |
167 | {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, |
168 | }; |
169 | |
170 | // Floating-Point Heuristics (FPH) |
171 | static const uint32_t FPH_TAKEN_WEIGHT = 20; |
172 | static const uint32_t FPH_NONTAKEN_WEIGHT = 12; |
173 | |
174 | /// This is the probability for an ordered floating point comparison. |
175 | static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; |
176 | /// This is the probability for an unordered floating point comparison, it means |
177 | /// one or two of the operands are NaN. Usually it is used to test for an |
178 | /// exceptional case, so the result is unlikely. |
179 | static const uint32_t FPH_UNO_WEIGHT = 1; |
180 | |
181 | static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, |
182 | FPH_ORD_WEIGHT + FPH_UNO_WEIGHT); |
183 | static const BranchProbability |
184 | FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT + FPH_UNO_WEIGHT); |
185 | static const BranchProbability |
186 | FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); |
187 | static const BranchProbability |
188 | FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); |
189 | |
190 | /// Floating-Point compares: |
191 | static const ProbabilityTable FCmpTable{ |
192 | {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely |
193 | {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely |
194 | }; |
195 | |
196 | /// Set of dedicated "absolute" execution weights for a block. These weights are |
197 | /// meaningful relative to each other and their derivatives only. |
198 | enum class BlockExecWeight : std::uint32_t { |
199 | /// Special weight used for cases with exact zero probability. |
200 | ZERO = 0x0, |
201 | /// Minimal possible non zero weight. |
202 | LOWEST_NON_ZERO = 0x1, |
203 | /// Weight to an 'unreachable' block. |
204 | UNREACHABLE = ZERO, |
205 | /// Weight to a block containing non returning call. |
206 | NORETURN = LOWEST_NON_ZERO, |
207 | /// Weight to 'unwind' block of an invoke instruction. |
208 | UNWIND = LOWEST_NON_ZERO, |
209 | /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked |
210 | /// with attribute 'cold'. |
211 | COLD = 0xffff, |
212 | /// Default weight is used in cases when there is no dedicated execution |
213 | /// weight set. It is not propagated through the domination line either. |
214 | DEFAULT = 0xfffff |
215 | }; |
216 | |
217 | BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) { |
218 | // Record SCC numbers of blocks in the CFG to identify irreducible loops. |
219 | // FIXME: We could only calculate this if the CFG is known to be irreducible |
220 | // (perhaps cache this info in LoopInfo if we can easily calculate it there?). |
221 | int SccNum = 0; |
222 | for (scc_iterator<const Function *> It = scc_begin(G: &F); !It.isAtEnd(); |
223 | ++It, ++SccNum) { |
224 | // Ignore single-block SCCs since they either aren't loops or LoopInfo will |
225 | // catch them. |
226 | const std::vector<const BasicBlock *> &Scc = *It; |
227 | if (Scc.size() == 1) |
228 | continue; |
229 | |
230 | LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":" ); |
231 | for (const auto *BB : Scc) { |
232 | LLVM_DEBUG(dbgs() << " " << BB->getName()); |
233 | SccNums[BB] = SccNum; |
234 | calculateSccBlockType(BB, SccNum); |
235 | } |
236 | LLVM_DEBUG(dbgs() << "\n" ); |
237 | } |
238 | } |
239 | |
240 | int BranchProbabilityInfo::SccInfo::getSCCNum(const BasicBlock *BB) const { |
241 | auto SccIt = SccNums.find(Val: BB); |
242 | if (SccIt == SccNums.end()) |
243 | return -1; |
244 | return SccIt->second; |
245 | } |
246 | |
247 | void BranchProbabilityInfo::SccInfo::getSccEnterBlocks( |
248 | int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const { |
249 | |
250 | for (auto MapIt : SccBlocks[SccNum]) { |
251 | const auto *BB = MapIt.first; |
252 | if (isSCCHeader(BB, SccNum)) |
253 | for (const auto *Pred : predecessors(BB)) |
254 | if (getSCCNum(BB: Pred) != SccNum) |
255 | Enters.push_back(Elt: const_cast<BasicBlock *>(BB)); |
256 | } |
257 | } |
258 | |
259 | void BranchProbabilityInfo::SccInfo::getSccExitBlocks( |
260 | int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const { |
261 | for (auto MapIt : SccBlocks[SccNum]) { |
262 | const auto *BB = MapIt.first; |
263 | if (isSCCExitingBlock(BB, SccNum)) |
264 | for (const auto *Succ : successors(BB)) |
265 | if (getSCCNum(BB: Succ) != SccNum) |
266 | Exits.push_back(Elt: const_cast<BasicBlock *>(Succ)); |
267 | } |
268 | } |
269 | |
270 | uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB, |
271 | int SccNum) const { |
272 | assert(getSCCNum(BB) == SccNum); |
273 | |
274 | assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC" ); |
275 | const auto &SccBlockTypes = SccBlocks[SccNum]; |
276 | |
277 | auto It = SccBlockTypes.find(Val: BB); |
278 | if (It != SccBlockTypes.end()) { |
279 | return It->second; |
280 | } |
281 | return Inner; |
282 | } |
283 | |
284 | void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB, |
285 | int SccNum) { |
286 | assert(getSCCNum(BB) == SccNum); |
287 | uint32_t BlockType = Inner; |
288 | |
289 | if (llvm::any_of(Range: predecessors(BB), P: [&](const BasicBlock *Pred) { |
290 | // Consider any block that is an entry point to the SCC as |
291 | // a header. |
292 | return getSCCNum(BB: Pred) != SccNum; |
293 | })) |
294 | BlockType |= Header; |
295 | |
296 | if (llvm::any_of(Range: successors(BB), P: [&](const BasicBlock *Succ) { |
297 | return getSCCNum(BB: Succ) != SccNum; |
298 | })) |
299 | BlockType |= Exiting; |
300 | |
301 | // Lazily compute the set of headers for a given SCC and cache the results |
302 | // in the SccHeaderMap. |
303 | if (SccBlocks.size() <= static_cast<unsigned>(SccNum)) |
304 | SccBlocks.resize(new_size: SccNum + 1); |
305 | auto &SccBlockTypes = SccBlocks[SccNum]; |
306 | |
307 | if (BlockType != Inner) { |
308 | bool IsInserted; |
309 | std::tie(args: std::ignore, args&: IsInserted) = |
310 | SccBlockTypes.insert(KV: std::make_pair(x&: BB, y&: BlockType)); |
311 | assert(IsInserted && "Duplicated block in SCC" ); |
312 | } |
313 | } |
314 | |
315 | BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB, |
316 | const LoopInfo &LI, |
317 | const SccInfo &SccI) |
318 | : BB(BB) { |
319 | LD.first = LI.getLoopFor(BB); |
320 | if (!LD.first) { |
321 | LD.second = SccI.getSCCNum(BB); |
322 | } |
323 | } |
324 | |
325 | bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const { |
326 | const auto &SrcBlock = Edge.first; |
327 | const auto &DstBlock = Edge.second; |
328 | return (DstBlock.getLoop() && |
329 | !DstBlock.getLoop()->contains(L: SrcBlock.getLoop())) || |
330 | // Assume that SCCs can't be nested. |
331 | (DstBlock.getSccNum() != -1 && |
332 | SrcBlock.getSccNum() != DstBlock.getSccNum()); |
333 | } |
334 | |
335 | bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const { |
336 | return isLoopEnteringEdge(Edge: {Edge.second, Edge.first}); |
337 | } |
338 | |
339 | bool BranchProbabilityInfo::isLoopEnteringExitingEdge( |
340 | const LoopEdge &Edge) const { |
341 | return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge); |
342 | } |
343 | |
344 | bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const { |
345 | const auto &SrcBlock = Edge.first; |
346 | const auto &DstBlock = Edge.second; |
347 | return SrcBlock.belongsToSameLoop(LB: DstBlock) && |
348 | ((DstBlock.getLoop() && |
349 | DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) || |
350 | (DstBlock.getSccNum() != -1 && |
351 | SccI->isSCCHeader(BB: DstBlock.getBlock(), SccNum: DstBlock.getSccNum()))); |
352 | } |
353 | |
354 | void BranchProbabilityInfo::getLoopEnterBlocks( |
355 | const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const { |
356 | if (LB.getLoop()) { |
357 | auto * = LB.getLoop()->getHeader(); |
358 | Enters.append(in_start: pred_begin(BB: Header), in_end: pred_end(BB: Header)); |
359 | } else { |
360 | assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?" ); |
361 | SccI->getSccEnterBlocks(SccNum: LB.getSccNum(), Enters); |
362 | } |
363 | } |
364 | |
365 | void BranchProbabilityInfo::getLoopExitBlocks( |
366 | const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const { |
367 | if (LB.getLoop()) { |
368 | LB.getLoop()->getExitBlocks(ExitBlocks&: Exits); |
369 | } else { |
370 | assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?" ); |
371 | SccI->getSccExitBlocks(SccNum: LB.getSccNum(), Exits); |
372 | } |
373 | } |
374 | |
375 | // Propagate existing explicit probabilities from either profile data or |
376 | // 'expect' intrinsic processing. Examine metadata against unreachable |
377 | // heuristic. The probability of the edge coming to unreachable block is |
378 | // set to min of metadata and unreachable heuristic. |
379 | bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { |
380 | const Instruction *TI = BB->getTerminator(); |
381 | assert(TI->getNumSuccessors() > 1 && "expected more than one successor!" ); |
382 | if (!(isa<BranchInst>(Val: TI) || isa<SwitchInst>(Val: TI) || isa<IndirectBrInst>(Val: TI) || |
383 | isa<InvokeInst>(Val: TI) || isa<CallBrInst>(Val: TI))) |
384 | return false; |
385 | |
386 | MDNode *WeightsNode = getValidBranchWeightMDNode(I: *TI); |
387 | if (!WeightsNode) |
388 | return false; |
389 | |
390 | // Check that the number of successors is manageable. |
391 | assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors" ); |
392 | |
393 | // Build up the final weights that will be used in a temporary buffer. |
394 | // Compute the sum of all weights to later decide whether they need to |
395 | // be scaled to fit in 32 bits. |
396 | uint64_t WeightSum = 0; |
397 | SmallVector<uint32_t, 2> Weights; |
398 | SmallVector<unsigned, 2> UnreachableIdxs; |
399 | SmallVector<unsigned, 2> ReachableIdxs; |
400 | |
401 | extractBranchWeights(ProfileData: WeightsNode, Weights); |
402 | for (unsigned I = 0, E = Weights.size(); I != E; ++I) { |
403 | WeightSum += Weights[I]; |
404 | const LoopBlock SrcLoopBB = getLoopBlock(BB); |
405 | const LoopBlock DstLoopBB = getLoopBlock(BB: TI->getSuccessor(Idx: I)); |
406 | auto EstimatedWeight = getEstimatedEdgeWeight(Edge: {SrcLoopBB, DstLoopBB}); |
407 | if (EstimatedWeight && |
408 | *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE)) |
409 | UnreachableIdxs.push_back(Elt: I); |
410 | else |
411 | ReachableIdxs.push_back(Elt: I); |
412 | } |
413 | assert(Weights.size() == TI->getNumSuccessors() && "Checked above" ); |
414 | |
415 | // If the sum of weights does not fit in 32 bits, scale every weight down |
416 | // accordingly. |
417 | uint64_t ScalingFactor = |
418 | (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1; |
419 | |
420 | if (ScalingFactor > 1) { |
421 | WeightSum = 0; |
422 | for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { |
423 | Weights[I] /= ScalingFactor; |
424 | WeightSum += Weights[I]; |
425 | } |
426 | } |
427 | assert(WeightSum <= UINT32_MAX && |
428 | "Expected weights to scale down to 32 bits" ); |
429 | |
430 | if (WeightSum == 0 || ReachableIdxs.size() == 0) { |
431 | for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) |
432 | Weights[I] = 1; |
433 | WeightSum = TI->getNumSuccessors(); |
434 | } |
435 | |
436 | // Set the probability. |
437 | SmallVector<BranchProbability, 2> BP; |
438 | for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) |
439 | BP.push_back(Elt: { Weights[I], static_cast<uint32_t>(WeightSum) }); |
440 | |
441 | // Examine the metadata against unreachable heuristic. |
442 | // If the unreachable heuristic is more strong then we use it for this edge. |
443 | if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) { |
444 | setEdgeProbability(Src: BB, Probs: BP); |
445 | return true; |
446 | } |
447 | |
448 | auto UnreachableProb = UR_TAKEN_PROB; |
449 | for (auto I : UnreachableIdxs) |
450 | if (UnreachableProb < BP[I]) { |
451 | BP[I] = UnreachableProb; |
452 | } |
453 | |
454 | // Sum of all edge probabilities must be 1.0. If we modified the probability |
455 | // of some edges then we must distribute the introduced difference over the |
456 | // reachable blocks. |
457 | // |
458 | // Proportional distribution: the relation between probabilities of the |
459 | // reachable edges is kept unchanged. That is for any reachable edges i and j: |
460 | // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] => |
461 | // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K |
462 | // Where K is independent of i,j. |
463 | // newBP[i] == oldBP[i] * K |
464 | // We need to find K. |
465 | // Make sum of all reachables of the left and right parts: |
466 | // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP) |
467 | // Sum of newBP must be equal to 1.0: |
468 | // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 => |
469 | // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP) |
470 | // Where sum_of_unreachable(newBP) is what has been just changed. |
471 | // Finally: |
472 | // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) => |
473 | // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP) |
474 | BranchProbability NewUnreachableSum = BranchProbability::getZero(); |
475 | for (auto I : UnreachableIdxs) |
476 | NewUnreachableSum += BP[I]; |
477 | |
478 | BranchProbability NewReachableSum = |
479 | BranchProbability::getOne() - NewUnreachableSum; |
480 | |
481 | BranchProbability OldReachableSum = BranchProbability::getZero(); |
482 | for (auto I : ReachableIdxs) |
483 | OldReachableSum += BP[I]; |
484 | |
485 | if (OldReachableSum != NewReachableSum) { // Anything to dsitribute? |
486 | if (OldReachableSum.isZero()) { |
487 | // If all oldBP[i] are zeroes then the proportional distribution results |
488 | // in all zero probabilities and the error stays big. In this case we |
489 | // evenly spread NewReachableSum over the reachable edges. |
490 | BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size(); |
491 | for (auto I : ReachableIdxs) |
492 | BP[I] = PerEdge; |
493 | } else { |
494 | for (auto I : ReachableIdxs) { |
495 | // We use uint64_t to avoid double rounding error of the following |
496 | // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum |
497 | // The formula is taken from the private constructor |
498 | // BranchProbability(uint32_t Numerator, uint32_t Denominator) |
499 | uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) * |
500 | BP[I].getNumerator(); |
501 | uint32_t Div = static_cast<uint32_t>( |
502 | divideNearest(Numerator: Mul, Denominator: OldReachableSum.getNumerator())); |
503 | BP[I] = BranchProbability::getRaw(N: Div); |
504 | } |
505 | } |
506 | } |
507 | |
508 | setEdgeProbability(Src: BB, Probs: BP); |
509 | |
510 | return true; |
511 | } |
512 | |
513 | // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison |
514 | // between two pointer or pointer and NULL will fail. |
515 | bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) { |
516 | const BranchInst *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
517 | if (!BI || !BI->isConditional()) |
518 | return false; |
519 | |
520 | Value *Cond = BI->getCondition(); |
521 | ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond); |
522 | if (!CI || !CI->isEquality()) |
523 | return false; |
524 | |
525 | Value *LHS = CI->getOperand(i_nocapture: 0); |
526 | |
527 | if (!LHS->getType()->isPointerTy()) |
528 | return false; |
529 | |
530 | assert(CI->getOperand(1)->getType()->isPointerTy()); |
531 | |
532 | auto Search = PointerTable.find(x: CI->getPredicate()); |
533 | if (Search == PointerTable.end()) |
534 | return false; |
535 | setEdgeProbability(Src: BB, Probs: Search->second); |
536 | return true; |
537 | } |
538 | |
539 | // Compute the unlikely successors to the block BB in the loop L, specifically |
540 | // those that are unlikely because this is a loop, and add them to the |
541 | // UnlikelyBlocks set. |
542 | static void |
543 | computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, |
544 | SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) { |
545 | // Sometimes in a loop we have a branch whose condition is made false by |
546 | // taking it. This is typically something like |
547 | // int n = 0; |
548 | // while (...) { |
549 | // if (++n >= MAX) { |
550 | // n = 0; |
551 | // } |
552 | // } |
553 | // In this sort of situation taking the branch means that at the very least it |
554 | // won't be taken again in the next iteration of the loop, so we should |
555 | // consider it less likely than a typical branch. |
556 | // |
557 | // We detect this by looking back through the graph of PHI nodes that sets the |
558 | // value that the condition depends on, and seeing if we can reach a successor |
559 | // block which can be determined to make the condition false. |
560 | // |
561 | // FIXME: We currently consider unlikely blocks to be half as likely as other |
562 | // blocks, but if we consider the example above the likelyhood is actually |
563 | // 1/MAX. We could therefore be more precise in how unlikely we consider |
564 | // blocks to be, but it would require more careful examination of the form |
565 | // of the comparison expression. |
566 | const BranchInst *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
567 | if (!BI || !BI->isConditional()) |
568 | return; |
569 | |
570 | // Check if the branch is based on an instruction compared with a constant |
571 | CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition()); |
572 | if (!CI || !isa<Instruction>(Val: CI->getOperand(i_nocapture: 0)) || |
573 | !isa<Constant>(Val: CI->getOperand(i_nocapture: 1))) |
574 | return; |
575 | |
576 | // Either the instruction must be a PHI, or a chain of operations involving |
577 | // constants that ends in a PHI which we can then collapse into a single value |
578 | // if the PHI value is known. |
579 | Instruction *CmpLHS = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: 0)); |
580 | PHINode *CmpPHI = dyn_cast<PHINode>(Val: CmpLHS); |
581 | Constant *CmpConst = dyn_cast<Constant>(Val: CI->getOperand(i_nocapture: 1)); |
582 | // Collect the instructions until we hit a PHI |
583 | SmallVector<BinaryOperator *, 1> InstChain; |
584 | while (!CmpPHI && CmpLHS && isa<BinaryOperator>(Val: CmpLHS) && |
585 | isa<Constant>(Val: CmpLHS->getOperand(i: 1))) { |
586 | // Stop if the chain extends outside of the loop |
587 | if (!L->contains(Inst: CmpLHS)) |
588 | return; |
589 | InstChain.push_back(Elt: cast<BinaryOperator>(Val: CmpLHS)); |
590 | CmpLHS = dyn_cast<Instruction>(Val: CmpLHS->getOperand(i: 0)); |
591 | if (CmpLHS) |
592 | CmpPHI = dyn_cast<PHINode>(Val: CmpLHS); |
593 | } |
594 | if (!CmpPHI || !L->contains(Inst: CmpPHI)) |
595 | return; |
596 | |
597 | // Trace the phi node to find all values that come from successors of BB |
598 | SmallPtrSet<PHINode*, 8> VisitedInsts; |
599 | SmallVector<PHINode*, 8> WorkList; |
600 | WorkList.push_back(Elt: CmpPHI); |
601 | VisitedInsts.insert(Ptr: CmpPHI); |
602 | while (!WorkList.empty()) { |
603 | PHINode *P = WorkList.pop_back_val(); |
604 | for (BasicBlock *B : P->blocks()) { |
605 | // Skip blocks that aren't part of the loop |
606 | if (!L->contains(BB: B)) |
607 | continue; |
608 | Value *V = P->getIncomingValueForBlock(BB: B); |
609 | // If the source is a PHI add it to the work list if we haven't |
610 | // already visited it. |
611 | if (PHINode *PN = dyn_cast<PHINode>(Val: V)) { |
612 | if (VisitedInsts.insert(Ptr: PN).second) |
613 | WorkList.push_back(Elt: PN); |
614 | continue; |
615 | } |
616 | // If this incoming value is a constant and B is a successor of BB, then |
617 | // we can constant-evaluate the compare to see if it makes the branch be |
618 | // taken or not. |
619 | Constant *CmpLHSConst = dyn_cast<Constant>(Val: V); |
620 | if (!CmpLHSConst || !llvm::is_contained(Range: successors(BB), Element: B)) |
621 | continue; |
622 | // First collapse InstChain |
623 | const DataLayout &DL = BB->getDataLayout(); |
624 | for (Instruction *I : llvm::reverse(C&: InstChain)) { |
625 | CmpLHSConst = ConstantFoldBinaryOpOperands( |
626 | Opcode: I->getOpcode(), LHS: CmpLHSConst, RHS: cast<Constant>(Val: I->getOperand(i: 1)), DL); |
627 | if (!CmpLHSConst) |
628 | break; |
629 | } |
630 | if (!CmpLHSConst) |
631 | continue; |
632 | // Now constant-evaluate the compare |
633 | Constant *Result = ConstantFoldCompareInstOperands( |
634 | Predicate: CI->getPredicate(), LHS: CmpLHSConst, RHS: CmpConst, DL); |
635 | // If the result means we don't branch to the block then that block is |
636 | // unlikely. |
637 | if (Result && |
638 | ((Result->isZeroValue() && B == BI->getSuccessor(i: 0)) || |
639 | (Result->isOneValue() && B == BI->getSuccessor(i: 1)))) |
640 | UnlikelyBlocks.insert(Ptr: B); |
641 | } |
642 | } |
643 | } |
644 | |
645 | std::optional<uint32_t> |
646 | BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const { |
647 | auto WeightIt = EstimatedBlockWeight.find(Val: BB); |
648 | if (WeightIt == EstimatedBlockWeight.end()) |
649 | return std::nullopt; |
650 | return WeightIt->second; |
651 | } |
652 | |
653 | std::optional<uint32_t> |
654 | BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const { |
655 | auto WeightIt = EstimatedLoopWeight.find(Val: L); |
656 | if (WeightIt == EstimatedLoopWeight.end()) |
657 | return std::nullopt; |
658 | return WeightIt->second; |
659 | } |
660 | |
661 | std::optional<uint32_t> |
662 | BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const { |
663 | // For edges entering a loop take weight of a loop rather than an individual |
664 | // block in the loop. |
665 | return isLoopEnteringEdge(Edge) |
666 | ? getEstimatedLoopWeight(L: Edge.second.getLoopData()) |
667 | : getEstimatedBlockWeight(BB: Edge.second.getBlock()); |
668 | } |
669 | |
670 | template <class IterT> |
671 | std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight( |
672 | const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const { |
673 | SmallVector<uint32_t, 4> Weights; |
674 | std::optional<uint32_t> MaxWeight; |
675 | for (const BasicBlock *DstBB : Successors) { |
676 | const LoopBlock DstLoopBB = getLoopBlock(BB: DstBB); |
677 | auto Weight = getEstimatedEdgeWeight(Edge: {SrcLoopBB, DstLoopBB}); |
678 | |
679 | if (!Weight) |
680 | return std::nullopt; |
681 | |
682 | if (!MaxWeight || *MaxWeight < *Weight) |
683 | MaxWeight = Weight; |
684 | } |
685 | |
686 | return MaxWeight; |
687 | } |
688 | |
689 | // Updates \p LoopBB's weight and returns true. If \p LoopBB has already |
690 | // an associated weight it is unchanged and false is returned. |
691 | // |
692 | // Please note by the algorithm the weight is not expected to change once set |
693 | // thus 'false' status is used to track visited blocks. |
694 | bool BranchProbabilityInfo::updateEstimatedBlockWeight( |
695 | LoopBlock &LoopBB, uint32_t BBWeight, |
696 | SmallVectorImpl<BasicBlock *> &BlockWorkList, |
697 | SmallVectorImpl<LoopBlock> &LoopWorkList) { |
698 | BasicBlock *BB = LoopBB.getBlock(); |
699 | |
700 | // In general, weight is assigned to a block when it has final value and |
701 | // can't/shouldn't be changed. However, there are cases when a block |
702 | // inherently has several (possibly "contradicting") weights. For example, |
703 | // "unwind" block may also contain "cold" call. In that case the first |
704 | // set weight is favored and all consequent weights are ignored. |
705 | if (!EstimatedBlockWeight.insert(KV: {BB, BBWeight}).second) |
706 | return false; |
707 | |
708 | for (BasicBlock *PredBlock : predecessors(BB)) { |
709 | LoopBlock PredLoop = getLoopBlock(BB: PredBlock); |
710 | // Add affected block/loop to a working list. |
711 | if (isLoopExitingEdge(Edge: {PredLoop, LoopBB})) { |
712 | if (!EstimatedLoopWeight.count(Val: PredLoop.getLoopData())) |
713 | LoopWorkList.push_back(Elt: PredLoop); |
714 | } else if (!EstimatedBlockWeight.count(Val: PredBlock)) |
715 | BlockWorkList.push_back(Elt: PredBlock); |
716 | } |
717 | return true; |
718 | } |
719 | |
// Starting from \p BB walks up the dominator tree and assigns \p BBWeight to
// every dominator block that is post dominated by \p BB — in other words, to
// every block that is executed if and only if \p BB is executed.
// Importantly, loops are skipped here for two reasons. First, weights of
// blocks in a loop should be scaled by the trip count (yet possibly unknown).
// Second, there is no value in doing so because it gives no additional
// information regarding the distribution of probabilities inside the loop.
// The exception is loop 'enter' and 'exit' edges, which are handled in a
// special way at calcEstimatedHeuristics.
//
// In addition, \p BlockWorkList/\p LoopWorkList are populated with blocks and
// loops for which at least one successor/exit got an estimated weight.
void BranchProbabilityInfo::propagateEstimatedBlockWeight(
    const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
    uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
    SmallVectorImpl<LoopBlock> &LoopWorkList) {
  const BasicBlock *BB = LoopBB.getBlock();
  const auto *DTStartNode = DT->getNode(BB);
  const auto *PDTStartNode = PDT->getNode(BB);

  // TODO: Consider propagating weight down the domination line as well.
  for (const auto *DTNode = DTStartNode; DTNode != nullptr;
       DTNode = DTNode->getIDom()) {
    auto *DomBB = DTNode->getBlock();
    // Consider only blocks which lie on one 'line': DomBB must both dominate
    // BB (by the walk) and be post dominated by it (checked here).
    if (!PDT->dominates(A: PDTStartNode, B: PDT->getNode(BB: DomBB)))
      // If BB doesn't post dominate DomBB it will not post dominate dominators
      // of DomBB as well, so the walk can stop here.
      break;

    LoopBlock DomLoopBB = getLoopBlock(BB: DomBB);
    const LoopEdge Edge{DomLoopBB, LoopBB};
    // Don't propagate weight to blocks belonging to different loops.
    if (!isLoopEnteringExitingEdge(Edge)) {
      if (!updateEstimatedBlockWeight(LoopBB&: DomLoopBB, BBWeight, BlockWorkList,
                                      LoopWorkList))
        // If DomBB already has a weight set then all its predecessors are
        // already processed (since we propagate weight up to the top of IR
        // each time).
        break;
    } else if (isLoopExitingEdge(Edge)) {
      // Crossing a loop exit: let the loop's weight be (re)evaluated instead.
      LoopWorkList.push_back(Elt: DomLoopBB);
    }
  }
}
764 | |
765 | std::optional<uint32_t> |
766 | BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock *BB) { |
767 | // Returns true if \p BB has call marked with "NoReturn" attribute. |
768 | auto hasNoReturn = [&](const BasicBlock *BB) { |
769 | for (const auto &I : reverse(C: *BB)) |
770 | if (const CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
771 | if (CI->hasFnAttr(Kind: Attribute::NoReturn)) |
772 | return true; |
773 | |
774 | return false; |
775 | }; |
776 | |
777 | // Important note regarding the order of checks. They are ordered by weight |
778 | // from lowest to highest. Doing that allows to avoid "unstable" results |
779 | // when several conditions heuristics can be applied simultaneously. |
780 | if (isa<UnreachableInst>(Val: BB->getTerminator()) || |
781 | // If this block is terminated by a call to |
782 | // @llvm.experimental.deoptimize then treat it like an unreachable |
783 | // since it is expected to practically never execute. |
784 | // TODO: Should we actually treat as never returning call? |
785 | BB->getTerminatingDeoptimizeCall()) |
786 | return hasNoReturn(BB) |
787 | ? static_cast<uint32_t>(BlockExecWeight::NORETURN) |
788 | : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE); |
789 | |
790 | // Check if the block is an exception handling block. |
791 | if (BB->isEHPad()) |
792 | return static_cast<uint32_t>(BlockExecWeight::UNWIND); |
793 | |
794 | // Check if the block contains 'cold' call. |
795 | for (const auto &I : *BB) |
796 | if (const CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
797 | if (CI->hasFnAttr(Kind: Attribute::Cold)) |
798 | return static_cast<uint32_t>(BlockExecWeight::COLD); |
799 | |
800 | return std::nullopt; |
801 | } |
802 | |
// Does an RPO traversal over all blocks in \p F and assigns weights to
// 'unreachable', 'noreturn', 'cold' and 'unwind' blocks. In addition it does
// its best to propagate the weight up/down the IR until a fixed point is
// reached (both worklists drained).
// NOTE: the misspelling in the name ("Eestimate") is part of the declared
// interface and is therefore kept.
void BranchProbabilityInfo::computeEestimateBlockWeight(
    const Function &F, DominatorTree *DT, PostDominatorTree *PDT) {
  SmallVector<BasicBlock *, 8> BlockWorkList;
  SmallVector<LoopBlock, 8> LoopWorkList;
  // Lazily-filled cache of exit blocks per loop, so getLoopExitBlocks runs at
  // most once per loop.
  SmallDenseMap<LoopData, SmallVector<BasicBlock *, 4>> LoopExitBlocks;

  // By doing RPO we make sure that all predecessors already have weights
  // calculated before visiting their successors.
  ReversePostOrderTraversal<const Function *> RPOT(&F);
  for (const auto *BB : RPOT)
    if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
      // If we were able to find an estimated weight for the block, set it on
      // this block and propagate it up the IR.
      propagateEstimatedBlockWeight(LoopBB: getLoopBlock(BB), DT, PDT, BBWeight: *BBWeight,
                                    BlockWorkList, LoopWorkList);

  // BlockWorkList/LoopWorkList contain blocks/loops with at least one
  // successor/exit having an estimated weight. Try to propagate weight to such
  // blocks/loops from successors/exits.
  // Process loops and blocks; the order is not important.
  do {
    while (!LoopWorkList.empty()) {
      const LoopBlock LoopBB = LoopWorkList.pop_back_val();
      const LoopData LD = LoopBB.getLoopData();
      // Skip loops whose weight has already been settled.
      if (EstimatedLoopWeight.count(Val: LD))
        continue;

      // Compute (and cache) this loop's exit blocks on first encounter.
      auto Res = LoopExitBlocks.try_emplace(Key: LD);
      SmallVectorImpl<BasicBlock *> &Exits = Res.first->second;
      if (Res.second)
        getLoopExitBlocks(LB: LoopBB, Exits);
      // The loop's weight is the maximum over its exiting edges (the "hot"
      // way out of the loop).
      auto LoopWeight = getMaxEstimatedEdgeWeight(
          SrcLoopBB: LoopBB, Successors: make_range(x: Exits.begin(), y: Exits.end()));

      if (LoopWeight) {
        // If we never exit the loop then we can enter it once at maximum.
        if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
          LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);

        EstimatedLoopWeight.insert(KV: {LD, *LoopWeight});
        // Add all blocks entering the loop into the working list.
        getLoopEnterBlocks(LB: LoopBB, Enters&: BlockWorkList);
      }
    }

    while (!BlockWorkList.empty()) {
      // We can reach here only if BlockWorkList is not empty.
      const BasicBlock *BB = BlockWorkList.pop_back_val();
      if (EstimatedBlockWeight.count(Val: BB))
        continue;

      // We take the maximum over all weights of successors, i.e. the weight
      // of the "hot" path. In theory some other combining function might give
      // more accurate results, but it is unlikely to matter in practice.
      const LoopBlock LoopBB = getLoopBlock(BB);
      auto MaxWeight = getMaxEstimatedEdgeWeight(SrcLoopBB: LoopBB, Successors: successors(BB));

      if (MaxWeight)
        propagateEstimatedBlockWeight(LoopBB, DT, PDT, BBWeight: *MaxWeight,
                                      BlockWorkList, LoopWorkList);
    }
  } while (!BlockWorkList.empty() || !LoopWorkList.empty());
}
872 | |
// Calculates edge probabilities based on the blocks' estimated weights.
// Note that gathered weights were not scaled for loops, thus edges entering
// and exiting loops require special processing. Returns true iff
// probabilities were set for \p BB's successors.
bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
  assert(BB->getTerminator()->getNumSuccessors() > 1 &&
         "expected more than one successor!");

  const LoopBlock LoopBB = getLoopBlock(BB);

  SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks;
  // Assumed trip count used to scale down loop-exiting weights.
  uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT;
  if (LoopBB.getLoop())
    computeUnlikelySuccessors(BB, L: LoopBB.getLoop(), UnlikelyBlocks);

  // Changed to 'true' if at least one successor has an estimated weight.
  bool FoundEstimatedWeight = false;
  SmallVector<uint32_t, 4> SuccWeights;
  uint64_t TotalWeight = 0;
  // Go over all successors of BB and put their weights into SuccWeights.
  for (const BasicBlock *SuccBB : successors(BB)) {
    std::optional<uint32_t> Weight;
    const LoopBlock SuccLoopBB = getLoopBlock(BB: SuccBB);
    const LoopEdge Edge{LoopBB, SuccLoopBB};

    Weight = getEstimatedEdgeWeight(Edge);

    if (isLoopExitingEdge(Edge) &&
        // Avoid adjustment of ZERO weight since it should remain unchanged.
        Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
      // Scale down loop exiting weight by trip count.
      Weight = std::max(
          a: static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
          b: Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
              TC);
    }
    bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(Ptr: SuccBB);
    if (IsUnlikelyEdge &&
        // Avoid adjustment of ZERO weight since it should remain unchanged.
        Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
      // 'Unlikely' blocks get half the weight.
      Weight = std::max(
          a: static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
          b: Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
    }

    if (Weight)
      FoundEstimatedWeight = true;

    // Successors without an estimated weight fall back to DEFAULT.
    auto WeightVal =
        Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT));
    TotalWeight += WeightVal;
    SuccWeights.push_back(Elt: WeightVal);
  }

  // If none of the blocks have an estimated weight, bail out.
  // If TotalWeight is 0 that means the weight of each successor is 0 as well
  // and they are equally likely. Bail out early to not deal with division by
  // zero.
  if (!FoundEstimatedWeight || TotalWeight == 0)
    return false;

  assert(SuccWeights.size() == succ_size(BB) && "Missed successor?");
  const unsigned SuccCount = SuccWeights.size();

  // If the sum of weights does not fit in 32 bits, scale every weight down
  // accordingly.
  if (TotalWeight > UINT32_MAX) {
    uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
    TotalWeight = 0;
    for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
      SuccWeights[Idx] /= ScalingFactor;
      // Keep scaled-down weights non-zero so no edge becomes impossible.
      if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO))
        SuccWeights[Idx] =
            static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
      TotalWeight += SuccWeights[Idx];
    }
    assert(TotalWeight <= UINT32_MAX && "Total weight overflows");
  }

  // Finally set probabilities on edges according to estimated block weights.
  SmallVector<BranchProbability, 4> EdgeProbabilities(
      SuccCount, BranchProbability::getUnknown());

  for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
    EdgeProbabilities[Idx] =
        BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight);
  }
  setEdgeProbability(Src: BB, Probs: EdgeProbabilities);
  return true;
}
962 | |
963 | bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, |
964 | const TargetLibraryInfo *TLI) { |
965 | const BranchInst *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
966 | if (!BI || !BI->isConditional()) |
967 | return false; |
968 | |
969 | Value *Cond = BI->getCondition(); |
970 | ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond); |
971 | if (!CI) |
972 | return false; |
973 | |
974 | auto GetConstantInt = [](Value *V) { |
975 | if (auto *I = dyn_cast<BitCastInst>(Val: V)) |
976 | return dyn_cast<ConstantInt>(Val: I->getOperand(i_nocapture: 0)); |
977 | return dyn_cast<ConstantInt>(Val: V); |
978 | }; |
979 | |
980 | Value *RHS = CI->getOperand(i_nocapture: 1); |
981 | ConstantInt *CV = GetConstantInt(RHS); |
982 | if (!CV) |
983 | return false; |
984 | |
985 | // If the LHS is the result of AND'ing a value with a single bit bitmask, |
986 | // we don't have information about probabilities. |
987 | if (Instruction *LHS = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: 0))) |
988 | if (LHS->getOpcode() == Instruction::And) |
989 | if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(i: 1))) |
990 | if (AndRHS->getValue().isPowerOf2()) |
991 | return false; |
992 | |
993 | // Check if the LHS is the return value of a library function |
994 | LibFunc Func = NumLibFuncs; |
995 | if (TLI) |
996 | if (CallInst *Call = dyn_cast<CallInst>(Val: CI->getOperand(i_nocapture: 0))) |
997 | if (Function *CalledFn = Call->getCalledFunction()) |
998 | TLI->getLibFunc(FDecl: *CalledFn, F&: Func); |
999 | |
1000 | ProbabilityTable::const_iterator Search; |
1001 | if (Func == LibFunc_strcasecmp || |
1002 | Func == LibFunc_strcmp || |
1003 | Func == LibFunc_strncasecmp || |
1004 | Func == LibFunc_strncmp || |
1005 | Func == LibFunc_memcmp || |
1006 | Func == LibFunc_bcmp) { |
1007 | Search = ICmpWithLibCallTable.find(x: CI->getPredicate()); |
1008 | if (Search == ICmpWithLibCallTable.end()) |
1009 | return false; |
1010 | } else if (CV->isZero()) { |
1011 | Search = ICmpWithZeroTable.find(x: CI->getPredicate()); |
1012 | if (Search == ICmpWithZeroTable.end()) |
1013 | return false; |
1014 | } else if (CV->isOne()) { |
1015 | Search = ICmpWithOneTable.find(x: CI->getPredicate()); |
1016 | if (Search == ICmpWithOneTable.end()) |
1017 | return false; |
1018 | } else if (CV->isMinusOne()) { |
1019 | Search = ICmpWithMinusOneTable.find(x: CI->getPredicate()); |
1020 | if (Search == ICmpWithMinusOneTable.end()) |
1021 | return false; |
1022 | } else { |
1023 | return false; |
1024 | } |
1025 | |
1026 | setEdgeProbability(Src: BB, Probs: Search->second); |
1027 | return true; |
1028 | } |
1029 | |
1030 | bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { |
1031 | const BranchInst *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
1032 | if (!BI || !BI->isConditional()) |
1033 | return false; |
1034 | |
1035 | Value *Cond = BI->getCondition(); |
1036 | FCmpInst *FCmp = dyn_cast<FCmpInst>(Val: Cond); |
1037 | if (!FCmp) |
1038 | return false; |
1039 | |
1040 | ProbabilityList ProbList; |
1041 | if (FCmp->isEquality()) { |
1042 | ProbList = !FCmp->isTrueWhenEqual() ? |
1043 | // f1 == f2 -> Unlikely |
1044 | ProbabilityList({FPTakenProb, FPUntakenProb}) : |
1045 | // f1 != f2 -> Likely |
1046 | ProbabilityList({FPUntakenProb, FPTakenProb}); |
1047 | } else { |
1048 | auto Search = FCmpTable.find(x: FCmp->getPredicate()); |
1049 | if (Search == FCmpTable.end()) |
1050 | return false; |
1051 | ProbList = Search->second; |
1052 | } |
1053 | |
1054 | setEdgeProbability(Src: BB, Probs: ProbList); |
1055 | return true; |
1056 | } |
1057 | |
// Drops all cached edge probabilities together with the callback handles
// that keep them in sync with basic-block deletions.
void BranchProbabilityInfo::releaseMemory() {
  Probs.clear();
  Handles.clear();
}
1062 | |
1063 | bool BranchProbabilityInfo::invalidate(Function &, const PreservedAnalyses &PA, |
1064 | FunctionAnalysisManager::Invalidator &) { |
1065 | // Check whether the analysis, all analyses on functions, or the function's |
1066 | // CFG have been preserved. |
1067 | auto PAC = PA.getChecker<BranchProbabilityAnalysis>(); |
1068 | return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || |
1069 | PAC.preservedSet<CFGAnalyses>()); |
1070 | } |
1071 | |
// Prints the probability of every CFG edge of the last analyzed function.
void BranchProbabilityInfo::print(raw_ostream &OS) const {
  OS << "---- Branch Probabilities ----\n";
  // We print the probabilities from the last function the analysis ran over,
  // or the function it is currently running over.
  assert(LastF && "Cannot print prior to running over a function");
  for (const auto &BI : *LastF) {
    for (const BasicBlock *Succ : successors(BB: &BI))
      printEdgeProbability(OS&: OS << "  ", Src: &BI, Dst: Succ);
  }
}
1082 | |
1083 | bool BranchProbabilityInfo:: |
1084 | isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { |
1085 | // Hot probability is at least 4/5 = 80% |
1086 | // FIXME: Compare against a static "hot" BranchProbability. |
1087 | return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); |
1088 | } |
1089 | |
1090 | /// Get the raw edge probability for the edge. If can't find it, return a |
1091 | /// default probability 1/N where N is the number of successors. Here an edge is |
1092 | /// specified using PredBlock and an |
1093 | /// index to the successors. |
1094 | BranchProbability |
1095 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
1096 | unsigned IndexInSuccessors) const { |
1097 | auto I = Probs.find(Val: std::make_pair(x&: Src, y&: IndexInSuccessors)); |
1098 | assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) == |
1099 | (Probs.end() == I) && |
1100 | "Probability for I-th successor must always be defined along with the " |
1101 | "probability for the first successor" ); |
1102 | |
1103 | if (I != Probs.end()) |
1104 | return I->second; |
1105 | |
1106 | return {1, static_cast<uint32_t>(succ_size(BB: Src))}; |
1107 | } |
1108 | |
// Convenience overload: get the raw probability for the edge identified by a
// successor iterator of \p Src.
BranchProbability
BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
                                          const_succ_iterator Dst) const {
  return getEdgeProbability(Src, IndexInSuccessors: Dst.getSuccessorIndex());
}
1114 | |
1115 | /// Get the raw edge probability calculated for the block pair. This returns the |
1116 | /// sum of all raw edge probabilities from Src to Dst. |
1117 | BranchProbability |
1118 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
1119 | const BasicBlock *Dst) const { |
1120 | if (!Probs.count(Val: std::make_pair(x&: Src, y: 0))) |
1121 | return BranchProbability(llvm::count(Range: successors(BB: Src), Element: Dst), succ_size(BB: Src)); |
1122 | |
1123 | auto Prob = BranchProbability::getZero(); |
1124 | for (const_succ_iterator I = succ_begin(BB: Src), E = succ_end(BB: Src); I != E; ++I) |
1125 | if (*I == Dst) |
1126 | Prob += Probs.find(Val: std::make_pair(x&: Src, y: I.getSuccessorIndex()))->second; |
1127 | |
1128 | return Prob; |
1129 | } |
1130 | |
/// Set the edge probability for all edges of \p Src at once. \p Probs must
/// contain exactly one entry per successor, in successor order.
void BranchProbabilityInfo::setEdgeProbability(
    const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) {
  assert(Src->getTerminator()->getNumSuccessors() == Probs.size());
  eraseBlock(BB: Src); // Erase stale data if any.
  if (Probs.size() == 0)
    return; // Nothing to set.

  // Register a callback handle so stored data is erased if Src is deleted.
  Handles.insert(V: BasicBlockCallbackVH(Src, this));
  uint64_t TotalNumerator = 0;
  for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) {
    this->Probs[std::make_pair(x&: Src, y&: SuccIdx)] = Probs[SuccIdx];
    LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx
                      << " successor probability to " << Probs[SuccIdx]
                      << "\n");
    TotalNumerator += Probs[SuccIdx].getNumerator();
  }

  // Because of rounding errors the total probability cannot be checked to be
  // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
  // Instead, every single probability in Probs must be as accurate as possible.
  // This results in error 1/denominator at most, thus the total absolute error
  // should be within Probs.size / BranchProbability::getDenominator.
  assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size());
  assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size());
  (void)TotalNumerator; // Only used in asserts; silence NDEBUG warnings.
}
1158 | |
1159 | void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src, |
1160 | BasicBlock *Dst) { |
1161 | eraseBlock(BB: Dst); // Erase stale data if any. |
1162 | unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors(); |
1163 | assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors()); |
1164 | if (NumSuccessors == 0) |
1165 | return; // Nothing to set. |
1166 | if (!this->Probs.contains(Val: std::make_pair(x&: Src, y: 0))) |
1167 | return; // No probability is set for edges from Src. Keep the same for Dst. |
1168 | |
1169 | Handles.insert(V: BasicBlockCallbackVH(Dst, this)); |
1170 | for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) { |
1171 | auto Prob = this->Probs[std::make_pair(x&: Src, y&: SuccIdx)]; |
1172 | this->Probs[std::make_pair(x&: Dst, y&: SuccIdx)] = Prob; |
1173 | LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx |
1174 | << " successor probability to " << Prob << "\n" ); |
1175 | } |
1176 | } |
1177 | |
1178 | void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock *Src) { |
1179 | assert(Src->getTerminator()->getNumSuccessors() == 2); |
1180 | if (!Probs.contains(Val: std::make_pair(x&: Src, y: 0))) |
1181 | return; // No probability is set for edges from Src |
1182 | assert(Probs.contains(std::make_pair(Src, 1))); |
1183 | std::swap(a&: Probs[std::make_pair(x&: Src, y: 0)], b&: Probs[std::make_pair(x&: Src, y: 1)]); |
1184 | } |
1185 | |
1186 | raw_ostream & |
1187 | BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, |
1188 | const BasicBlock *Src, |
1189 | const BasicBlock *Dst) const { |
1190 | const BranchProbability Prob = getEdgeProbability(Src, Dst); |
1191 | OS << "edge " ; |
1192 | Src->printAsOperand(O&: OS, PrintType: false, M: Src->getModule()); |
1193 | OS << " -> " ; |
1194 | Dst->printAsOperand(O&: OS, PrintType: false, M: Dst->getModule()); |
1195 | OS << " probability is " << Prob |
1196 | << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n" ); |
1197 | |
1198 | return OS; |
1199 | } |
1200 | |
// Removes all stored probability data for \p BB and deregisters its deletion
// callback handle.
void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) {
  LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n");

  // Note that we cannot use successors of BB because the terminator of BB may
  // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
  // Instead we remove prob data for the block by iterating successors by their
  // indices from 0 till the last which exists. There could not be prob data for
  // a pair (BB, N) if there is no data for (BB, N-1) because the data is always
  // set for all successors from 0 to M at once by the method
  // setEdgeProbability().
  Handles.erase(V: BasicBlockCallbackVH(BB, this));
  for (unsigned I = 0;; ++I) {
    auto MapI = Probs.find(Val: std::make_pair(x&: BB, y&: I));
    if (MapI == Probs.end()) {
      // First missing index: by the invariant above no higher index exists.
      assert(Probs.count(std::make_pair(BB, I + 1)) == 0 &&
             "Must be no more successors");
      return;
    }
    Probs.erase(I: MapI);
  }
}
1222 | |
// Computes branch probabilities for every multi-successor block of \p F by
// trying the calc* heuristics in order; the first one that succeeds sets the
// probabilities for that block. \p DT and \p PDT may be null, in which case
// local trees are built on demand.
void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI,
                                      const TargetLibraryInfo *TLI,
                                      DominatorTree *DT,
                                      PostDominatorTree *PDT) {
  LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
                    << " ----\n\n");
  LastF = &F; // Store the last function we ran on for printing.
  LI = &LoopI;

  SccI = std::make_unique<SccInfo>(args: F);

  // The weight maps are per-run scratch state and must start out empty.
  assert(EstimatedBlockWeight.empty());
  assert(EstimatedLoopWeight.empty());

  std::unique_ptr<DominatorTree> DTPtr;
  std::unique_ptr<PostDominatorTree> PDTPtr;

  // Build the (post)dominator trees locally when the caller did not provide
  // them.
  if (!DT) {
    DTPtr = std::make_unique<DominatorTree>(args&: const_cast<Function &>(F));
    DT = DTPtr.get();
  }

  if (!PDT) {
    PDTPtr = std::make_unique<PostDominatorTree>(args&: const_cast<Function &>(F));
    PDT = PDTPtr.get();
  }

  computeEestimateBlockWeight(F, DT, PDT);

  // Walk the basic blocks in post-order so that we can build up state about
  // the successors of a block iteratively.
  for (const auto *BB : post_order(G: &F.getEntryBlock())) {
    LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
                      << "\n");
    // With fewer than two successors there is no probability to set.
    if (BB->getTerminator()->getNumSuccessors() < 2)
      continue;
    if (calcMetadataWeights(BB))
      continue;
    if (calcEstimatedHeuristics(BB))
      continue;
    if (calcPointerHeuristics(BB))
      continue;
    if (calcZeroHeuristics(BB, TLI))
      continue;
    if (calcFloatingPointHeuristics(BB))
      continue;
  }

  // Release per-run scratch state.
  EstimatedLoopWeight.clear();
  EstimatedBlockWeight.clear();
  SccI.reset();

  // Dump the result if requested (optionally filtered by function name).
  if (PrintBranchProb && (PrintBranchProbFuncName.empty() ||
                          F.getName() == PrintBranchProbFuncName)) {
    print(OS&: dbgs());
  }
}
1281 | |
1282 | void BranchProbabilityInfoWrapperPass::getAnalysisUsage( |
1283 | AnalysisUsage &AU) const { |
1284 | // We require DT so it's available when LI is available. The LI updating code |
1285 | // asserts that DT is also present so if we don't make sure that we have DT |
1286 | // here, that assert will trigger. |
1287 | AU.addRequired<DominatorTreeWrapperPass>(); |
1288 | AU.addRequired<LoopInfoWrapperPass>(); |
1289 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
1290 | AU.addRequired<DominatorTreeWrapperPass>(); |
1291 | AU.addRequired<PostDominatorTreeWrapperPass>(); |
1292 | AU.setPreservesAll(); |
1293 | } |
1294 | |
1295 | bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { |
1296 | const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
1297 | const TargetLibraryInfo &TLI = |
1298 | getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
1299 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
1300 | PostDominatorTree &PDT = |
1301 | getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); |
1302 | BPI.calculate(F, LoopI: LI, TLI: &TLI, DT: &DT, PDT: &PDT); |
1303 | return false; |
1304 | } |
1305 | |
// Forwards to the wrapped BranchProbabilityInfo to drop all cached data.
void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); }
1307 | |
// Legacy-PM print entry point; the Module argument is unused.
void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
                                             const Module *) const {
  BPI.print(OS);
}
1312 | |
1313 | AnalysisKey BranchProbabilityAnalysis::Key; |
1314 | BranchProbabilityInfo |
1315 | BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { |
1316 | auto &LI = AM.getResult<LoopAnalysis>(IR&: F); |
1317 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
1318 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
1319 | auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F); |
1320 | BranchProbabilityInfo BPI; |
1321 | BPI.calculate(F, LoopI: LI, TLI: &TLI, DT: &DT, PDT: &PDT); |
1322 | return BPI; |
1323 | } |
1324 | |
1325 | PreservedAnalyses |
1326 | BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { |
1327 | OS << "Printing analysis 'Branch Probability Analysis' for function '" |
1328 | << F.getName() << "':\n" ; |
1329 | AM.getResult<BranchProbabilityAnalysis>(IR&: F).print(OS); |
1330 | return PreservedAnalyses::all(); |
1331 | } |
1332 | |