1 | //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Loops should be simplified before this analysis. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
14 | #include "llvm/ADT/PostOrderIterator.h" |
15 | #include "llvm/ADT/SCCIterator.h" |
16 | #include "llvm/ADT/STLExtras.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/Analysis/ConstantFolding.h" |
19 | #include "llvm/Analysis/LoopInfo.h" |
20 | #include "llvm/Analysis/PostDominators.h" |
21 | #include "llvm/Analysis/TargetLibraryInfo.h" |
22 | #include "llvm/IR/Attributes.h" |
23 | #include "llvm/IR/BasicBlock.h" |
24 | #include "llvm/IR/CFG.h" |
25 | #include "llvm/IR/Constants.h" |
26 | #include "llvm/IR/Dominators.h" |
27 | #include "llvm/IR/Function.h" |
28 | #include "llvm/IR/InstrTypes.h" |
29 | #include "llvm/IR/Instruction.h" |
30 | #include "llvm/IR/Instructions.h" |
31 | #include "llvm/IR/LLVMContext.h" |
32 | #include "llvm/IR/Metadata.h" |
33 | #include "llvm/IR/PassManager.h" |
34 | #include "llvm/IR/ProfDataUtils.h" |
35 | #include "llvm/IR/Type.h" |
36 | #include "llvm/IR/Value.h" |
37 | #include "llvm/InitializePasses.h" |
38 | #include "llvm/Pass.h" |
39 | #include "llvm/Support/BranchProbability.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/CommandLine.h" |
42 | #include "llvm/Support/Debug.h" |
43 | #include "llvm/Support/raw_ostream.h" |
44 | #include <cassert> |
45 | #include <cstdint> |
46 | #include <map> |
47 | #include <utility> |
48 | |
49 | using namespace llvm; |
50 | |
51 | #define DEBUG_TYPE "branch-prob" |
52 | |
53 | static cl::opt<bool> PrintBranchProb( |
54 | "print-bpi" , cl::init(Val: false), cl::Hidden, |
55 | cl::desc("Print the branch probability info." )); |
56 | |
57 | static cl::opt<std::string> PrintBranchProbFuncName( |
58 | "print-bpi-func-name" , cl::Hidden, |
59 | cl::desc("The option to specify the name of the function " |
60 | "whose branch probability info is printed." )); |
61 | |
62 | INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob" , |
63 | "Branch Probability Analysis" , false, true) |
64 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
65 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
66 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
67 | INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) |
68 | INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob" , |
69 | "Branch Probability Analysis" , false, true) |
70 | |
71 | BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass() |
72 | : FunctionPass(ID) {} |
73 | |
74 | char BranchProbabilityInfoWrapperPass::ID = 0; |
75 | |
// Weights are internal to this file: heuristics use them to estimate edge
// probabilities. Example:
//
// Using "Loop Branch Heuristics" we predict weights of edges for the
// block BB2.
//         ...
//          |
//          V
//         BB1<-+
//          |   |
//          |   | (Weight = 124)
//          V   |
//         BB2--+
//          |
//          | (Weight = 4)
//          V
//         BB3
//
// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
static constexpr uint32_t LBH_TAKEN_WEIGHT = 124;
static constexpr uint32_t LBH_NONTAKEN_WEIGHT = 4;
98 | |
99 | /// Unreachable-terminating branch taken probability. |
100 | /// |
101 | /// This is the probability for a branch being taken to a block that terminates |
102 | /// (eventually) in unreachable. These are predicted as unlikely as possible. |
103 | /// All reachable probability will proportionally share the remaining part. |
104 | static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(N: 1); |
105 | |
106 | /// Heuristics and lookup tables for non-loop branches: |
107 | /// Pointer Heuristics (PH) |
108 | static const uint32_t PH_TAKEN_WEIGHT = 20; |
109 | static const uint32_t PH_NONTAKEN_WEIGHT = 12; |
110 | static const BranchProbability |
111 | PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); |
112 | static const BranchProbability |
113 | PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); |
114 | |
115 | using ProbabilityList = SmallVector<BranchProbability>; |
116 | using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>; |
117 | |
118 | /// Pointer comparisons: |
119 | static const ProbabilityTable PointerTable{ |
120 | {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely |
121 | {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely |
122 | }; |
123 | |
124 | /// Zero Heuristics (ZH) |
125 | static const uint32_t ZH_TAKEN_WEIGHT = 20; |
126 | static const uint32_t ZH_NONTAKEN_WEIGHT = 12; |
127 | static const BranchProbability |
128 | ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); |
129 | static const BranchProbability |
130 | ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); |
131 | |
132 | /// Integer compares with 0: |
133 | static const ProbabilityTable ICmpWithZeroTable{ |
134 | {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely |
135 | {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely |
136 | {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely |
137 | {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely |
138 | }; |
139 | |
140 | /// Integer compares with -1: |
141 | static const ProbabilityTable ICmpWithMinusOneTable{ |
142 | {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely |
143 | {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely |
144 | // InstCombine canonicalizes X >= 0 into X > -1 |
145 | {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely |
146 | }; |
147 | |
148 | /// Integer compares with 1: |
149 | static const ProbabilityTable ICmpWithOneTable{ |
150 | // InstCombine canonicalizes X <= 0 into X < 1 |
151 | {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely |
152 | }; |
153 | |
154 | /// strcmp and similar functions return zero, negative, or positive, if the |
155 | /// first string is equal, less, or greater than the second. We consider it |
156 | /// likely that the strings are not equal, so a comparison with zero is |
157 | /// probably false, but also a comparison with any other number is also |
158 | /// probably false given that what exactly is returned for nonzero values is |
159 | /// not specified. Any kind of comparison other than equality we know |
160 | /// nothing about. |
161 | static const ProbabilityTable ICmpWithLibCallTable{ |
162 | {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, |
163 | {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, |
164 | }; |
165 | |
166 | // Floating-Point Heuristics (FPH) |
167 | static const uint32_t FPH_TAKEN_WEIGHT = 20; |
168 | static const uint32_t FPH_NONTAKEN_WEIGHT = 12; |
169 | |
170 | /// This is the probability for an ordered floating point comparison. |
171 | static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; |
172 | /// This is the probability for an unordered floating point comparison, it means |
173 | /// one or two of the operands are NaN. Usually it is used to test for an |
174 | /// exceptional case, so the result is unlikely. |
175 | static const uint32_t FPH_UNO_WEIGHT = 1; |
176 | |
177 | static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, |
178 | FPH_ORD_WEIGHT + FPH_UNO_WEIGHT); |
179 | static const BranchProbability |
180 | FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT + FPH_UNO_WEIGHT); |
181 | static const BranchProbability |
182 | FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); |
183 | static const BranchProbability |
184 | FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); |
185 | |
186 | /// Floating-Point compares: |
187 | static const ProbabilityTable FCmpTable{ |
188 | {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely |
189 | {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely |
190 | }; |
191 | |
/// Dedicated "absolute" execution weights for a block. The values only carry
/// meaning relative to one another (and to weights derived from them).
enum class BlockExecWeight : std::uint32_t {
  /// Used for cases with an exact zero probability.
  ZERO = 0,
  /// Smallest possible non-zero weight.
  LOWEST_NON_ZERO = 1,
  /// Weight of an 'unreachable' block.
  UNREACHABLE = ZERO,
  /// Weight of a block containing a non-returning call.
  NORETURN = LOWEST_NON_ZERO,
  /// Weight of the 'unwind' block of an invoke instruction.
  UNWIND = LOWEST_NON_ZERO,
  /// Weight of a 'cold' block, i.e. one containing a call marked with the
  /// 'cold' attribute.
  COLD = 0xFFFF,
  /// Used when no dedicated execution weight is set. It is not propagated
  /// through the domination line either.
  DEFAULT = 0xFFFFF
};
212 | |
213 | BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) { |
214 | // Record SCC numbers of blocks in the CFG to identify irreducible loops. |
215 | // FIXME: We could only calculate this if the CFG is known to be irreducible |
216 | // (perhaps cache this info in LoopInfo if we can easily calculate it there?). |
217 | int SccNum = 0; |
218 | for (scc_iterator<const Function *> It = scc_begin(G: &F); !It.isAtEnd(); |
219 | ++It, ++SccNum) { |
220 | // Ignore single-block SCCs since they either aren't loops or LoopInfo will |
221 | // catch them. |
222 | const std::vector<const BasicBlock *> &Scc = *It; |
223 | if (Scc.size() == 1) |
224 | continue; |
225 | |
226 | LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":" ); |
227 | for (const auto *BB : Scc) { |
228 | LLVM_DEBUG(dbgs() << " " << BB->getName()); |
229 | SccNums[BB] = SccNum; |
230 | calculateSccBlockType(BB, SccNum); |
231 | } |
232 | LLVM_DEBUG(dbgs() << "\n" ); |
233 | } |
234 | } |
235 | |
236 | int BranchProbabilityInfo::SccInfo::getSCCNum(const BasicBlock *BB) const { |
237 | auto SccIt = SccNums.find(Val: BB); |
238 | if (SccIt == SccNums.end()) |
239 | return -1; |
240 | return SccIt->second; |
241 | } |
242 | |
243 | void BranchProbabilityInfo::SccInfo::getSccEnterBlocks( |
244 | int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const { |
245 | |
246 | for (auto MapIt : SccBlocks[SccNum]) { |
247 | const auto *BB = MapIt.first; |
248 | if (isSCCHeader(BB, SccNum)) |
249 | for (const auto *Pred : predecessors(BB)) |
250 | if (getSCCNum(BB: Pred) != SccNum) |
251 | Enters.push_back(Elt: const_cast<BasicBlock *>(BB)); |
252 | } |
253 | } |
254 | |
255 | void BranchProbabilityInfo::SccInfo::getSccExitBlocks( |
256 | int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const { |
257 | for (auto MapIt : SccBlocks[SccNum]) { |
258 | const auto *BB = MapIt.first; |
259 | if (isSCCExitingBlock(BB, SccNum)) |
260 | for (const auto *Succ : successors(BB)) |
261 | if (getSCCNum(BB: Succ) != SccNum) |
262 | Exits.push_back(Elt: const_cast<BasicBlock *>(Succ)); |
263 | } |
264 | } |
265 | |
266 | uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB, |
267 | int SccNum) const { |
268 | assert(getSCCNum(BB) == SccNum); |
269 | |
270 | assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC" ); |
271 | const auto &SccBlockTypes = SccBlocks[SccNum]; |
272 | |
273 | auto It = SccBlockTypes.find(Val: BB); |
274 | if (It != SccBlockTypes.end()) { |
275 | return It->second; |
276 | } |
277 | return Inner; |
278 | } |
279 | |
280 | void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB, |
281 | int SccNum) { |
282 | assert(getSCCNum(BB) == SccNum); |
283 | uint32_t BlockType = Inner; |
284 | |
285 | if (llvm::any_of(Range: predecessors(BB), P: [&](const BasicBlock *Pred) { |
286 | // Consider any block that is an entry point to the SCC as |
287 | // a header. |
288 | return getSCCNum(BB: Pred) != SccNum; |
289 | })) |
290 | BlockType |= Header; |
291 | |
292 | if (llvm::any_of(Range: successors(BB), P: [&](const BasicBlock *Succ) { |
293 | return getSCCNum(BB: Succ) != SccNum; |
294 | })) |
295 | BlockType |= Exiting; |
296 | |
297 | // Lazily compute the set of headers for a given SCC and cache the results |
298 | // in the SccHeaderMap. |
299 | if (SccBlocks.size() <= static_cast<unsigned>(SccNum)) |
300 | SccBlocks.resize(new_size: SccNum + 1); |
301 | auto &SccBlockTypes = SccBlocks[SccNum]; |
302 | |
303 | if (BlockType != Inner) { |
304 | bool IsInserted; |
305 | std::tie(args: std::ignore, args&: IsInserted) = |
306 | SccBlockTypes.insert(KV: std::make_pair(x&: BB, y&: BlockType)); |
307 | assert(IsInserted && "Duplicated block in SCC" ); |
308 | } |
309 | } |
310 | |
311 | BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB, |
312 | const LoopInfo &LI, |
313 | const SccInfo &SccI) |
314 | : BB(BB) { |
315 | LD.first = LI.getLoopFor(BB); |
316 | if (!LD.first) { |
317 | LD.second = SccI.getSCCNum(BB); |
318 | } |
319 | } |
320 | |
321 | bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const { |
322 | const auto &SrcBlock = Edge.first; |
323 | const auto &DstBlock = Edge.second; |
324 | return (DstBlock.getLoop() && |
325 | !DstBlock.getLoop()->contains(L: SrcBlock.getLoop())) || |
326 | // Assume that SCCs can't be nested. |
327 | (DstBlock.getSccNum() != -1 && |
328 | SrcBlock.getSccNum() != DstBlock.getSccNum()); |
329 | } |
330 | |
331 | bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const { |
332 | return isLoopEnteringEdge(Edge: {Edge.second, Edge.first}); |
333 | } |
334 | |
335 | bool BranchProbabilityInfo::isLoopEnteringExitingEdge( |
336 | const LoopEdge &Edge) const { |
337 | return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge); |
338 | } |
339 | |
340 | bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const { |
341 | const auto &SrcBlock = Edge.first; |
342 | const auto &DstBlock = Edge.second; |
343 | return SrcBlock.belongsToSameLoop(LB: DstBlock) && |
344 | ((DstBlock.getLoop() && |
345 | DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) || |
346 | (DstBlock.getSccNum() != -1 && |
347 | SccI->isSCCHeader(BB: DstBlock.getBlock(), SccNum: DstBlock.getSccNum()))); |
348 | } |
349 | |
350 | void BranchProbabilityInfo::getLoopEnterBlocks( |
351 | const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const { |
352 | if (LB.getLoop()) { |
353 | auto * = LB.getLoop()->getHeader(); |
354 | Enters.append(in_start: pred_begin(BB: Header), in_end: pred_end(BB: Header)); |
355 | } else { |
356 | assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?" ); |
357 | SccI->getSccEnterBlocks(SccNum: LB.getSccNum(), Enters); |
358 | } |
359 | } |
360 | |
361 | void BranchProbabilityInfo::getLoopExitBlocks( |
362 | const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const { |
363 | if (LB.getLoop()) { |
364 | LB.getLoop()->getExitBlocks(ExitBlocks&: Exits); |
365 | } else { |
366 | assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?" ); |
367 | SccI->getSccExitBlocks(SccNum: LB.getSccNum(), Exits); |
368 | } |
369 | } |
370 | |
371 | // Propagate existing explicit probabilities from either profile data or |
372 | // 'expect' intrinsic processing. Examine metadata against unreachable |
373 | // heuristic. The probability of the edge coming to unreachable block is |
374 | // set to min of metadata and unreachable heuristic. |
375 | bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { |
376 | const Instruction *TI = BB->getTerminator(); |
377 | assert(TI->getNumSuccessors() > 1 && "expected more than one successor!" ); |
378 | if (!(isa<BranchInst>(Val: TI) || isa<SwitchInst>(Val: TI) || isa<IndirectBrInst>(Val: TI) || |
379 | isa<InvokeInst>(Val: TI) || isa<CallBrInst>(Val: TI))) |
380 | return false; |
381 | |
382 | MDNode *WeightsNode = getValidBranchWeightMDNode(I: *TI); |
383 | if (!WeightsNode) |
384 | return false; |
385 | |
386 | // Check that the number of successors is manageable. |
387 | assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors" ); |
388 | |
389 | // Build up the final weights that will be used in a temporary buffer. |
390 | // Compute the sum of all weights to later decide whether they need to |
391 | // be scaled to fit in 32 bits. |
392 | uint64_t WeightSum = 0; |
393 | SmallVector<uint32_t, 2> Weights; |
394 | SmallVector<unsigned, 2> UnreachableIdxs; |
395 | SmallVector<unsigned, 2> ReachableIdxs; |
396 | |
397 | extractBranchWeights(ProfileData: WeightsNode, Weights); |
398 | for (unsigned I = 0, E = Weights.size(); I != E; ++I) { |
399 | WeightSum += Weights[I]; |
400 | const LoopBlock SrcLoopBB = getLoopBlock(BB); |
401 | const LoopBlock DstLoopBB = getLoopBlock(BB: TI->getSuccessor(Idx: I)); |
402 | auto EstimatedWeight = getEstimatedEdgeWeight(Edge: {SrcLoopBB, DstLoopBB}); |
403 | if (EstimatedWeight && |
404 | *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE)) |
405 | UnreachableIdxs.push_back(Elt: I); |
406 | else |
407 | ReachableIdxs.push_back(Elt: I); |
408 | } |
409 | assert(Weights.size() == TI->getNumSuccessors() && "Checked above" ); |
410 | |
411 | // If the sum of weights does not fit in 32 bits, scale every weight down |
412 | // accordingly. |
413 | uint64_t ScalingFactor = |
414 | (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1; |
415 | |
416 | if (ScalingFactor > 1) { |
417 | WeightSum = 0; |
418 | for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { |
419 | Weights[I] /= ScalingFactor; |
420 | WeightSum += Weights[I]; |
421 | } |
422 | } |
423 | assert(WeightSum <= UINT32_MAX && |
424 | "Expected weights to scale down to 32 bits" ); |
425 | |
426 | if (WeightSum == 0 || ReachableIdxs.size() == 0) { |
427 | for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) |
428 | Weights[I] = 1; |
429 | WeightSum = TI->getNumSuccessors(); |
430 | } |
431 | |
432 | // Set the probability. |
433 | SmallVector<BranchProbability, 2> BP; |
434 | for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) |
435 | BP.push_back(Elt: { Weights[I], static_cast<uint32_t>(WeightSum) }); |
436 | |
437 | // Examine the metadata against unreachable heuristic. |
438 | // If the unreachable heuristic is more strong then we use it for this edge. |
439 | if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) { |
440 | setEdgeProbability(Src: BB, Probs: BP); |
441 | return true; |
442 | } |
443 | |
444 | auto UnreachableProb = UR_TAKEN_PROB; |
445 | for (auto I : UnreachableIdxs) |
446 | if (UnreachableProb < BP[I]) { |
447 | BP[I] = UnreachableProb; |
448 | } |
449 | |
450 | // Sum of all edge probabilities must be 1.0. If we modified the probability |
451 | // of some edges then we must distribute the introduced difference over the |
452 | // reachable blocks. |
453 | // |
454 | // Proportional distribution: the relation between probabilities of the |
455 | // reachable edges is kept unchanged. That is for any reachable edges i and j: |
456 | // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] => |
457 | // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K |
458 | // Where K is independent of i,j. |
459 | // newBP[i] == oldBP[i] * K |
460 | // We need to find K. |
461 | // Make sum of all reachables of the left and right parts: |
462 | // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP) |
463 | // Sum of newBP must be equal to 1.0: |
464 | // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 => |
465 | // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP) |
466 | // Where sum_of_unreachable(newBP) is what has been just changed. |
467 | // Finally: |
468 | // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) => |
469 | // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP) |
470 | BranchProbability NewUnreachableSum = BranchProbability::getZero(); |
471 | for (auto I : UnreachableIdxs) |
472 | NewUnreachableSum += BP[I]; |
473 | |
474 | BranchProbability NewReachableSum = |
475 | BranchProbability::getOne() - NewUnreachableSum; |
476 | |
477 | BranchProbability OldReachableSum = BranchProbability::getZero(); |
478 | for (auto I : ReachableIdxs) |
479 | OldReachableSum += BP[I]; |
480 | |
481 | if (OldReachableSum != NewReachableSum) { // Anything to dsitribute? |
482 | if (OldReachableSum.isZero()) { |
483 | // If all oldBP[i] are zeroes then the proportional distribution results |
484 | // in all zero probabilities and the error stays big. In this case we |
485 | // evenly spread NewReachableSum over the reachable edges. |
486 | BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size(); |
487 | for (auto I : ReachableIdxs) |
488 | BP[I] = PerEdge; |
489 | } else { |
490 | for (auto I : ReachableIdxs) { |
491 | // We use uint64_t to avoid double rounding error of the following |
492 | // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum |
493 | // The formula is taken from the private constructor |
494 | // BranchProbability(uint32_t Numerator, uint32_t Denominator) |
495 | uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) * |
496 | BP[I].getNumerator(); |
497 | uint32_t Div = static_cast<uint32_t>( |
498 | divideNearest(Numerator: Mul, Denominator: OldReachableSum.getNumerator())); |
499 | BP[I] = BranchProbability::getRaw(N: Div); |
500 | } |
501 | } |
502 | } |
503 | |
504 | setEdgeProbability(Src: BB, Probs: BP); |
505 | |
506 | return true; |
507 | } |
508 | |
509 | // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison |
510 | // between two pointer or pointer and NULL will fail. |
511 | bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) { |
512 | const BranchInst *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
513 | if (!BI || !BI->isConditional()) |
514 | return false; |
515 | |
516 | Value *Cond = BI->getCondition(); |
517 | ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond); |
518 | if (!CI || !CI->isEquality()) |
519 | return false; |
520 | |
521 | Value *LHS = CI->getOperand(i_nocapture: 0); |
522 | |
523 | if (!LHS->getType()->isPointerTy()) |
524 | return false; |
525 | |
526 | assert(CI->getOperand(1)->getType()->isPointerTy()); |
527 | |
528 | auto Search = PointerTable.find(x: CI->getPredicate()); |
529 | if (Search == PointerTable.end()) |
530 | return false; |
531 | setEdgeProbability(Src: BB, Probs: Search->second); |
532 | return true; |
533 | } |
534 | |
535 | // Compute the unlikely successors to the block BB in the loop L, specifically |
536 | // those that are unlikely because this is a loop, and add them to the |
537 | // UnlikelyBlocks set. |
538 | static void |
539 | computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, |
540 | SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) { |
541 | // Sometimes in a loop we have a branch whose condition is made false by |
542 | // taking it. This is typically something like |
543 | // int n = 0; |
544 | // while (...) { |
545 | // if (++n >= MAX) { |
546 | // n = 0; |
547 | // } |
548 | // } |
549 | // In this sort of situation taking the branch means that at the very least it |
550 | // won't be taken again in the next iteration of the loop, so we should |
551 | // consider it less likely than a typical branch. |
552 | // |
553 | // We detect this by looking back through the graph of PHI nodes that sets the |
554 | // value that the condition depends on, and seeing if we can reach a successor |
555 | // block which can be determined to make the condition false. |
556 | // |
557 | // FIXME: We currently consider unlikely blocks to be half as likely as other |
558 | // blocks, but if we consider the example above the likelyhood is actually |
559 | // 1/MAX. We could therefore be more precise in how unlikely we consider |
560 | // blocks to be, but it would require more careful examination of the form |
561 | // of the comparison expression. |
562 | const BranchInst *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
563 | if (!BI || !BI->isConditional()) |
564 | return; |
565 | |
566 | // Check if the branch is based on an instruction compared with a constant |
567 | CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition()); |
568 | if (!CI || !isa<Instruction>(Val: CI->getOperand(i_nocapture: 0)) || |
569 | !isa<Constant>(Val: CI->getOperand(i_nocapture: 1))) |
570 | return; |
571 | |
572 | // Either the instruction must be a PHI, or a chain of operations involving |
573 | // constants that ends in a PHI which we can then collapse into a single value |
574 | // if the PHI value is known. |
575 | Instruction *CmpLHS = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: 0)); |
576 | PHINode *CmpPHI = dyn_cast<PHINode>(Val: CmpLHS); |
577 | Constant *CmpConst = dyn_cast<Constant>(Val: CI->getOperand(i_nocapture: 1)); |
578 | // Collect the instructions until we hit a PHI |
579 | SmallVector<BinaryOperator *, 1> InstChain; |
580 | while (!CmpPHI && CmpLHS && isa<BinaryOperator>(Val: CmpLHS) && |
581 | isa<Constant>(Val: CmpLHS->getOperand(i: 1))) { |
582 | // Stop if the chain extends outside of the loop |
583 | if (!L->contains(Inst: CmpLHS)) |
584 | return; |
585 | InstChain.push_back(Elt: cast<BinaryOperator>(Val: CmpLHS)); |
586 | CmpLHS = dyn_cast<Instruction>(Val: CmpLHS->getOperand(i: 0)); |
587 | if (CmpLHS) |
588 | CmpPHI = dyn_cast<PHINode>(Val: CmpLHS); |
589 | } |
590 | if (!CmpPHI || !L->contains(Inst: CmpPHI)) |
591 | return; |
592 | |
593 | // Trace the phi node to find all values that come from successors of BB |
594 | SmallPtrSet<PHINode*, 8> VisitedInsts; |
595 | SmallVector<PHINode*, 8> WorkList; |
596 | WorkList.push_back(Elt: CmpPHI); |
597 | VisitedInsts.insert(Ptr: CmpPHI); |
598 | while (!WorkList.empty()) { |
599 | PHINode *P = WorkList.pop_back_val(); |
600 | for (BasicBlock *B : P->blocks()) { |
601 | // Skip blocks that aren't part of the loop |
602 | if (!L->contains(BB: B)) |
603 | continue; |
604 | Value *V = P->getIncomingValueForBlock(BB: B); |
605 | // If the source is a PHI add it to the work list if we haven't |
606 | // already visited it. |
607 | if (PHINode *PN = dyn_cast<PHINode>(Val: V)) { |
608 | if (VisitedInsts.insert(Ptr: PN).second) |
609 | WorkList.push_back(Elt: PN); |
610 | continue; |
611 | } |
612 | // If this incoming value is a constant and B is a successor of BB, then |
613 | // we can constant-evaluate the compare to see if it makes the branch be |
614 | // taken or not. |
615 | Constant *CmpLHSConst = dyn_cast<Constant>(Val: V); |
616 | if (!CmpLHSConst || !llvm::is_contained(Range: successors(BB), Element: B)) |
617 | continue; |
618 | // First collapse InstChain |
619 | const DataLayout &DL = BB->getDataLayout(); |
620 | for (Instruction *I : llvm::reverse(C&: InstChain)) { |
621 | CmpLHSConst = ConstantFoldBinaryOpOperands( |
622 | Opcode: I->getOpcode(), LHS: CmpLHSConst, RHS: cast<Constant>(Val: I->getOperand(i: 1)), DL); |
623 | if (!CmpLHSConst) |
624 | break; |
625 | } |
626 | if (!CmpLHSConst) |
627 | continue; |
628 | // Now constant-evaluate the compare |
629 | Constant *Result = ConstantFoldCompareInstOperands( |
630 | Predicate: CI->getPredicate(), LHS: CmpLHSConst, RHS: CmpConst, DL); |
631 | // If the result means we don't branch to the block then that block is |
632 | // unlikely. |
633 | if (Result && |
634 | ((Result->isZeroValue() && B == BI->getSuccessor(i: 0)) || |
635 | (Result->isOneValue() && B == BI->getSuccessor(i: 1)))) |
636 | UnlikelyBlocks.insert(Ptr: B); |
637 | } |
638 | } |
639 | } |
640 | |
641 | std::optional<uint32_t> |
642 | BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const { |
643 | auto WeightIt = EstimatedBlockWeight.find(Val: BB); |
644 | if (WeightIt == EstimatedBlockWeight.end()) |
645 | return std::nullopt; |
646 | return WeightIt->second; |
647 | } |
648 | |
649 | std::optional<uint32_t> |
650 | BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const { |
651 | auto WeightIt = EstimatedLoopWeight.find(Val: L); |
652 | if (WeightIt == EstimatedLoopWeight.end()) |
653 | return std::nullopt; |
654 | return WeightIt->second; |
655 | } |
656 | |
657 | std::optional<uint32_t> |
658 | BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const { |
659 | // For edges entering a loop take weight of a loop rather than an individual |
660 | // block in the loop. |
661 | return isLoopEnteringEdge(Edge) |
662 | ? getEstimatedLoopWeight(L: Edge.second.getLoopData()) |
663 | : getEstimatedBlockWeight(BB: Edge.second.getBlock()); |
664 | } |
665 | |
666 | template <class IterT> |
667 | std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight( |
668 | const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const { |
669 | std::optional<uint32_t> MaxWeight; |
670 | for (const BasicBlock *DstBB : Successors) { |
671 | const LoopBlock DstLoopBB = getLoopBlock(BB: DstBB); |
672 | auto Weight = getEstimatedEdgeWeight(Edge: {SrcLoopBB, DstLoopBB}); |
673 | |
674 | if (!Weight) |
675 | return std::nullopt; |
676 | |
677 | if (!MaxWeight || *MaxWeight < *Weight) |
678 | MaxWeight = Weight; |
679 | } |
680 | |
681 | return MaxWeight; |
682 | } |
683 | |
684 | // Updates \p LoopBB's weight and returns true. If \p LoopBB has already |
685 | // an associated weight it is unchanged and false is returned. |
686 | // |
687 | // Please note by the algorithm the weight is not expected to change once set |
688 | // thus 'false' status is used to track visited blocks. |
689 | bool BranchProbabilityInfo::updateEstimatedBlockWeight( |
690 | LoopBlock &LoopBB, uint32_t BBWeight, |
691 | SmallVectorImpl<BasicBlock *> &BlockWorkList, |
692 | SmallVectorImpl<LoopBlock> &LoopWorkList) { |
693 | BasicBlock *BB = LoopBB.getBlock(); |
694 | |
695 | // In general, weight is assigned to a block when it has final value and |
696 | // can't/shouldn't be changed. However, there are cases when a block |
697 | // inherently has several (possibly "contradicting") weights. For example, |
698 | // "unwind" block may also contain "cold" call. In that case the first |
699 | // set weight is favored and all consequent weights are ignored. |
700 | if (!EstimatedBlockWeight.insert(KV: {BB, BBWeight}).second) |
701 | return false; |
702 | |
703 | for (BasicBlock *PredBlock : predecessors(BB)) { |
704 | LoopBlock PredLoop = getLoopBlock(BB: PredBlock); |
705 | // Add affected block/loop to a working list. |
706 | if (isLoopExitingEdge(Edge: {PredLoop, LoopBB})) { |
707 | if (!EstimatedLoopWeight.count(Val: PredLoop.getLoopData())) |
708 | LoopWorkList.push_back(Elt: PredLoop); |
709 | } else if (!EstimatedBlockWeight.count(Val: PredBlock)) |
710 | BlockWorkList.push_back(Elt: PredBlock); |
711 | } |
712 | return true; |
713 | } |
714 | |
715 | // Starting from \p BB traverse through dominator blocks and assign \p BBWeight |
716 | // to all such blocks that are post dominated by \BB. In other words to all |
717 | // blocks that the one is executed if and only if another one is executed. |
718 | // Importantly, we skip loops here for two reasons. First weights of blocks in |
719 | // a loop should be scaled by trip count (yet possibly unknown). Second there is |
720 | // no any value in doing that because that doesn't give any additional |
721 | // information regarding distribution of probabilities inside the loop. |
722 | // Exception is loop 'enter' and 'exit' edges that are handled in a special way |
723 | // at calcEstimatedHeuristics. |
724 | // |
725 | // In addition, \p WorkList is populated with basic blocks if at leas one |
726 | // successor has updated estimated weight. |
727 | void BranchProbabilityInfo::propagateEstimatedBlockWeight( |
728 | const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT, |
729 | uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList, |
730 | SmallVectorImpl<LoopBlock> &LoopWorkList) { |
731 | const BasicBlock *BB = LoopBB.getBlock(); |
732 | const auto *DTStartNode = DT->getNode(BB); |
733 | const auto *PDTStartNode = PDT->getNode(BB); |
734 | |
735 | // TODO: Consider propagating weight down the domination line as well. |
736 | for (const auto *DTNode = DTStartNode; DTNode != nullptr; |
737 | DTNode = DTNode->getIDom()) { |
738 | auto *DomBB = DTNode->getBlock(); |
739 | // Consider blocks which lie on one 'line'. |
740 | if (!PDT->dominates(A: PDTStartNode, B: PDT->getNode(BB: DomBB))) |
741 | // If BB doesn't post dominate DomBB it will not post dominate dominators |
742 | // of DomBB as well. |
743 | break; |
744 | |
745 | LoopBlock DomLoopBB = getLoopBlock(BB: DomBB); |
746 | const LoopEdge Edge{DomLoopBB, LoopBB}; |
747 | // Don't propagate weight to blocks belonging to different loops. |
748 | if (!isLoopEnteringExitingEdge(Edge)) { |
749 | if (!updateEstimatedBlockWeight(LoopBB&: DomLoopBB, BBWeight, BlockWorkList, |
750 | LoopWorkList)) |
751 | // If DomBB has weight set then all it's predecessors are already |
752 | // processed (since we propagate weight up to the top of IR each time). |
753 | break; |
754 | } else if (isLoopExitingEdge(Edge)) { |
755 | LoopWorkList.push_back(Elt: DomLoopBB); |
756 | } |
757 | } |
758 | } |
759 | |
760 | std::optional<uint32_t> |
761 | BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock *BB) { |
762 | // Returns true if \p BB has call marked with "NoReturn" attribute. |
763 | auto hasNoReturn = [&](const BasicBlock *BB) { |
764 | for (const auto &I : reverse(C: *BB)) |
765 | if (const CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
766 | if (CI->hasFnAttr(Kind: Attribute::NoReturn)) |
767 | return true; |
768 | |
769 | return false; |
770 | }; |
771 | |
772 | // Important note regarding the order of checks. They are ordered by weight |
773 | // from lowest to highest. Doing that allows to avoid "unstable" results |
774 | // when several conditions heuristics can be applied simultaneously. |
775 | if (isa<UnreachableInst>(Val: BB->getTerminator()) || |
776 | // If this block is terminated by a call to |
777 | // @llvm.experimental.deoptimize then treat it like an unreachable |
778 | // since it is expected to practically never execute. |
779 | // TODO: Should we actually treat as never returning call? |
780 | BB->getTerminatingDeoptimizeCall()) |
781 | return hasNoReturn(BB) |
782 | ? static_cast<uint32_t>(BlockExecWeight::NORETURN) |
783 | : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE); |
784 | |
785 | // Check if the block is an exception handling block. |
786 | if (BB->isEHPad()) |
787 | return static_cast<uint32_t>(BlockExecWeight::UNWIND); |
788 | |
789 | // Check if the block contains 'cold' call. |
790 | for (const auto &I : *BB) |
791 | if (const CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
792 | if (CI->hasFnAttr(Kind: Attribute::Cold)) |
793 | return static_cast<uint32_t>(BlockExecWeight::COLD); |
794 | |
795 | return std::nullopt; |
796 | } |
797 | |
798 | // Does RPO traversal over all blocks in \p F and assigns weights to |
799 | // 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its |
800 | // best to propagate the weight to up/down the IR. |
801 | void BranchProbabilityInfo::estimateBlockWeights(const Function &F, |
802 | DominatorTree *DT, |
803 | PostDominatorTree *PDT) { |
804 | SmallVector<BasicBlock *, 8> BlockWorkList; |
805 | SmallVector<LoopBlock, 8> LoopWorkList; |
806 | SmallDenseMap<LoopData, SmallVector<BasicBlock *, 4>> LoopExitBlocks; |
807 | |
808 | // By doing RPO we make sure that all predecessors already have weights |
809 | // calculated before visiting theirs successors. |
810 | ReversePostOrderTraversal<const Function *> RPOT(&F); |
811 | for (const auto *BB : RPOT) |
812 | if (auto BBWeight = getInitialEstimatedBlockWeight(BB)) |
813 | // If we were able to find estimated weight for the block set it to this |
814 | // block and propagate up the IR. |
815 | propagateEstimatedBlockWeight(LoopBB: getLoopBlock(BB), DT, PDT, BBWeight: *BBWeight, |
816 | BlockWorkList, LoopWorkList); |
817 | |
818 | // BlockWorklist/LoopWorkList contains blocks/loops with at least one |
819 | // successor/exit having estimated weight. Try to propagate weight to such |
820 | // blocks/loops from successors/exits. |
821 | // Process loops and blocks. Order is not important. |
822 | do { |
823 | while (!LoopWorkList.empty()) { |
824 | const LoopBlock LoopBB = LoopWorkList.pop_back_val(); |
825 | const LoopData LD = LoopBB.getLoopData(); |
826 | if (EstimatedLoopWeight.count(Val: LD)) |
827 | continue; |
828 | |
829 | auto Res = LoopExitBlocks.try_emplace(Key: LD); |
830 | SmallVectorImpl<BasicBlock *> &Exits = Res.first->second; |
831 | if (Res.second) |
832 | getLoopExitBlocks(LB: LoopBB, Exits); |
833 | auto LoopWeight = getMaxEstimatedEdgeWeight( |
834 | SrcLoopBB: LoopBB, Successors: make_range(x: Exits.begin(), y: Exits.end())); |
835 | |
836 | if (LoopWeight) { |
837 | // If we never exit the loop then we can enter it once at maximum. |
838 | if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE)) |
839 | LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); |
840 | |
841 | EstimatedLoopWeight.insert(KV: {LD, *LoopWeight}); |
842 | // Add all blocks entering the loop into working list. |
843 | getLoopEnterBlocks(LB: LoopBB, Enters&: BlockWorkList); |
844 | } |
845 | } |
846 | |
847 | while (!BlockWorkList.empty()) { |
848 | // We can reach here only if BlockWorkList is not empty. |
849 | const BasicBlock *BB = BlockWorkList.pop_back_val(); |
850 | if (EstimatedBlockWeight.count(Val: BB)) |
851 | continue; |
852 | |
853 | // We take maximum over all weights of successors. In other words we take |
854 | // weight of "hot" path. In theory we can probably find a better function |
855 | // which gives higher accuracy results (comparing to "maximum") but I |
856 | // can't |
857 | // think of any right now. And I doubt it will make any difference in |
858 | // practice. |
859 | const LoopBlock LoopBB = getLoopBlock(BB); |
860 | auto MaxWeight = getMaxEstimatedEdgeWeight(SrcLoopBB: LoopBB, Successors: successors(BB)); |
861 | |
862 | if (MaxWeight) |
863 | propagateEstimatedBlockWeight(LoopBB, DT, PDT, BBWeight: *MaxWeight, |
864 | BlockWorkList, LoopWorkList); |
865 | } |
866 | } while (!BlockWorkList.empty() || !LoopWorkList.empty()); |
867 | } |
868 | |
869 | // Calculate edge probabilities based on block's estimated weight. |
870 | // Note that gathered weights were not scaled for loops. Thus edges entering |
871 | // and exiting loops requires special processing. |
872 | bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) { |
873 | assert(BB->getTerminator()->getNumSuccessors() > 1 && |
874 | "expected more than one successor!" ); |
875 | |
876 | const LoopBlock LoopBB = getLoopBlock(BB); |
877 | |
878 | SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks; |
879 | uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT; |
880 | if (LoopBB.getLoop()) |
881 | computeUnlikelySuccessors(BB, L: LoopBB.getLoop(), UnlikelyBlocks); |
882 | |
883 | // Changed to 'true' if at least one successor has estimated weight. |
884 | bool FoundEstimatedWeight = false; |
885 | SmallVector<uint32_t, 4> SuccWeights; |
886 | uint64_t TotalWeight = 0; |
887 | // Go over all successors of BB and put their weights into SuccWeights. |
888 | for (const BasicBlock *SuccBB : successors(BB)) { |
889 | std::optional<uint32_t> Weight; |
890 | const LoopBlock SuccLoopBB = getLoopBlock(BB: SuccBB); |
891 | const LoopEdge Edge{LoopBB, SuccLoopBB}; |
892 | |
893 | Weight = getEstimatedEdgeWeight(Edge); |
894 | |
895 | if (isLoopExitingEdge(Edge) && |
896 | // Avoid adjustment of ZERO weight since it should remain unchanged. |
897 | Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { |
898 | // Scale down loop exiting weight by trip count. |
899 | Weight = std::max( |
900 | a: static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), |
901 | b: Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / |
902 | TC); |
903 | } |
904 | bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(Ptr: SuccBB); |
905 | if (IsUnlikelyEdge && |
906 | // Avoid adjustment of ZERO weight since it should remain unchanged. |
907 | Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { |
908 | // 'Unlikely' blocks have twice lower weight. |
909 | Weight = std::max( |
910 | a: static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), |
911 | b: Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2); |
912 | } |
913 | |
914 | if (Weight) |
915 | FoundEstimatedWeight = true; |
916 | |
917 | auto WeightVal = |
918 | Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)); |
919 | TotalWeight += WeightVal; |
920 | SuccWeights.push_back(Elt: WeightVal); |
921 | } |
922 | |
923 | // If non of blocks have estimated weight bail out. |
924 | // If TotalWeight is 0 that means weight of each successor is 0 as well and |
925 | // equally likely. Bail out early to not deal with devision by zero. |
926 | if (!FoundEstimatedWeight || TotalWeight == 0) |
927 | return false; |
928 | |
929 | assert(SuccWeights.size() == succ_size(BB) && "Missed successor?" ); |
930 | const unsigned SuccCount = SuccWeights.size(); |
931 | |
932 | // If the sum of weights does not fit in 32 bits, scale every weight down |
933 | // accordingly. |
934 | if (TotalWeight > UINT32_MAX) { |
935 | uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1; |
936 | TotalWeight = 0; |
937 | for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { |
938 | SuccWeights[Idx] /= ScalingFactor; |
939 | if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO)) |
940 | SuccWeights[Idx] = |
941 | static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); |
942 | TotalWeight += SuccWeights[Idx]; |
943 | } |
944 | assert(TotalWeight <= UINT32_MAX && "Total weight overflows" ); |
945 | } |
946 | |
947 | // Finally set probabilities to edges according to estimated block weights. |
948 | SmallVector<BranchProbability, 4> EdgeProbabilities( |
949 | SuccCount, BranchProbability::getUnknown()); |
950 | |
951 | for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { |
952 | EdgeProbabilities[Idx] = |
953 | BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight); |
954 | } |
955 | setEdgeProbability(Src: BB, Probs: EdgeProbabilities); |
956 | return true; |
957 | } |
958 | |
959 | bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, |
960 | const TargetLibraryInfo *TLI) { |
961 | const BranchInst *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
962 | if (!BI || !BI->isConditional()) |
963 | return false; |
964 | |
965 | Value *Cond = BI->getCondition(); |
966 | ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond); |
967 | if (!CI) |
968 | return false; |
969 | |
970 | auto GetConstantInt = [](Value *V) { |
971 | if (auto *I = dyn_cast<BitCastInst>(Val: V)) |
972 | return dyn_cast<ConstantInt>(Val: I->getOperand(i_nocapture: 0)); |
973 | return dyn_cast<ConstantInt>(Val: V); |
974 | }; |
975 | |
976 | Value *RHS = CI->getOperand(i_nocapture: 1); |
977 | ConstantInt *CV = GetConstantInt(RHS); |
978 | if (!CV) |
979 | return false; |
980 | |
981 | // If the LHS is the result of AND'ing a value with a single bit bitmask, |
982 | // we don't have information about probabilities. |
983 | if (Instruction *LHS = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: 0))) |
984 | if (LHS->getOpcode() == Instruction::And) |
985 | if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(i: 1))) |
986 | if (AndRHS->getValue().isPowerOf2()) |
987 | return false; |
988 | |
989 | // Check if the LHS is the return value of a library function |
990 | LibFunc Func = NumLibFuncs; |
991 | if (TLI) |
992 | if (CallInst *Call = dyn_cast<CallInst>(Val: CI->getOperand(i_nocapture: 0))) |
993 | if (Function *CalledFn = Call->getCalledFunction()) |
994 | TLI->getLibFunc(FDecl: *CalledFn, F&: Func); |
995 | |
996 | ProbabilityTable::const_iterator Search; |
997 | if (Func == LibFunc_strcasecmp || |
998 | Func == LibFunc_strcmp || |
999 | Func == LibFunc_strncasecmp || |
1000 | Func == LibFunc_strncmp || |
1001 | Func == LibFunc_memcmp || |
1002 | Func == LibFunc_bcmp) { |
1003 | Search = ICmpWithLibCallTable.find(x: CI->getPredicate()); |
1004 | if (Search == ICmpWithLibCallTable.end()) |
1005 | return false; |
1006 | } else if (CV->isZero()) { |
1007 | Search = ICmpWithZeroTable.find(x: CI->getPredicate()); |
1008 | if (Search == ICmpWithZeroTable.end()) |
1009 | return false; |
1010 | } else if (CV->isOne()) { |
1011 | Search = ICmpWithOneTable.find(x: CI->getPredicate()); |
1012 | if (Search == ICmpWithOneTable.end()) |
1013 | return false; |
1014 | } else if (CV->isMinusOne()) { |
1015 | Search = ICmpWithMinusOneTable.find(x: CI->getPredicate()); |
1016 | if (Search == ICmpWithMinusOneTable.end()) |
1017 | return false; |
1018 | } else { |
1019 | return false; |
1020 | } |
1021 | |
1022 | setEdgeProbability(Src: BB, Probs: Search->second); |
1023 | return true; |
1024 | } |
1025 | |
1026 | bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { |
1027 | const BranchInst *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
1028 | if (!BI || !BI->isConditional()) |
1029 | return false; |
1030 | |
1031 | Value *Cond = BI->getCondition(); |
1032 | FCmpInst *FCmp = dyn_cast<FCmpInst>(Val: Cond); |
1033 | if (!FCmp) |
1034 | return false; |
1035 | |
1036 | ProbabilityList ProbList; |
1037 | if (FCmp->isEquality()) { |
1038 | ProbList = !FCmp->isTrueWhenEqual() ? |
1039 | // f1 == f2 -> Unlikely |
1040 | ProbabilityList({FPTakenProb, FPUntakenProb}) : |
1041 | // f1 != f2 -> Likely |
1042 | ProbabilityList({FPUntakenProb, FPTakenProb}); |
1043 | } else { |
1044 | auto Search = FCmpTable.find(x: FCmp->getPredicate()); |
1045 | if (Search == FCmpTable.end()) |
1046 | return false; |
1047 | ProbList = Search->second; |
1048 | } |
1049 | |
1050 | setEdgeProbability(Src: BB, Probs: ProbList); |
1051 | return true; |
1052 | } |
1053 | |
1054 | void BranchProbabilityInfo::releaseMemory() { |
1055 | Probs.clear(); |
1056 | Handles.clear(); |
1057 | } |
1058 | |
1059 | bool BranchProbabilityInfo::invalidate(Function &, const PreservedAnalyses &PA, |
1060 | FunctionAnalysisManager::Invalidator &) { |
1061 | // Check whether the analysis, all analyses on functions, or the function's |
1062 | // CFG have been preserved. |
1063 | auto PAC = PA.getChecker<BranchProbabilityAnalysis>(); |
1064 | return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || |
1065 | PAC.preservedSet<CFGAnalyses>()); |
1066 | } |
1067 | |
1068 | void BranchProbabilityInfo::print(raw_ostream &OS) const { |
1069 | OS << "---- Branch Probabilities ----\n" ; |
1070 | // We print the probabilities from the last function the analysis ran over, |
1071 | // or the function it is currently running over. |
1072 | assert(LastF && "Cannot print prior to running over a function" ); |
1073 | for (const auto &BI : *LastF) { |
1074 | for (const BasicBlock *Succ : successors(BB: &BI)) |
1075 | printEdgeProbability(OS&: OS << " " , Src: &BI, Dst: Succ); |
1076 | } |
1077 | } |
1078 | |
1079 | bool BranchProbabilityInfo:: |
1080 | isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { |
1081 | // Hot probability is at least 4/5 = 80% |
1082 | // FIXME: Compare against a static "hot" BranchProbability. |
1083 | return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); |
1084 | } |
1085 | |
1086 | /// Get the raw edge probability for the edge. If can't find it, return a |
1087 | /// default probability 1/N where N is the number of successors. Here an edge is |
1088 | /// specified using PredBlock and an |
1089 | /// index to the successors. |
1090 | BranchProbability |
1091 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
1092 | unsigned IndexInSuccessors) const { |
1093 | auto I = Probs.find(Val: std::make_pair(x&: Src, y&: IndexInSuccessors)); |
1094 | assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) == |
1095 | (Probs.end() == I) && |
1096 | "Probability for I-th successor must always be defined along with the " |
1097 | "probability for the first successor" ); |
1098 | |
1099 | if (I != Probs.end()) |
1100 | return I->second; |
1101 | |
1102 | return {1, static_cast<uint32_t>(succ_size(BB: Src))}; |
1103 | } |
1104 | |
1105 | BranchProbability |
1106 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
1107 | const_succ_iterator Dst) const { |
1108 | return getEdgeProbability(Src, IndexInSuccessors: Dst.getSuccessorIndex()); |
1109 | } |
1110 | |
1111 | /// Get the raw edge probability calculated for the block pair. This returns the |
1112 | /// sum of all raw edge probabilities from Src to Dst. |
1113 | BranchProbability |
1114 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
1115 | const BasicBlock *Dst) const { |
1116 | if (!Probs.count(Val: std::make_pair(x&: Src, y: 0))) |
1117 | return BranchProbability(llvm::count(Range: successors(BB: Src), Element: Dst), succ_size(BB: Src)); |
1118 | |
1119 | auto Prob = BranchProbability::getZero(); |
1120 | for (const_succ_iterator I = succ_begin(BB: Src), E = succ_end(BB: Src); I != E; ++I) |
1121 | if (*I == Dst) |
1122 | Prob += Probs.find(Val: std::make_pair(x&: Src, y: I.getSuccessorIndex()))->second; |
1123 | |
1124 | return Prob; |
1125 | } |
1126 | |
1127 | /// Set the edge probability for all edges at once. |
1128 | void BranchProbabilityInfo::setEdgeProbability( |
1129 | const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) { |
1130 | assert(Src->getTerminator()->getNumSuccessors() == Probs.size()); |
1131 | eraseBlock(BB: Src); // Erase stale data if any. |
1132 | if (Probs.size() == 0) |
1133 | return; // Nothing to set. |
1134 | |
1135 | Handles.insert(V: BasicBlockCallbackVH(Src, this)); |
1136 | uint64_t TotalNumerator = 0; |
1137 | for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) { |
1138 | this->Probs[std::make_pair(x&: Src, y&: SuccIdx)] = Probs[SuccIdx]; |
1139 | LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx |
1140 | << " successor probability to " << Probs[SuccIdx] |
1141 | << "\n" ); |
1142 | TotalNumerator += Probs[SuccIdx].getNumerator(); |
1143 | } |
1144 | |
1145 | // Because of rounding errors the total probability cannot be checked to be |
1146 | // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator. |
1147 | // Instead, every single probability in Probs must be as accurate as possible. |
1148 | // This results in error 1/denominator at most, thus the total absolute error |
1149 | // should be within Probs.size / BranchProbability::getDenominator. |
1150 | assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size()); |
1151 | assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size()); |
1152 | (void)TotalNumerator; |
1153 | } |
1154 | |
1155 | void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src, |
1156 | BasicBlock *Dst) { |
1157 | eraseBlock(BB: Dst); // Erase stale data if any. |
1158 | unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors(); |
1159 | assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors()); |
1160 | if (NumSuccessors == 0) |
1161 | return; // Nothing to set. |
1162 | if (!this->Probs.contains(Val: std::make_pair(x&: Src, y: 0))) |
1163 | return; // No probability is set for edges from Src. Keep the same for Dst. |
1164 | |
1165 | Handles.insert(V: BasicBlockCallbackVH(Dst, this)); |
1166 | for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) { |
1167 | auto Prob = this->Probs[std::make_pair(x&: Src, y&: SuccIdx)]; |
1168 | this->Probs[std::make_pair(x&: Dst, y&: SuccIdx)] = Prob; |
1169 | LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx |
1170 | << " successor probability to " << Prob << "\n" ); |
1171 | } |
1172 | } |
1173 | |
1174 | void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock *Src) { |
1175 | assert(Src->getTerminator()->getNumSuccessors() == 2); |
1176 | auto It0 = Probs.find(Val: std::make_pair(x&: Src, y: 0)); |
1177 | if (It0 == Probs.end()) |
1178 | return; // No probability is set for edges from Src |
1179 | auto It1 = Probs.find(Val: std::make_pair(x&: Src, y: 1)); |
1180 | assert(It1 != Probs.end()); |
1181 | std::swap(a&: It0->second, b&: It1->second); |
1182 | } |
1183 | |
1184 | raw_ostream & |
1185 | BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, |
1186 | const BasicBlock *Src, |
1187 | const BasicBlock *Dst) const { |
1188 | const BranchProbability Prob = getEdgeProbability(Src, Dst); |
1189 | OS << "edge " ; |
1190 | Src->printAsOperand(O&: OS, PrintType: false, M: Src->getModule()); |
1191 | OS << " -> " ; |
1192 | Dst->printAsOperand(O&: OS, PrintType: false, M: Dst->getModule()); |
1193 | OS << " probability is " << Prob |
1194 | << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n" ); |
1195 | |
1196 | return OS; |
1197 | } |
1198 | |
1199 | void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { |
1200 | LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n" ); |
1201 | |
1202 | // Note that we cannot use successors of BB because the terminator of BB may |
1203 | // have changed when eraseBlock is called as a BasicBlockCallbackVH callback. |
1204 | // Instead we remove prob data for the block by iterating successors by their |
1205 | // indices from 0 till the last which exists. There could not be prob data for |
1206 | // a pair (BB, N) if there is no data for (BB, N-1) because the data is always |
1207 | // set for all successors from 0 to M at once by the method |
1208 | // setEdgeProbability(). |
1209 | Handles.erase(V: BasicBlockCallbackVH(BB, this)); |
1210 | for (unsigned I = 0;; ++I) { |
1211 | auto MapI = Probs.find(Val: std::make_pair(x&: BB, y&: I)); |
1212 | if (MapI == Probs.end()) { |
1213 | assert(Probs.count(std::make_pair(BB, I + 1)) == 0 && |
1214 | "Must be no more successors" ); |
1215 | return; |
1216 | } |
1217 | Probs.erase(I: MapI); |
1218 | } |
1219 | } |
1220 | |
1221 | void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, |
1222 | const TargetLibraryInfo *TLI, |
1223 | DominatorTree *DT, |
1224 | PostDominatorTree *PDT) { |
1225 | LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() |
1226 | << " ----\n\n" ); |
1227 | LastF = &F; // Store the last function we ran on for printing. |
1228 | LI = &LoopI; |
1229 | |
1230 | SccI = std::make_unique<SccInfo>(args: F); |
1231 | |
1232 | assert(EstimatedBlockWeight.empty()); |
1233 | assert(EstimatedLoopWeight.empty()); |
1234 | |
1235 | std::unique_ptr<DominatorTree> DTPtr; |
1236 | std::unique_ptr<PostDominatorTree> PDTPtr; |
1237 | |
1238 | if (!DT) { |
1239 | DTPtr = std::make_unique<DominatorTree>(args&: const_cast<Function &>(F)); |
1240 | DT = DTPtr.get(); |
1241 | } |
1242 | |
1243 | if (!PDT) { |
1244 | PDTPtr = std::make_unique<PostDominatorTree>(args&: const_cast<Function &>(F)); |
1245 | PDT = PDTPtr.get(); |
1246 | } |
1247 | |
1248 | estimateBlockWeights(F, DT, PDT); |
1249 | |
1250 | // Walk the basic blocks in post-order so that we can build up state about |
1251 | // the successors of a block iteratively. |
1252 | for (const auto *BB : post_order(G: &F.getEntryBlock())) { |
1253 | LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() |
1254 | << "\n" ); |
1255 | // If there is no at least two successors, no sense to set probability. |
1256 | if (BB->getTerminator()->getNumSuccessors() < 2) |
1257 | continue; |
1258 | if (calcMetadataWeights(BB)) |
1259 | continue; |
1260 | if (calcEstimatedHeuristics(BB)) |
1261 | continue; |
1262 | if (calcPointerHeuristics(BB)) |
1263 | continue; |
1264 | if (calcZeroHeuristics(BB, TLI)) |
1265 | continue; |
1266 | if (calcFloatingPointHeuristics(BB)) |
1267 | continue; |
1268 | } |
1269 | |
1270 | EstimatedLoopWeight.clear(); |
1271 | EstimatedBlockWeight.clear(); |
1272 | SccI.reset(); |
1273 | |
1274 | if (PrintBranchProb && (PrintBranchProbFuncName.empty() || |
1275 | F.getName() == PrintBranchProbFuncName)) { |
1276 | print(OS&: dbgs()); |
1277 | } |
1278 | } |
1279 | |
1280 | void BranchProbabilityInfoWrapperPass::getAnalysisUsage( |
1281 | AnalysisUsage &AU) const { |
1282 | // We require DT so it's available when LI is available. The LI updating code |
1283 | // asserts that DT is also present so if we don't make sure that we have DT |
1284 | // here, that assert will trigger. |
1285 | AU.addRequired<DominatorTreeWrapperPass>(); |
1286 | AU.addRequired<LoopInfoWrapperPass>(); |
1287 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
1288 | AU.addRequired<DominatorTreeWrapperPass>(); |
1289 | AU.addRequired<PostDominatorTreeWrapperPass>(); |
1290 | AU.setPreservesAll(); |
1291 | } |
1292 | |
1293 | bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { |
1294 | const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
1295 | const TargetLibraryInfo &TLI = |
1296 | getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
1297 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
1298 | PostDominatorTree &PDT = |
1299 | getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); |
1300 | BPI.calculate(F, LoopI: LI, TLI: &TLI, DT: &DT, PDT: &PDT); |
1301 | return false; |
1302 | } |
1303 | |
1304 | void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); } |
1305 | |
1306 | void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS, |
1307 | const Module *) const { |
1308 | BPI.print(OS); |
1309 | } |
1310 | |
1311 | AnalysisKey BranchProbabilityAnalysis::Key; |
1312 | BranchProbabilityInfo |
1313 | BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { |
1314 | auto &LI = AM.getResult<LoopAnalysis>(IR&: F); |
1315 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
1316 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
1317 | auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F); |
1318 | BranchProbabilityInfo BPI; |
1319 | BPI.calculate(F, LoopI: LI, TLI: &TLI, DT: &DT, PDT: &PDT); |
1320 | return BPI; |
1321 | } |
1322 | |
1323 | PreservedAnalyses |
1324 | BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { |
1325 | OS << "Printing analysis 'Branch Probability Analysis' for function '" |
1326 | << F.getName() << "':\n" ; |
1327 | AM.getResult<BranchProbabilityAnalysis>(IR&: F).print(OS); |
1328 | return PreservedAnalyses::all(); |
1329 | } |
1330 | |