| 1 | //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Loops should be simplified before this analysis. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
| 14 | #include "llvm/ADT/PostOrderIterator.h" |
| 15 | #include "llvm/ADT/SCCIterator.h" |
| 16 | #include "llvm/ADT/STLExtras.h" |
| 17 | #include "llvm/ADT/SmallVector.h" |
| 18 | #include "llvm/Analysis/ConstantFolding.h" |
| 19 | #include "llvm/Analysis/LoopInfo.h" |
| 20 | #include "llvm/Analysis/PostDominators.h" |
| 21 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 22 | #include "llvm/IR/Attributes.h" |
| 23 | #include "llvm/IR/BasicBlock.h" |
| 24 | #include "llvm/IR/CFG.h" |
| 25 | #include "llvm/IR/Constants.h" |
| 26 | #include "llvm/IR/Dominators.h" |
| 27 | #include "llvm/IR/Function.h" |
| 28 | #include "llvm/IR/InstrTypes.h" |
| 29 | #include "llvm/IR/Instruction.h" |
| 30 | #include "llvm/IR/Instructions.h" |
| 31 | #include "llvm/IR/LLVMContext.h" |
| 32 | #include "llvm/IR/Metadata.h" |
| 33 | #include "llvm/IR/PassManager.h" |
| 34 | #include "llvm/IR/ProfDataUtils.h" |
| 35 | #include "llvm/IR/Type.h" |
| 36 | #include "llvm/IR/Value.h" |
| 37 | #include "llvm/InitializePasses.h" |
| 38 | #include "llvm/Pass.h" |
| 39 | #include "llvm/Support/BranchProbability.h" |
| 40 | #include "llvm/Support/Casting.h" |
| 41 | #include "llvm/Support/CommandLine.h" |
| 42 | #include "llvm/Support/Debug.h" |
| 43 | #include "llvm/Support/raw_ostream.h" |
| 44 | #include <cassert> |
| 45 | #include <cstdint> |
| 46 | #include <map> |
| 47 | #include <utility> |
| 48 | |
| 49 | using namespace llvm; |
| 50 | |
| 51 | #define DEBUG_TYPE "branch-prob" |
| 52 | |
// Command-line switch to dump the computed branch probabilities (hidden,
// intended for debugging/testing).
static cl::opt<bool> PrintBranchProb(
    "print-bpi" , cl::init(Val: false), cl::Hidden,
    cl::desc("Print the branch probability info." ));

// When non-empty, restricts -print-bpi output to the named function.
static cl::opt<std::string> PrintBranchProbFuncName(
    "print-bpi-func-name" , cl::Hidden,
    cl::desc("The option to specify the name of the function "
             "whose branch probability info is printed." ));

// Legacy pass-manager registration, listing the analyses this pass may
// consume (loop info, TLI, dominators, post-dominators).
INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob" ,
                      "Branch Probability Analysis" , false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob" ,
                    "Branch Probability Analysis" , false, true)

BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass()
    : FunctionPass(ID) {}

// Pass identification token used by the legacy pass manager; its address,
// not its value, identifies the pass.
char BranchProbabilityInfoWrapperPass::ID = 0;
| 75 | |
| 76 | // Weights are for internal use only. They are used by heuristics to help to |
| 77 | // estimate edges' probability. Example: |
| 78 | // |
| 79 | // Using "Loop Branch Heuristics" we predict weights of edges for the |
| 80 | // block BB2. |
| 81 | // ... |
| 82 | // | |
| 83 | // V |
| 84 | // BB1<-+ |
| 85 | // | | |
| 86 | // | | (Weight = 124) |
| 87 | // V | |
| 88 | // BB2--+ |
| 89 | // | |
| 90 | // | (Weight = 4) |
| 91 | // V |
| 92 | // BB3 |
| 93 | // |
| 94 | // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 |
| 95 | // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 |
// Loop Branch Heuristics (LBH): back edges are strongly preferred over loop
// exits (124 : 4, i.e. ~97% taken).
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;

/// Unreachable-terminating branch taken probability.
///
/// This is the probability for a branch being taken to a block that terminates
/// (eventually) in unreachable. These are predicted as unlikely as possible.
/// All reachable probability will proportionally share the remaining part.
/// getRaw(1) is the smallest representable non-zero probability.
static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(N: 1);

/// Heuristics and lookup tables for non-loop branches:
/// Pointer Heuristics (PH)
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
static const BranchProbability
    PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
static const BranchProbability
    PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);

// Each table maps a comparison predicate to the {taken, not-taken}
// probabilities of the conditional branch guarded by that comparison.
using ProbabilityList = SmallVector<BranchProbability>;
using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;

/// Pointer comparisons:
static const ProbabilityTable PointerTable{
    {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
    {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
};

/// Zero Heuristics (ZH)
static const uint32_t ZH_TAKEN_WEIGHT = 20;
static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
static const BranchProbability
    ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
static const BranchProbability
    ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);

/// Integer compares with 0:
static const ProbabilityTable ICmpWithZeroTable{
    {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely
    {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely
    {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
    {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
};

/// Integer compares with -1:
static const ProbabilityTable ICmpWithMinusOneTable{
    {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
    {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
    // InstCombine canonicalizes X >= 0 into X > -1
    {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
};

/// Integer compares with 1:
static const ProbabilityTable ICmpWithOneTable{
    // InstCombine canonicalizes X <= 0 into X < 1
    {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
};

/// strcmp and similar functions return zero, negative, or positive, if the
/// first string is equal, less, or greater than the second. We consider it
/// likely that the strings are not equal, so a comparison with zero is
/// probably false, but also a comparison with any other number is also
/// probably false given that what exactly is returned for nonzero values is
/// not specified. Any kind of comparison other than equality we know
/// nothing about.
static const ProbabilityTable ICmpWithLibCallTable{
    {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}},
    {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}},
};

// Floating-Point Heuristics (FPH)
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;

/// This is the probability for an ordered floating point comparison.
static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
/// This is the probability for an unordered floating point comparison, it means
/// one or two of the operands are NaN. Usually it is used to test for an
/// exceptional case, so the result is unlikely.
static const uint32_t FPH_UNO_WEIGHT = 1;

static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT,
                                              FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
static const BranchProbability
    FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
static const BranchProbability
    FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
static const BranchProbability
    FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);

/// Floating-Point compares:
static const ProbabilityTable FCmpTable{
    {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
    {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
};

/// Set of dedicated "absolute" execution weights for a block. These weights are
/// meaningful relative to each other and their derivatives only.
enum class BlockExecWeight : std::uint32_t {
  /// Special weight used for cases with exact zero probability.
  ZERO = 0x0,
  /// Minimal possible non zero weight.
  LOWEST_NON_ZERO = 0x1,
  /// Weight to an 'unreachable' block.
  UNREACHABLE = ZERO,
  /// Weight to a block containing non returning call.
  NORETURN = LOWEST_NON_ZERO,
  /// Weight to 'unwind' block of an invoke instruction.
  UNWIND = LOWEST_NON_ZERO,
  /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
  /// with attribute 'cold'.
  COLD = 0xffff,
  /// Default weight is used in cases when there is no dedicated execution
  /// weight set. It is not propagated through the domination line either.
  DEFAULT = 0xfffff
};
| 212 | |
| 213 | namespace { |
| 214 | class BPIConstruction { |
| 215 | public: |
| 216 | BPIConstruction(BranchProbabilityInfo &BPI) : BPI(BPI) {} |
| 217 | void calculate(const Function &F, const LoopInfo &LI, |
| 218 | const TargetLibraryInfo *TLI, DominatorTree *DT, |
| 219 | PostDominatorTree *PDT); |
| 220 | |
| 221 | private: |
| 222 | // Data structure to track SCCs for handling irreducible loops. |
| 223 | class SccInfo { |
| 224 | // Enum of types to classify basic blocks in SCC. Basic block belonging to |
| 225 | // SCC is 'Inner' until it is either 'Header' or 'Exiting'. Note that a |
| 226 | // basic block can be 'Header' and 'Exiting' at the same time. |
| 227 | enum SccBlockType { |
| 228 | Inner = 0x0, |
| 229 | = 0x1, |
| 230 | Exiting = 0x2, |
| 231 | }; |
| 232 | // Map of basic blocks to SCC IDs they belong to. If basic block doesn't |
| 233 | // belong to any SCC it is not in the map. |
| 234 | using SccMap = DenseMap<const BasicBlock *, int>; |
| 235 | // Each basic block in SCC is attributed with one or several types from |
| 236 | // SccBlockType. Map value has uint32_t type (instead of SccBlockType) |
| 237 | // since basic block may be for example "Header" and "Exiting" at the same |
| 238 | // time and we need to be able to keep more than one value from |
| 239 | // SccBlockType. |
| 240 | using SccBlockTypeMap = DenseMap<const BasicBlock *, uint32_t>; |
| 241 | // Vector containing classification of basic blocks for all SCCs where i'th |
| 242 | // vector element corresponds to SCC with ID equal to i. |
| 243 | using SccBlockTypeMaps = std::vector<SccBlockTypeMap>; |
| 244 | |
| 245 | SccMap SccNums; |
| 246 | SccBlockTypeMaps SccBlocks; |
| 247 | |
| 248 | public: |
| 249 | LLVM_ABI explicit SccInfo(const Function &F); |
| 250 | |
| 251 | /// If \p BB belongs to some SCC then ID of that SCC is returned, otherwise |
| 252 | /// -1 is returned. If \p BB belongs to more than one SCC at the same time |
| 253 | /// result is undefined. |
| 254 | LLVM_ABI int getSCCNum(const BasicBlock *BB) const; |
| 255 | /// Returns true if \p BB is a 'header' block in SCC with \p SccNum ID, |
| 256 | /// false otherwise. |
| 257 | bool (const BasicBlock *BB, int SccNum) const { |
| 258 | return getSccBlockType(BB, SccNum) & Header; |
| 259 | } |
| 260 | /// Returns true if \p BB is an 'exiting' block in SCC with \p SccNum ID, |
| 261 | /// false otherwise. |
| 262 | bool isSCCExitingBlock(const BasicBlock *BB, int SccNum) const { |
| 263 | return getSccBlockType(BB, SccNum) & Exiting; |
| 264 | } |
| 265 | /// Fills in \p Enters vector with all such blocks that don't belong to |
| 266 | /// SCC with \p SccNum ID but there is an edge to a block belonging to the |
| 267 | /// SCC. |
| 268 | LLVM_ABI void |
| 269 | getSccEnterBlocks(int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const; |
| 270 | /// Fills in \p Exits vector with all such blocks that don't belong to |
| 271 | /// SCC with \p SccNum ID but there is an edge from a block belonging to the |
| 272 | /// SCC. |
| 273 | LLVM_ABI void getSccExitBlocks(int SccNum, |
| 274 | SmallVectorImpl<BasicBlock *> &Exits) const; |
| 275 | |
| 276 | private: |
| 277 | /// Returns \p BB's type according to classification given by SccBlockType |
| 278 | /// enum. Please note that \p BB must belong to SSC with \p SccNum ID. |
| 279 | LLVM_ABI uint32_t getSccBlockType(const BasicBlock *BB, int SccNum) const; |
| 280 | /// Calculates \p BB's type and stores it in internal data structures for |
| 281 | /// future use. Please note that \p BB must belong to SSC with \p SccNum ID. |
| 282 | void calculateSccBlockType(const BasicBlock *BB, int SccNum); |
| 283 | }; |
| 284 | |
| 285 | /// Pair of Loop and SCC ID number. Used to unify handling of normal and |
| 286 | /// SCC based loop representations. |
| 287 | using LoopData = std::pair<Loop *, int>; |
| 288 | /// Helper class to keep basic block along with its loop data information. |
| 289 | class LoopBlock { |
| 290 | public: |
| 291 | LLVM_ABI explicit LoopBlock(const BasicBlock *BB, const LoopInfo &LI, |
| 292 | const SccInfo &SccI); |
| 293 | |
| 294 | const BasicBlock *getBlock() const { return BB; } |
| 295 | BasicBlock *getBlock() { return const_cast<BasicBlock *>(BB); } |
| 296 | LoopData getLoopData() const { return LD; } |
| 297 | Loop *getLoop() const { return LD.first; } |
| 298 | int getSccNum() const { return LD.second; } |
| 299 | |
| 300 | bool belongsToLoop() const { return getLoop() || getSccNum() != -1; } |
| 301 | bool belongsToSameLoop(const LoopBlock &LB) const { |
| 302 | return (LB.getLoop() && getLoop() == LB.getLoop()) || |
| 303 | (LB.getSccNum() != -1 && getSccNum() == LB.getSccNum()); |
| 304 | } |
| 305 | |
| 306 | private: |
| 307 | const BasicBlock *const BB = nullptr; |
| 308 | LoopData LD = {nullptr, -1}; |
| 309 | }; |
| 310 | |
| 311 | // Pair of LoopBlocks representing an edge from first to second block. |
| 312 | using LoopEdge = std::pair<const LoopBlock &, const LoopBlock &>; |
| 313 | |
| 314 | /// Helper to construct LoopBlock for \p BB. |
| 315 | LoopBlock getLoopBlock(const BasicBlock *BB) const { |
| 316 | return LoopBlock(BB, *LI, *SccI); |
| 317 | } |
| 318 | |
| 319 | /// Returns true if destination block belongs to some loop and source block is |
| 320 | /// either doesn't belong to any loop or belongs to a loop which is not inner |
| 321 | /// relative to the destination block. |
| 322 | bool isLoopEnteringEdge(const LoopEdge &Edge) const; |
| 323 | /// Returns true if source block belongs to some loop and destination block is |
| 324 | /// either doesn't belong to any loop or belongs to a loop which is not inner |
| 325 | /// relative to the source block. |
| 326 | bool isLoopExitingEdge(const LoopEdge &Edge) const; |
| 327 | /// Returns true if \p Edge is either enters to or exits from some loop, false |
| 328 | /// in all other cases. |
| 329 | bool isLoopEnteringExitingEdge(const LoopEdge &Edge) const; |
| 330 | /// Returns true if source and destination blocks belongs to the same loop and |
| 331 | /// destination block is loop header. |
| 332 | bool isLoopBackEdge(const LoopEdge &Edge) const; |
| 333 | // Fills in \p Enters vector with all "enter" blocks to a loop \LB belongs to. |
| 334 | void getLoopEnterBlocks(const LoopBlock &LB, |
| 335 | SmallVectorImpl<BasicBlock *> &Enters) const; |
| 336 | // Fills in \p Exits vector with all "exit" blocks from a loop \LB belongs to. |
| 337 | void getLoopExitBlocks(const LoopBlock &LB, |
| 338 | SmallVectorImpl<BasicBlock *> &Exits) const; |
| 339 | |
| 340 | /// Returns estimated weight for \p BB. std::nullopt if \p BB has no estimated |
| 341 | /// weight. |
| 342 | std::optional<uint32_t> getEstimatedBlockWeight(const BasicBlock *BB) const; |
| 343 | |
| 344 | /// Returns estimated weight to enter \p L. In other words it is weight of |
| 345 | /// loop's header block not scaled by trip count. Returns std::nullopt if \p L |
| 346 | /// has no no estimated weight. |
| 347 | std::optional<uint32_t> getEstimatedLoopWeight(const LoopData &L) const; |
| 348 | |
| 349 | /// Return estimated weight for \p Edge. Returns std::nullopt if estimated |
| 350 | /// weight is unknown. |
| 351 | std::optional<uint32_t> getEstimatedEdgeWeight(const LoopEdge &Edge) const; |
| 352 | |
| 353 | /// Iterates over all edges leading from \p SrcBB to \p Successors and |
| 354 | /// returns maximum of all estimated weights. If at least one edge has unknown |
| 355 | /// estimated weight std::nullopt is returned. |
| 356 | template <class IterT> |
| 357 | std::optional<uint32_t> |
| 358 | getMaxEstimatedEdgeWeight(const LoopBlock &SrcBB, |
| 359 | iterator_range<IterT> Successors) const; |
| 360 | |
| 361 | /// If \p LoopBB has no estimated weight then set it to \p BBWeight and |
| 362 | /// return true. Otherwise \p BB's weight remains unchanged and false is |
| 363 | /// returned. In addition all blocks/loops that might need their weight to be |
| 364 | /// re-estimated are put into BlockWorkList/LoopWorkList. |
| 365 | bool updateEstimatedBlockWeight(LoopBlock &LoopBB, uint32_t BBWeight, |
| 366 | SmallVectorImpl<BasicBlock *> &BlockWorkList, |
| 367 | SmallVectorImpl<LoopBlock> &LoopWorkList); |
| 368 | |
| 369 | /// Starting from \p LoopBB (including \p LoopBB itself) propagate \p BBWeight |
| 370 | /// up the domination tree. |
| 371 | void propagateEstimatedBlockWeight(const LoopBlock &LoopBB, DominatorTree *DT, |
| 372 | PostDominatorTree *PDT, uint32_t BBWeight, |
| 373 | SmallVectorImpl<BasicBlock *> &WorkList, |
| 374 | SmallVectorImpl<LoopBlock> &LoopWorkList); |
| 375 | |
| 376 | /// Returns block's weight encoded in the IR. |
| 377 | std::optional<uint32_t> getInitialEstimatedBlockWeight(const BasicBlock *BB); |
| 378 | |
| 379 | // Computes estimated weights for all blocks in \p F. |
| 380 | void estimateBlockWeights(const Function &F, DominatorTree *DT, |
| 381 | PostDominatorTree *PDT); |
| 382 | |
| 383 | /// Based on computed weights by \p computeEstimatedBlockWeight set |
| 384 | /// probabilities on branches. |
| 385 | bool calcEstimatedHeuristics(const BasicBlock *BB); |
| 386 | bool calcMetadataWeights(const BasicBlock *BB); |
| 387 | bool calcPointerHeuristics(const BasicBlock *BB); |
| 388 | bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); |
| 389 | bool calcFloatingPointHeuristics(const BasicBlock *BB); |
| 390 | |
| 391 | BranchProbabilityInfo &BPI; |
| 392 | |
| 393 | const LoopInfo *LI = nullptr; |
| 394 | |
| 395 | /// Keeps information about all SCCs in a function. |
| 396 | std::unique_ptr<const SccInfo> SccI; |
| 397 | |
| 398 | /// Keeps mapping of a basic block to its estimated weight. |
| 399 | SmallDenseMap<const BasicBlock *, uint32_t> EstimatedBlockWeight; |
| 400 | |
| 401 | /// Keeps mapping of a loop to estimated weight to enter the loop. |
| 402 | SmallDenseMap<LoopData, uint32_t> EstimatedLoopWeight; |
| 403 | }; |
| 404 | |
| 405 | BPIConstruction::SccInfo::SccInfo(const Function &F) { |
| 406 | // Record SCC numbers of blocks in the CFG to identify irreducible loops. |
| 407 | // FIXME: We could only calculate this if the CFG is known to be irreducible |
| 408 | // (perhaps cache this info in LoopInfo if we can easily calculate it there?). |
| 409 | int SccNum = 0; |
| 410 | for (scc_iterator<const Function *> It = scc_begin(G: &F); !It.isAtEnd(); |
| 411 | ++It, ++SccNum) { |
| 412 | // Ignore single-block SCCs since they either aren't loops or LoopInfo will |
| 413 | // catch them. |
| 414 | const std::vector<const BasicBlock *> &Scc = *It; |
| 415 | if (Scc.size() == 1) |
| 416 | continue; |
| 417 | |
| 418 | LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":" ); |
| 419 | for (const auto *BB : Scc) { |
| 420 | LLVM_DEBUG(dbgs() << " " << BB->getName()); |
| 421 | SccNums[BB] = SccNum; |
| 422 | calculateSccBlockType(BB, SccNum); |
| 423 | } |
| 424 | LLVM_DEBUG(dbgs() << "\n" ); |
| 425 | } |
| 426 | } |
| 427 | |
| 428 | int BPIConstruction::SccInfo::getSCCNum(const BasicBlock *BB) const { |
| 429 | auto SccIt = SccNums.find(Val: BB); |
| 430 | if (SccIt == SccNums.end()) |
| 431 | return -1; |
| 432 | return SccIt->second; |
| 433 | } |
| 434 | |
| 435 | void BPIConstruction::SccInfo::getSccEnterBlocks( |
| 436 | int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const { |
| 437 | |
| 438 | for (auto MapIt : SccBlocks[SccNum]) { |
| 439 | const auto *BB = MapIt.first; |
| 440 | if (isSCCHeader(BB, SccNum)) |
| 441 | for (const auto *Pred : predecessors(BB)) |
| 442 | if (getSCCNum(BB: Pred) != SccNum) |
| 443 | Enters.push_back(Elt: const_cast<BasicBlock *>(BB)); |
| 444 | } |
| 445 | } |
| 446 | |
| 447 | void BPIConstruction::SccInfo::getSccExitBlocks( |
| 448 | int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const { |
| 449 | for (auto MapIt : SccBlocks[SccNum]) { |
| 450 | const auto *BB = MapIt.first; |
| 451 | if (isSCCExitingBlock(BB, SccNum)) |
| 452 | for (const auto *Succ : successors(BB)) |
| 453 | if (getSCCNum(BB: Succ) != SccNum) |
| 454 | Exits.push_back(Elt: const_cast<BasicBlock *>(Succ)); |
| 455 | } |
| 456 | } |
| 457 | |
| 458 | uint32_t BPIConstruction::SccInfo::getSccBlockType(const BasicBlock *BB, |
| 459 | int SccNum) const { |
| 460 | assert(getSCCNum(BB) == SccNum); |
| 461 | |
| 462 | assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC" ); |
| 463 | const auto &SccBlockTypes = SccBlocks[SccNum]; |
| 464 | |
| 465 | auto It = SccBlockTypes.find(Val: BB); |
| 466 | if (It != SccBlockTypes.end()) { |
| 467 | return It->second; |
| 468 | } |
| 469 | return Inner; |
| 470 | } |
| 471 | |
| 472 | void BPIConstruction::SccInfo::calculateSccBlockType(const BasicBlock *BB, |
| 473 | int SccNum) { |
| 474 | assert(getSCCNum(BB) == SccNum); |
| 475 | uint32_t BlockType = Inner; |
| 476 | |
| 477 | if (llvm::any_of(Range: predecessors(BB), P: [&](const BasicBlock *Pred) { |
| 478 | // Consider any block that is an entry point to the SCC as |
| 479 | // a header. |
| 480 | return getSCCNum(BB: Pred) != SccNum; |
| 481 | })) |
| 482 | BlockType |= Header; |
| 483 | |
| 484 | if (llvm::any_of(Range: successors(BB), P: [&](const BasicBlock *Succ) { |
| 485 | return getSCCNum(BB: Succ) != SccNum; |
| 486 | })) |
| 487 | BlockType |= Exiting; |
| 488 | |
| 489 | // Lazily compute the set of headers for a given SCC and cache the results |
| 490 | // in the SccHeaderMap. |
| 491 | if (SccBlocks.size() <= static_cast<unsigned>(SccNum)) |
| 492 | SccBlocks.resize(new_size: SccNum + 1); |
| 493 | auto &SccBlockTypes = SccBlocks[SccNum]; |
| 494 | |
| 495 | if (BlockType != Inner) { |
| 496 | bool IsInserted; |
| 497 | std::tie(args: std::ignore, args&: IsInserted) = |
| 498 | SccBlockTypes.insert(KV: std::make_pair(x&: BB, y&: BlockType)); |
| 499 | assert(IsInserted && "Duplicated block in SCC" ); |
| 500 | } |
| 501 | } |
| 502 | |
| 503 | BPIConstruction::LoopBlock::LoopBlock(const BasicBlock *BB, const LoopInfo &LI, |
| 504 | const SccInfo &SccI) |
| 505 | : BB(BB) { |
| 506 | LD.first = LI.getLoopFor(BB); |
| 507 | if (!LD.first) { |
| 508 | LD.second = SccI.getSCCNum(BB); |
| 509 | } |
| 510 | } |
| 511 | |
| 512 | bool BPIConstruction::isLoopEnteringEdge(const LoopEdge &Edge) const { |
| 513 | const auto &SrcBlock = Edge.first; |
| 514 | const auto &DstBlock = Edge.second; |
| 515 | return (DstBlock.getLoop() && |
| 516 | !DstBlock.getLoop()->contains(L: SrcBlock.getLoop())) || |
| 517 | // Assume that SCCs can't be nested. |
| 518 | (DstBlock.getSccNum() != -1 && |
| 519 | SrcBlock.getSccNum() != DstBlock.getSccNum()); |
| 520 | } |
| 521 | |
| 522 | bool BPIConstruction::isLoopExitingEdge(const LoopEdge &Edge) const { |
| 523 | return isLoopEnteringEdge(Edge: {Edge.second, Edge.first}); |
| 524 | } |
| 525 | |
| 526 | bool BPIConstruction::isLoopEnteringExitingEdge(const LoopEdge &Edge) const { |
| 527 | return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge); |
| 528 | } |
| 529 | |
| 530 | bool BPIConstruction::isLoopBackEdge(const LoopEdge &Edge) const { |
| 531 | const auto &SrcBlock = Edge.first; |
| 532 | const auto &DstBlock = Edge.second; |
| 533 | return SrcBlock.belongsToSameLoop(LB: DstBlock) && |
| 534 | ((DstBlock.getLoop() && |
| 535 | DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) || |
| 536 | (DstBlock.getSccNum() != -1 && |
| 537 | SccI->isSCCHeader(BB: DstBlock.getBlock(), SccNum: DstBlock.getSccNum()))); |
| 538 | } |
| 539 | |
| 540 | void BPIConstruction::getLoopEnterBlocks( |
| 541 | const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const { |
| 542 | if (LB.getLoop()) { |
| 543 | auto * = LB.getLoop()->getHeader(); |
| 544 | Enters.append(in_start: pred_begin(BB: Header), in_end: pred_end(BB: Header)); |
| 545 | } else { |
| 546 | assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?" ); |
| 547 | SccI->getSccEnterBlocks(SccNum: LB.getSccNum(), Enters); |
| 548 | } |
| 549 | } |
| 550 | |
| 551 | void BPIConstruction::getLoopExitBlocks( |
| 552 | const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const { |
| 553 | if (LB.getLoop()) { |
| 554 | LB.getLoop()->getExitBlocks(ExitBlocks&: Exits); |
| 555 | } else { |
| 556 | assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?" ); |
| 557 | SccI->getSccExitBlocks(SccNum: LB.getSccNum(), Exits); |
| 558 | } |
| 559 | } |
| 560 | |
// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing. Examine metadata against unreachable
// heuristic. The probability of the edge coming to unreachable block is
// set to min of metadata and unreachable heuristic.
//
// Returns true iff branch_weights metadata was found and probabilities were
// set from it.
bool BPIConstruction::calcMetadataWeights(const BasicBlock *BB) {
  const Instruction *TI = BB->getTerminator();
  assert(TI->getNumSuccessors() > 1 && "expected more than one successor!" );
  // Only terminators that can carry meaningful branch_weights are handled.
  if (!(isa<CondBrInst>(Val: TI) || isa<SwitchInst>(Val: TI) || isa<IndirectBrInst>(Val: TI) ||
        isa<InvokeInst>(Val: TI) || isa<CallBrInst>(Val: TI)))
    return false;

  MDNode *WeightsNode = getValidBranchWeightMDNode(I: *TI);
  if (!WeightsNode)
    return false;

  // Check that the number of successors is manageable.
  assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors" );

  // Build up the final weights that will be used in a temporary buffer.
  // Compute the sum of all weights to later decide whether they need to
  // be scaled to fit in 32 bits.
  uint64_t WeightSum = 0;
  SmallVector<uint32_t, 2> Weights;
  SmallVector<unsigned, 2> UnreachableIdxs;
  SmallVector<unsigned, 2> ReachableIdxs;

  extractBranchWeights(ProfileData: WeightsNode, Weights);
  auto Succs = succ_begin(I: TI);
  // Partition successor indices into those whose estimated weight marks them
  // as (eventually) unreachable and the rest.
  for (unsigned I = 0, E = Weights.size(); I != E; ++I) {
    WeightSum += Weights[I];
    const LoopBlock SrcLoopBB = getLoopBlock(BB);
    const LoopBlock DstLoopBB = getLoopBlock(BB: *Succs++);
    auto EstimatedWeight = getEstimatedEdgeWeight(Edge: {SrcLoopBB, DstLoopBB});
    if (EstimatedWeight &&
        *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
      UnreachableIdxs.push_back(Elt: I);
    else
      ReachableIdxs.push_back(Elt: I);
  }
  assert(Weights.size() == TI->getNumSuccessors() && "Checked above" );

  // If the sum of weights does not fit in 32 bits, scale every weight down
  // accordingly.
  uint64_t ScalingFactor =
      (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;

  if (ScalingFactor > 1) {
    WeightSum = 0;
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
      Weights[I] /= ScalingFactor;
      WeightSum += Weights[I];
    }
  }
  assert(WeightSum <= UINT32_MAX &&
         "Expected weights to scale down to 32 bits" );

  // Degenerate metadata (all-zero weights, or every successor unreachable):
  // fall back to a uniform distribution.
  if (WeightSum == 0 || ReachableIdxs.size() == 0) {
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
      Weights[I] = 1;
    WeightSum = TI->getNumSuccessors();
  }

  // Set the probability.
  SmallVector<BranchProbability, 2> BP;
  for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
    BP.push_back(Elt: { Weights[I], static_cast<uint32_t>(WeightSum) });

  // Examine the metadata against unreachable heuristic.
  // If the unreachable heuristic is stronger then we use it for this edge.
  if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) {
    BPI.setEdgeProbability(Src: BB, Probs: BP);
    return true;
  }

  // Cap each unreachable edge at the unreachable heuristic's probability.
  auto UnreachableProb = UR_TAKEN_PROB;
  for (auto I : UnreachableIdxs)
    if (UnreachableProb < BP[I]) {
      BP[I] = UnreachableProb;
    }

  // Sum of all edge probabilities must be 1.0. If we modified the probability
  // of some edges then we must distribute the introduced difference over the
  // reachable blocks.
  //
  // Proportional distribution: the relation between probabilities of the
  // reachable edges is kept unchanged. That is for any reachable edges i and j:
  //   newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
  //   newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
  // Where K is independent of i,j.
  //   newBP[i] == oldBP[i] * K
  // We need to find K.
  // Make sum of all reachables of the left and right parts:
  //   sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
  // Sum of newBP must be equal to 1.0:
  //   sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
  //   sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
  // Where sum_of_unreachable(newBP) is what has been just changed.
  // Finally:
  //   K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
  //   K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
  BranchProbability NewUnreachableSum = BranchProbability::getZero();
  for (auto I : UnreachableIdxs)
    NewUnreachableSum += BP[I];

  BranchProbability NewReachableSum =
      BranchProbability::getOne() - NewUnreachableSum;

  BranchProbability OldReachableSum = BranchProbability::getZero();
  for (auto I : ReachableIdxs)
    OldReachableSum += BP[I];

  if (OldReachableSum != NewReachableSum) { // Anything to distribute?
    if (OldReachableSum.isZero()) {
      // If all oldBP[i] are zeroes then the proportional distribution results
      // in all zero probabilities and the error stays big. In this case we
      // evenly spread NewReachableSum over the reachable edges.
      BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size();
      for (auto I : ReachableIdxs)
        BP[I] = PerEdge;
    } else {
      for (auto I : ReachableIdxs) {
        // We use uint64_t to avoid double rounding error of the following
        // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
        // The formula is taken from the private constructor
        // BranchProbability(uint32_t Numerator, uint32_t Denominator)
        uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) *
                       BP[I].getNumerator();
        uint32_t Div = static_cast<uint32_t>(
            divideNearest(Numerator: Mul, Denominator: OldReachableSum.getNumerator()));
        BP[I] = BranchProbability::getRaw(N: Div);
      }
    }
  }

  BPI.setEdgeProbability(Src: BB, Probs: BP);

  return true;
}
| 699 | |
| 700 | // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison |
| 701 | // between two pointer or pointer and NULL will fail. |
| 702 | bool BPIConstruction::calcPointerHeuristics(const BasicBlock *BB) { |
| 703 | const CondBrInst *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator()); |
| 704 | if (!BI) |
| 705 | return false; |
| 706 | |
| 707 | Value *Cond = BI->getCondition(); |
| 708 | ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond); |
| 709 | if (!CI || !CI->isEquality()) |
| 710 | return false; |
| 711 | |
| 712 | Value *LHS = CI->getOperand(i_nocapture: 0); |
| 713 | |
| 714 | if (!LHS->getType()->isPointerTy()) |
| 715 | return false; |
| 716 | |
| 717 | assert(CI->getOperand(1)->getType()->isPointerTy()); |
| 718 | |
| 719 | auto Search = PointerTable.find(x: CI->getPredicate()); |
| 720 | if (Search == PointerTable.end()) |
| 721 | return false; |
| 722 | BPI.setEdgeProbability(Src: BB, Probs: Search->second); |
| 723 | return true; |
| 724 | } |
| 725 | |
// Compute the unlikely successors to the block BB in the loop L, specifically
// those that are unlikely because this is a loop, and add them to the
// UnlikelyBlocks set.
static void
computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
                          SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
  // Sometimes in a loop we have a branch whose condition is made false by
  // taking it. This is typically something like
  //  int n = 0;
  //  while (...) {
  //    if (++n >= MAX) {
  //      n = 0;
  //    }
  //  }
  // In this sort of situation taking the branch means that at the very least it
  // won't be taken again in the next iteration of the loop, so we should
  // consider it less likely than a typical branch.
  //
  // We detect this by looking back through the graph of PHI nodes that sets the
  // value that the condition depends on, and seeing if we can reach a successor
  // block which can be determined to make the condition false.
  //
  // FIXME: We currently consider unlikely blocks to be half as likely as other
  // blocks, but if we consider the example above the likelihood is actually
  // 1/MAX. We could therefore be more precise in how unlikely we consider
  // blocks to be, but it would require more careful examination of the form
  // of the comparison expression.
  const CondBrInst *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator());
  if (!BI)
    return;

  // Check if the branch is based on an instruction compared with a constant.
  CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition());
  if (!CI || !isa<Instruction>(Val: CI->getOperand(i_nocapture: 0)) ||
      !isa<Constant>(Val: CI->getOperand(i_nocapture: 1)))
    return;

  // Either the instruction must be a PHI, or a chain of operations involving
  // constants that ends in a PHI which we can then collapse into a single value
  // if the PHI value is known.
  Instruction *CmpLHS = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: 0));
  PHINode *CmpPHI = dyn_cast<PHINode>(Val: CmpLHS);
  Constant *CmpConst = dyn_cast<Constant>(Val: CI->getOperand(i_nocapture: 1));
  // Walk binary operators with a constant RHS (e.g. "n + 1") until we hit a
  // PHI, recording them so they can be folded later with a concrete PHI value.
  SmallVector<BinaryOperator *, 1> InstChain;
  while (!CmpPHI && CmpLHS && isa<BinaryOperator>(Val: CmpLHS) &&
         isa<Constant>(Val: CmpLHS->getOperand(i: 1))) {
    // Stop if the chain extends outside of the loop.
    if (!L->contains(Inst: CmpLHS))
      return;
    InstChain.push_back(Elt: cast<BinaryOperator>(Val: CmpLHS));
    CmpLHS = dyn_cast<Instruction>(Val: CmpLHS->getOperand(i: 0));
    if (CmpLHS)
      CmpPHI = dyn_cast<PHINode>(Val: CmpLHS);
  }
  // Bail out unless the chain ended in a PHI that itself lives in the loop.
  if (!CmpPHI || !L->contains(Inst: CmpPHI))
    return;

  // Trace the phi node (transitively, through nested PHIs) to find all values
  // that come from successors of BB.
  SmallPtrSet<PHINode*, 8> VisitedInsts;
  SmallVector<PHINode*, 8> WorkList;
  WorkList.push_back(Elt: CmpPHI);
  VisitedInsts.insert(Ptr: CmpPHI);
  while (!WorkList.empty()) {
    PHINode *P = WorkList.pop_back_val();
    for (BasicBlock *B : P->blocks()) {
      // Skip blocks that aren't part of the loop.
      if (!L->contains(BB: B))
        continue;
      Value *V = P->getIncomingValueForBlock(BB: B);
      // If the source is a PHI add it to the work list if we haven't
      // already visited it.
      if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
        if (VisitedInsts.insert(Ptr: PN).second)
          WorkList.push_back(Elt: PN);
        continue;
      }
      // If this incoming value is a constant and B is a successor of BB, then
      // we can constant-evaluate the compare to see if it makes the branch be
      // taken or not.
      Constant *CmpLHSConst = dyn_cast<Constant>(Val: V);
      if (!CmpLHSConst || !llvm::is_contained(Range: successors(BB), Element: B))
        continue;
      // First collapse InstChain: replay the recorded operations (in original
      // program order, hence the reverse) on the known constant.
      const DataLayout &DL = BB->getDataLayout();
      for (Instruction *I : llvm::reverse(C&: InstChain)) {
        CmpLHSConst = ConstantFoldBinaryOpOperands(
            Opcode: I->getOpcode(), LHS: CmpLHSConst, RHS: cast<Constant>(Val: I->getOperand(i: 1)), DL);
        // Folding can fail; if it does, this incoming value tells us nothing.
        if (!CmpLHSConst)
          break;
      }
      if (!CmpLHSConst)
        continue;
      // Now constant-evaluate the compare itself.
      Constant *Result = ConstantFoldCompareInstOperands(
          Predicate: CI->getPredicate(), LHS: CmpLHSConst, RHS: CmpConst, DL);
      // If the result means we don't branch to the block then that block is
      // unlikely.
      if (Result && ((Result->isNullValue() && B == BI->getSuccessor(i: 0)) ||
                     (Result->isOneValue() && B == BI->getSuccessor(i: 1))))
        UnlikelyBlocks.insert(Ptr: B);
    }
  }
}
| 830 | |
| 831 | std::optional<uint32_t> |
| 832 | BPIConstruction::getEstimatedBlockWeight(const BasicBlock *BB) const { |
| 833 | auto WeightIt = EstimatedBlockWeight.find(Val: BB); |
| 834 | if (WeightIt == EstimatedBlockWeight.end()) |
| 835 | return std::nullopt; |
| 836 | return WeightIt->second; |
| 837 | } |
| 838 | |
| 839 | std::optional<uint32_t> |
| 840 | BPIConstruction::getEstimatedLoopWeight(const LoopData &L) const { |
| 841 | auto WeightIt = EstimatedLoopWeight.find(Val: L); |
| 842 | if (WeightIt == EstimatedLoopWeight.end()) |
| 843 | return std::nullopt; |
| 844 | return WeightIt->second; |
| 845 | } |
| 846 | |
| 847 | std::optional<uint32_t> |
| 848 | BPIConstruction::getEstimatedEdgeWeight(const LoopEdge &Edge) const { |
| 849 | // For edges entering a loop take weight of a loop rather than an individual |
| 850 | // block in the loop. |
| 851 | return isLoopEnteringEdge(Edge) |
| 852 | ? getEstimatedLoopWeight(L: Edge.second.getLoopData()) |
| 853 | : getEstimatedBlockWeight(BB: Edge.second.getBlock()); |
| 854 | } |
| 855 | |
| 856 | template <class IterT> |
| 857 | std::optional<uint32_t> BPIConstruction::getMaxEstimatedEdgeWeight( |
| 858 | const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const { |
| 859 | std::optional<uint32_t> MaxWeight; |
| 860 | for (const BasicBlock *DstBB : Successors) { |
| 861 | const LoopBlock DstLoopBB = getLoopBlock(BB: DstBB); |
| 862 | auto Weight = getEstimatedEdgeWeight(Edge: {SrcLoopBB, DstLoopBB}); |
| 863 | |
| 864 | if (!Weight) |
| 865 | return std::nullopt; |
| 866 | |
| 867 | if (!MaxWeight || *MaxWeight < *Weight) |
| 868 | MaxWeight = Weight; |
| 869 | } |
| 870 | |
| 871 | return MaxWeight; |
| 872 | } |
| 873 | |
// Updates \p LoopBB's weight and returns true. If \p LoopBB already has an
// associated weight it is left unchanged and false is returned.
//
// Please note by the algorithm the weight is not expected to change once set
// thus 'false' status is used to track visited blocks.
bool BPIConstruction::updateEstimatedBlockWeight(
    LoopBlock &LoopBB, uint32_t BBWeight,
    SmallVectorImpl<BasicBlock *> &BlockWorkList,
    SmallVectorImpl<LoopBlock> &LoopWorkList) {
  BasicBlock *BB = LoopBB.getBlock();

  // In general, weight is assigned to a block when it has final value and
  // can't/shouldn't be changed. However, there are cases when a block
  // inherently has several (possibly "contradicting") weights. For example,
  // "unwind" block may also contain "cold" call. In that case the first
  // set weight is favored and all consequent weights are ignored.
  if (!EstimatedBlockWeight.insert(KV: {BB, BBWeight}).second)
    return false;

  // Now that BB has a weight, predecessors without one may become computable;
  // queue them for re-examination.
  for (BasicBlock *PredBlock : predecessors(BB)) {
    LoopBlock PredLoop = getLoopBlock(BB: PredBlock);
    // Add affected block/loop to a working list. If the Pred->BB edge exits
    // Pred's loop, the new weight contributes to the loop estimate rather
    // than to the predecessor block's own one.
    if (isLoopExitingEdge(Edge: {PredLoop, LoopBB})) {
      if (!EstimatedLoopWeight.count(Val: PredLoop.getLoopData()))
        LoopWorkList.push_back(Elt: PredLoop);
    } else if (!EstimatedBlockWeight.count(Val: PredBlock))
      BlockWorkList.push_back(Elt: PredBlock);
  }
  return true;
}
| 904 | |
// Starting from \p BB traverse through dominator blocks and assign \p BBWeight
// to all such blocks that are post dominated by \p BB. In other words to all
// blocks that the one is executed if and only if another one is executed.
// Importantly, we skip loops here for two reasons. First weights of blocks in
// a loop should be scaled by trip count (yet possibly unknown). Second there is
// no value in doing that because that doesn't give any additional information
// regarding distribution of probabilities inside the loop. The exception is
// loop 'enter' and 'exit' edges that are handled in a special way at
// calcEstimatedHeuristics.
//
// In addition, \p BlockWorkList and \p LoopWorkList are populated with basic
// blocks/loops if at least one successor has updated estimated weight.
void BPIConstruction::propagateEstimatedBlockWeight(
    const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
    uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
    SmallVectorImpl<LoopBlock> &LoopWorkList) {
  const BasicBlock *BB = LoopBB.getBlock();
  const auto *DTStartNode = DT->getNode(BB);
  const auto *PDTStartNode = PDT->getNode(BB);

  // TODO: Consider propagating weight down the domination line as well.
  // Walk up the immediate-dominator chain starting at BB itself.
  for (const auto *DTNode = DTStartNode; DTNode != nullptr;
       DTNode = DTNode->getIDom()) {
    auto *DomBB = DTNode->getBlock();
    // Consider blocks which lie on one 'line': DomBB dominates BB and BB post
    // dominates DomBB, i.e. they always execute together.
    if (!PDT->dominates(A: PDTStartNode, B: PDT->getNode(BB: DomBB)))
      // If BB doesn't post dominate DomBB it will not post dominate dominators
      // of DomBB as well.
      break;

    LoopBlock DomLoopBB = getLoopBlock(BB: DomBB);
    const LoopEdge Edge{DomLoopBB, LoopBB};
    // Don't propagate weight to blocks belonging to different loops.
    if (!isLoopEnteringExitingEdge(Edge)) {
      if (!updateEstimatedBlockWeight(LoopBB&: DomLoopBB, BBWeight, BlockWorkList,
                                      LoopWorkList))
        // If DomBB has weight set then all it's predecessors are already
        // processed (since we propagate weight up to the top of IR each time).
        break;
    } else if (isLoopExitingEdge(Edge)) {
      // The weight may contribute to DomBB's loop estimate instead.
      LoopWorkList.push_back(Elt: DomLoopBB);
    }
  }
}
| 949 | |
| 950 | std::optional<uint32_t> |
| 951 | BPIConstruction::getInitialEstimatedBlockWeight(const BasicBlock *BB) { |
| 952 | // Returns true if \p BB has call marked with "NoReturn" attribute. |
| 953 | auto hasNoReturn = [&](const BasicBlock *BB) { |
| 954 | for (const auto &I : reverse(C: *BB)) |
| 955 | if (const CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
| 956 | if (CI->hasFnAttr(Kind: Attribute::NoReturn)) |
| 957 | return true; |
| 958 | |
| 959 | return false; |
| 960 | }; |
| 961 | |
| 962 | // Important note regarding the order of checks. They are ordered by weight |
| 963 | // from lowest to highest. Doing that allows to avoid "unstable" results |
| 964 | // when several conditions heuristics can be applied simultaneously. |
| 965 | if (isa<UnreachableInst>(Val: BB->getTerminator()) || |
| 966 | // If this block is terminated by a call to |
| 967 | // @llvm.experimental.deoptimize then treat it like an unreachable |
| 968 | // since it is expected to practically never execute. |
| 969 | // TODO: Should we actually treat as never returning call? |
| 970 | BB->getTerminatingDeoptimizeCall()) |
| 971 | return hasNoReturn(BB) |
| 972 | ? static_cast<uint32_t>(BlockExecWeight::NORETURN) |
| 973 | : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE); |
| 974 | |
| 975 | // Check if the block is an exception handling block. |
| 976 | if (BB->isEHPad()) |
| 977 | return static_cast<uint32_t>(BlockExecWeight::UNWIND); |
| 978 | |
| 979 | // Check if the block contains 'cold' call. |
| 980 | for (const auto &I : *BB) |
| 981 | if (const CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
| 982 | if (CI->hasFnAttr(Kind: Attribute::Cold)) |
| 983 | return static_cast<uint32_t>(BlockExecWeight::COLD); |
| 984 | |
| 985 | return std::nullopt; |
| 986 | } |
| 987 | |
// Does RPO traversal over all blocks in \p F and assigns weights to
// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
// best to propagate the weight up/down the IR.
void BPIConstruction::estimateBlockWeights(const Function &F, DominatorTree *DT,
                                           PostDominatorTree *PDT) {
  SmallVector<BasicBlock *, 8> BlockWorkList;
  SmallVector<LoopBlock, 8> LoopWorkList;
  // Cache of per-loop exit blocks, computed lazily on first use of each loop.
  SmallDenseMap<LoopData, SmallVector<BasicBlock *, 4>> LoopExitBlocks;

  // By doing RPO we make sure that all predecessors already have weights
  // calculated before visiting their successors.
  ReversePostOrderTraversal<const Function *> RPOT(&F);
  for (const auto *BB : RPOT)
    if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
      // If we were able to find estimated weight for the block set it to this
      // block and propagate up the IR.
      propagateEstimatedBlockWeight(LoopBB: getLoopBlock(BB), DT, PDT, BBWeight: *BBWeight,
                                    BlockWorkList, LoopWorkList);

  // BlockWorkList/LoopWorkList contains blocks/loops with at least one
  // successor/exit having estimated weight. Try to propagate weight to such
  // blocks/loops from successors/exits.
  // Process loops and blocks. Order is not important.
  do {
    while (!LoopWorkList.empty()) {
      const LoopBlock LoopBB = LoopWorkList.pop_back_val();
      const LoopData LD = LoopBB.getLoopData();
      // Loop weights, like block weights, are final once set.
      if (EstimatedLoopWeight.count(Val: LD))
        continue;

      auto Res = LoopExitBlocks.try_emplace(Key: LD);
      SmallVectorImpl<BasicBlock *> &Exits = Res.first->second;
      if (Res.second)
        getLoopExitBlocks(LB: LoopBB, Exits);
      // The loop's weight is the maximum estimated weight of its exit edges.
      auto LoopWeight = getMaxEstimatedEdgeWeight(
          SrcLoopBB: LoopBB, Successors: make_range(x: Exits.begin(), y: Exits.end()));

      if (LoopWeight) {
        // If we never exit the loop then we can enter it once at maximum.
        if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
          LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);

        EstimatedLoopWeight.insert(KV: {LD, *LoopWeight});
        // Add all blocks entering the loop into working list.
        getLoopEnterBlocks(LB: LoopBB, Enters&: BlockWorkList);
      }
    }

    while (!BlockWorkList.empty()) {
      const BasicBlock *BB = BlockWorkList.pop_back_val();
      // Block weights are final once set (see updateEstimatedBlockWeight).
      if (EstimatedBlockWeight.count(Val: BB))
        continue;

      // We take maximum over all weights of successors. In other words we take
      // weight of "hot" path. In theory we can probably find a better function
      // which gives higher accuracy results (comparing to "maximum") but it is
      // doubtful it would make any difference in practice.
      const LoopBlock LoopBB = getLoopBlock(BB);
      auto MaxWeight = getMaxEstimatedEdgeWeight(SrcLoopBB: LoopBB, Successors: successors(BB));

      if (MaxWeight)
        propagateEstimatedBlockWeight(LoopBB, DT, PDT, BBWeight: *MaxWeight,
                                      BlockWorkList, LoopWorkList);
    }
  } while (!BlockWorkList.empty() || !LoopWorkList.empty());
}
| 1057 | |
| 1058 | // Calculate edge probabilities based on block's estimated weight. |
| 1059 | // Note that gathered weights were not scaled for loops. Thus edges entering |
| 1060 | // and exiting loops requires special processing. |
| 1061 | bool BPIConstruction::calcEstimatedHeuristics(const BasicBlock *BB) { |
| 1062 | assert(BB->getTerminator()->getNumSuccessors() > 1 && |
| 1063 | "expected more than one successor!" ); |
| 1064 | |
| 1065 | const LoopBlock LoopBB = getLoopBlock(BB); |
| 1066 | |
| 1067 | SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks; |
| 1068 | uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT; |
| 1069 | if (LoopBB.getLoop()) |
| 1070 | computeUnlikelySuccessors(BB, L: LoopBB.getLoop(), UnlikelyBlocks); |
| 1071 | |
| 1072 | // Changed to 'true' if at least one successor has estimated weight. |
| 1073 | bool FoundEstimatedWeight = false; |
| 1074 | SmallVector<uint32_t, 4> SuccWeights; |
| 1075 | uint64_t TotalWeight = 0; |
| 1076 | // Go over all successors of BB and put their weights into SuccWeights. |
| 1077 | for (const BasicBlock *SuccBB : successors(BB)) { |
| 1078 | std::optional<uint32_t> Weight; |
| 1079 | const LoopBlock SuccLoopBB = getLoopBlock(BB: SuccBB); |
| 1080 | const LoopEdge Edge{LoopBB, SuccLoopBB}; |
| 1081 | |
| 1082 | Weight = getEstimatedEdgeWeight(Edge); |
| 1083 | |
| 1084 | if (isLoopExitingEdge(Edge) && |
| 1085 | // Avoid adjustment of ZERO weight since it should remain unchanged. |
| 1086 | Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { |
| 1087 | // Scale down loop exiting weight by trip count. |
| 1088 | Weight = std::max( |
| 1089 | a: static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), |
| 1090 | b: Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / |
| 1091 | TC); |
| 1092 | } |
| 1093 | bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(Ptr: SuccBB); |
| 1094 | if (IsUnlikelyEdge && |
| 1095 | // Avoid adjustment of ZERO weight since it should remain unchanged. |
| 1096 | Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { |
| 1097 | // 'Unlikely' blocks have twice lower weight. |
| 1098 | Weight = std::max( |
| 1099 | a: static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), |
| 1100 | b: Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2); |
| 1101 | } |
| 1102 | |
| 1103 | if (Weight) |
| 1104 | FoundEstimatedWeight = true; |
| 1105 | |
| 1106 | auto WeightVal = |
| 1107 | Weight.value_or(u: static_cast<uint32_t>(BlockExecWeight::DEFAULT)); |
| 1108 | TotalWeight += WeightVal; |
| 1109 | SuccWeights.push_back(Elt: WeightVal); |
| 1110 | } |
| 1111 | |
| 1112 | // If non of blocks have estimated weight bail out. |
| 1113 | // If TotalWeight is 0 that means weight of each successor is 0 as well and |
| 1114 | // equally likely. Bail out early to not deal with devision by zero. |
| 1115 | if (!FoundEstimatedWeight || TotalWeight == 0) |
| 1116 | return false; |
| 1117 | |
| 1118 | assert(SuccWeights.size() == succ_size(BB) && "Missed successor?" ); |
| 1119 | const unsigned SuccCount = SuccWeights.size(); |
| 1120 | |
| 1121 | // If the sum of weights does not fit in 32 bits, scale every weight down |
| 1122 | // accordingly. |
| 1123 | if (TotalWeight > UINT32_MAX) { |
| 1124 | uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1; |
| 1125 | TotalWeight = 0; |
| 1126 | for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { |
| 1127 | SuccWeights[Idx] /= ScalingFactor; |
| 1128 | if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO)) |
| 1129 | SuccWeights[Idx] = |
| 1130 | static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); |
| 1131 | TotalWeight += SuccWeights[Idx]; |
| 1132 | } |
| 1133 | assert(TotalWeight <= UINT32_MAX && "Total weight overflows" ); |
| 1134 | } |
| 1135 | |
| 1136 | // Finally set probabilities to edges according to estimated block weights. |
| 1137 | SmallVector<BranchProbability, 4> EdgeProbabilities( |
| 1138 | SuccCount, BranchProbability::getUnknown()); |
| 1139 | |
| 1140 | for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { |
| 1141 | EdgeProbabilities[Idx] = |
| 1142 | BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight); |
| 1143 | } |
| 1144 | BPI.setEdgeProbability(Src: BB, Probs: EdgeProbabilities); |
| 1145 | return true; |
| 1146 | } |
| 1147 | |
// "Zero Heuristics": predict the outcome of integer comparisons against the
// special constants 0, 1 and -1 (and of comparisons of the results of string/
// memory comparison library calls) using the precomputed probability tables.
// \returns true iff a probability was applied to \p BB.
bool BPIConstruction::calcZeroHeuristics(const BasicBlock *BB,
                                         const TargetLibraryInfo *TLI) {
  const CondBrInst *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator());
  if (!BI)
    return false;

  Value *Cond = BI->getCondition();
  ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond);
  if (!CI)
    return false;

  // Look through a bitcast when extracting the constant operand.
  auto GetConstantInt = [](Value *V) {
    if (auto *I = dyn_cast<BitCastInst>(Val: V))
      return dyn_cast<ConstantInt>(Val: I->getOperand(i_nocapture: 0));
    return dyn_cast<ConstantInt>(Val: V);
  };

  Value *RHS = CI->getOperand(i_nocapture: 1);
  ConstantInt *CV = GetConstantInt(RHS);
  if (!CV)
    return false;

  // If the LHS is the result of AND'ing a value with a single bit bitmask,
  // we don't have information about probabilities.
  if (Instruction *LHS = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: 0)))
    if (LHS->getOpcode() == Instruction::And)
      if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(i: 1)))
        if (AndRHS->getValue().isPowerOf2())
          return false;

  // Check if the LHS is the return value of a library function.
  LibFunc Func = LibFunc::NotLibFunc;
  if (TLI)
    if (CallInst *Call = dyn_cast<CallInst>(Val: CI->getOperand(i_nocapture: 0)))
      if (Function *CalledFn = Call->getCalledFunction())
        TLI->getLibFunc(FDecl: *CalledFn, F&: Func);

  // Select the probability table matching the compared constant (or the
  // library-call table, which takes precedence over the constant's value).
  ProbabilityTable::const_iterator Search;
  if (Func == LibFunc_strcasecmp ||
      Func == LibFunc_strcmp ||
      Func == LibFunc_strncasecmp ||
      Func == LibFunc_strncmp ||
      Func == LibFunc_memcmp ||
      Func == LibFunc_bcmp) {
    Search = ICmpWithLibCallTable.find(x: CI->getPredicate());
    if (Search == ICmpWithLibCallTable.end())
      return false;
  } else if (CV->isZero()) {
    Search = ICmpWithZeroTable.find(x: CI->getPredicate());
    if (Search == ICmpWithZeroTable.end())
      return false;
  } else if (CV->isOne()) {
    Search = ICmpWithOneTable.find(x: CI->getPredicate());
    if (Search == ICmpWithOneTable.end())
      return false;
  } else if (CV->isMinusOne()) {
    Search = ICmpWithMinusOneTable.find(x: CI->getPredicate());
    if (Search == ICmpWithMinusOneTable.end())
      return false;
  } else {
    // Comparisons against other constants carry no known bias.
    return false;
  }

  BPI.setEdgeProbability(Src: BB, Probs: Search->second);
  return true;
}
| 1214 | |
| 1215 | bool BPIConstruction::calcFloatingPointHeuristics(const BasicBlock *BB) { |
| 1216 | const CondBrInst *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator()); |
| 1217 | if (!BI) |
| 1218 | return false; |
| 1219 | |
| 1220 | Value *Cond = BI->getCondition(); |
| 1221 | FCmpInst *FCmp = dyn_cast<FCmpInst>(Val: Cond); |
| 1222 | if (!FCmp) |
| 1223 | return false; |
| 1224 | |
| 1225 | ProbabilityList ProbList; |
| 1226 | if (FCmp->isEquality()) { |
| 1227 | ProbList = !FCmp->isTrueWhenEqual() ? |
| 1228 | // f1 == f2 -> Unlikely |
| 1229 | ProbabilityList({FPTakenProb, FPUntakenProb}) : |
| 1230 | // f1 != f2 -> Likely |
| 1231 | ProbabilityList({FPUntakenProb, FPTakenProb}); |
| 1232 | } else { |
| 1233 | auto Search = FCmpTable.find(x: FCmp->getPredicate()); |
| 1234 | if (Search == FCmpTable.end()) |
| 1235 | return false; |
| 1236 | ProbList = Search->second; |
| 1237 | } |
| 1238 | |
| 1239 | BPI.setEdgeProbability(Src: BB, Probs: ProbList); |
| 1240 | return true; |
| 1241 | } |
// Entry point of the construction: sets up the supporting analyses (building
// the (post)dominator trees on demand when the caller did not supply them),
// estimates block weights, and then assigns edge probabilities to every
// multi-successor block of \p F using the first heuristic that applies.
void BPIConstruction::calculate(const Function &F, const LoopInfo &LoopI,
                                const TargetLibraryInfo *TLI, DominatorTree *DT,
                                PostDominatorTree *PDT) {
  LI = &LoopI;

  SccI = std::make_unique<SccInfo>(args: F);

  // Local storage for analyses built on demand; keeps DT/PDT valid for the
  // rest of this function either way.
  std::unique_ptr<DominatorTree> DTPtr;
  std::unique_ptr<PostDominatorTree> PDTPtr;

  if (!DT) {
    DTPtr = std::make_unique<DominatorTree>(args&: const_cast<Function &>(F));
    DT = DTPtr.get();
  }

  if (!PDT) {
    PDTPtr = std::make_unique<PostDominatorTree>(args&: const_cast<Function &>(F));
    PDT = PDTPtr.get();
  }

  estimateBlockWeights(F, DT, PDT);

  // Walk the basic blocks in post-order so that we can build up state about
  // the successors of a block iteratively.
  for (const auto *BB : post_order(G: &F.getEntryBlock())) {
    LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
                      << "\n" );
    // Blocks with fewer than two successors need no probabilities.
    if (BB->getTerminator()->getNumSuccessors() < 2)
      continue;
    // Heuristics are tried in decreasing order of reliability; the first one
    // that succeeds sets the probabilities for this block.
    if (calcMetadataWeights(BB))
      continue;
    if (calcEstimatedHeuristics(BB))
      continue;
    if (calcPointerHeuristics(BB))
      continue;
    if (calcZeroHeuristics(BB, TLI))
      continue;
    if (calcFloatingPointHeuristics(BB))
      continue;
  }
}
| 1284 | |
| 1285 | } // end anonymous namespace |
| 1286 | |
// Allocates a fresh span of succ_size(BB) probability slots at the end of the
// shared Probs vector, records its position for \p BB, and returns it for the
// caller to fill in.
MutableArrayRef<BranchProbability>
BranchProbabilityInfo::allocEdges(const BasicBlock *BB) {
  assert(BB->getParent() == LastF);
  assert(BlockNumberEpoch == LastF->getBlockNumberEpoch());
  unsigned NumSuccs = succ_size(BB);
  if (NumSuccs == 0) {
    // Nothing to allocate; drop any previously recorded edges for this block.
    eraseBlock(BB);
    return {};
  }
  if (EdgeStarts.size() <= BB->getNumber())
    EdgeStarts.resize(N: LastF->getMaxBlockNumber(), NV: 0);
  unsigned EdgeStart = Probs.size();
  // Starts are stored off-by-one so that 0 can serve as the "no edges"
  // sentinel (see getEdges).
  EdgeStarts[BB->getNumber()] = EdgeStart + 1; // 0 = no edges.
  Probs.append(NumInputs: NumSuccs, Elt: {});
  return MutableArrayRef(&Probs[EdgeStart], NumSuccs);
}
| 1303 | |
// Returns the probability span previously allocated for \p BB via allocEdges,
// or an empty ArrayRef if none exists. Note: in release builds the returned
// length is SIZE_MAX rather than the real successor count, so callers must
// only index into the ArrayRef, not iterate over it.
ArrayRef<BranchProbability>
BranchProbabilityInfo::getEdges(const BasicBlock *BB) const {
  assert(BB->getParent() == LastF);
  assert(BlockNumberEpoch == LastF->getBlockNumberEpoch());
  if (EdgeStarts.size() <= BB->getNumber())
    return {};
  if (unsigned EdgeStart = EdgeStarts[BB->getNumber()]) {
    // Undo the off-by-one encoding used by allocEdges.
    const BranchProbability *Start = &Probs[EdgeStart - 1]; // 0 = no edges.
    size_t Count = SIZE_MAX; // Avoid querying num successors in release builds.
#ifndef NDEBUG
    Count = succ_size(BB);
#endif
    return ArrayRef(Start, Count);
  }
  return {};
}
| 1320 | |
| 1321 | bool BranchProbabilityInfo::invalidate(Function &, const PreservedAnalyses &PA, |
| 1322 | FunctionAnalysisManager::Invalidator &) { |
| 1323 | // Check whether the analysis, all analyses on functions, or the function's |
| 1324 | // CFG have been preserved. |
| 1325 | auto PAC = PA.getChecker<BranchProbabilityAnalysis>(); |
| 1326 | return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || |
| 1327 | PAC.preservedSet<CFGAnalyses>()); |
| 1328 | } |
| 1329 | |
| 1330 | void BranchProbabilityInfo::print(raw_ostream &OS) const { |
| 1331 | OS << "---- Branch Probabilities ----\n" ; |
| 1332 | // We print the probabilities from the last function the analysis ran over, |
| 1333 | // or the function it is currently running over. |
| 1334 | assert(LastF && "Cannot print prior to running over a function" ); |
| 1335 | for (const auto &BI : *LastF) { |
| 1336 | for (const BasicBlock *Succ : successors(BB: &BI)) |
| 1337 | printEdgeProbability(OS&: OS << " " , Src: &BI, Dst: Succ); |
| 1338 | } |
| 1339 | } |
| 1340 | |
| 1341 | bool BranchProbabilityInfo:: |
| 1342 | isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { |
| 1343 | // Hot probability is at least 4/5 = 80% |
| 1344 | // FIXME: Compare against a static "hot" BranchProbability. |
| 1345 | return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); |
| 1346 | } |
| 1347 | |
| 1348 | /// Get the raw edge probability for the edge. If can't find it, return a |
| 1349 | /// default probability 1/N where N is the number of successors. Here an edge is |
| 1350 | /// specified using PredBlock and an |
| 1351 | /// index to the successors. |
| 1352 | BranchProbability |
| 1353 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
| 1354 | unsigned IndexInSuccessors) const { |
| 1355 | if (ArrayRef<BranchProbability> P = getEdges(BB: Src); !P.empty()) |
| 1356 | return P[IndexInSuccessors]; |
| 1357 | return {1, static_cast<uint32_t>(succ_size(BB: Src))}; |
| 1358 | } |
| 1359 | |
| 1360 | BranchProbability |
| 1361 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
| 1362 | const_succ_iterator Dst) const { |
| 1363 | return getEdgeProbability(Src, IndexInSuccessors: std::distance(first: succ_begin(BB: Src), last: Dst)); |
| 1364 | } |
| 1365 | |
| 1366 | /// Get the raw edge probability calculated for the block pair. This returns the |
| 1367 | /// sum of all raw edge probabilities from Src to Dst. |
| 1368 | BranchProbability |
| 1369 | BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, |
| 1370 | const BasicBlock *Dst) const { |
| 1371 | ArrayRef<BranchProbability> P = getEdges(BB: Src); |
| 1372 | if (P.empty()) |
| 1373 | return BranchProbability(llvm::count(Range: successors(BB: Src), Element: Dst), succ_size(BB: Src)); |
| 1374 | |
| 1375 | auto Prob = BranchProbability::getZero(); |
| 1376 | for (auto It : enumerate(First: successors(BB: Src))) |
| 1377 | if (It.value() == Dst) |
| 1378 | Prob += P[It.index()]; |
| 1379 | |
| 1380 | return Prob; |
| 1381 | } |
| 1382 | |
| 1383 | /// Set the edge probability for all edges at once. |
| 1384 | void BranchProbabilityInfo::setEdgeProbability( |
| 1385 | const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) { |
| 1386 | assert(Src->getTerminator()->getNumSuccessors() == Probs.size()); |
| 1387 | MutableArrayRef<BranchProbability> P = allocEdges(BB: Src); |
| 1388 | uint64_t TotalNumerator = 0; |
| 1389 | for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) { |
| 1390 | P[SuccIdx] = Probs[SuccIdx]; |
| 1391 | LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx |
| 1392 | << " successor probability to " << Probs[SuccIdx] |
| 1393 | << "\n" ); |
| 1394 | TotalNumerator += Probs[SuccIdx].getNumerator(); |
| 1395 | } |
| 1396 | |
| 1397 | // Because of rounding errors the total probability cannot be checked to be |
| 1398 | // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator. |
| 1399 | // Instead, every single probability in Probs must be as accurate as possible. |
| 1400 | // This results in error 1/denominator at most, thus the total absolute error |
| 1401 | // should be within Probs.size / BranchProbability::getDenominator. |
| 1402 | if (P.empty()) |
| 1403 | return; // If we store no probabilities, TotalNumerator is zero. |
| 1404 | assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size()); |
| 1405 | assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size()); |
| 1406 | (void)TotalNumerator; |
| 1407 | } |
| 1408 | |
| 1409 | void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src, |
| 1410 | BasicBlock *Dst) { |
| 1411 | assert(succ_size(Src) == succ_size(Dst)); |
| 1412 | // allocEdges can reallocate and must be called first. |
| 1413 | MutableArrayRef<BranchProbability> DstP = allocEdges(BB: Dst); |
| 1414 | ArrayRef<BranchProbability> SrcP = getEdges(BB: Src); |
| 1415 | if (SrcP.empty()) { |
| 1416 | // Nothing to copy from, erase again. |
| 1417 | eraseBlock(BB: Dst); |
| 1418 | return; |
| 1419 | } |
| 1420 | for (unsigned i = 0; i != DstP.size(); ++i) { |
| 1421 | DstP[i] = SrcP[i]; |
| 1422 | LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << i |
| 1423 | << " successor probability to " << SrcP[i] << "\n" ); |
| 1424 | } |
| 1425 | } |
| 1426 | |
| 1427 | void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock *Src) { |
| 1428 | assert(Src->getTerminator()->getNumSuccessors() == 2); |
| 1429 | ArrayRef<BranchProbability> P = getEdges(BB: Src); |
| 1430 | if (P.empty()) |
| 1431 | return; |
| 1432 | MutableArrayRef<BranchProbability> MP( |
| 1433 | const_cast<BranchProbability *>(P.data()), P.size()); |
| 1434 | std::swap(a&: MP[0], b&: MP[1]); |
| 1435 | } |
| 1436 | |
| 1437 | raw_ostream & |
| 1438 | BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, |
| 1439 | const BasicBlock *Src, |
| 1440 | const BasicBlock *Dst) const { |
| 1441 | const BranchProbability Prob = getEdgeProbability(Src, Dst); |
| 1442 | OS << "edge " ; |
| 1443 | Src->printAsOperand(O&: OS, PrintType: false, M: Src->getModule()); |
| 1444 | OS << " -> " ; |
| 1445 | Dst->printAsOperand(O&: OS, PrintType: false, M: Dst->getModule()); |
| 1446 | OS << " probability is " << Prob |
| 1447 | << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n" ); |
| 1448 | |
| 1449 | return OS; |
| 1450 | } |
| 1451 | |
| 1452 | void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { |
| 1453 | LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n" ); |
| 1454 | assert(BB->getParent() == LastF); |
| 1455 | assert(BlockNumberEpoch == LastF->getBlockNumberEpoch()); |
| 1456 | if (EdgeStarts.size() > BB->getNumber()) |
| 1457 | EdgeStarts[BB->getNumber()] = 0; |
| 1458 | } |
| 1459 | |
| 1460 | void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, |
| 1461 | const TargetLibraryInfo *TLI, |
| 1462 | DominatorTree *DT, |
| 1463 | PostDominatorTree *PDT) { |
| 1464 | LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() |
| 1465 | << " ----\n\n" ); |
| 1466 | LastF = &F; // Store the last function we ran on for printing. |
| 1467 | BlockNumberEpoch = F.getBlockNumberEpoch(); |
| 1468 | Probs.clear(); |
| 1469 | EdgeStarts.clear(); |
| 1470 | BPIConstruction(*this).calculate(F, LoopI, TLI, DT, PDT); |
| 1471 | |
| 1472 | if (PrintBranchProb && (PrintBranchProbFuncName.empty() || |
| 1473 | F.getName() == PrintBranchProbFuncName)) { |
| 1474 | print(OS&: dbgs()); |
| 1475 | } |
| 1476 | } |
| 1477 | |
| 1478 | void BranchProbabilityInfoWrapperPass::getAnalysisUsage( |
| 1479 | AnalysisUsage &AU) const { |
| 1480 | // We require DT so it's available when LI is available. The LI updating code |
| 1481 | // asserts that DT is also present so if we don't make sure that we have DT |
| 1482 | // here, that assert will trigger. |
| 1483 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 1484 | AU.addRequired<LoopInfoWrapperPass>(); |
| 1485 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
| 1486 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 1487 | AU.addRequired<PostDominatorTreeWrapperPass>(); |
| 1488 | AU.setPreservesAll(); |
| 1489 | } |
| 1490 | |
| 1491 | bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { |
| 1492 | const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
| 1493 | const TargetLibraryInfo &TLI = |
| 1494 | getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
| 1495 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
| 1496 | PostDominatorTree &PDT = |
| 1497 | getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); |
| 1498 | BPI.calculate(F, LoopI: LI, TLI: &TLI, DT: &DT, PDT: &PDT); |
| 1499 | return false; |
| 1500 | } |
| 1501 | |
| 1502 | void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS, |
| 1503 | const Module *) const { |
| 1504 | BPI.print(OS); |
| 1505 | } |
| 1506 | |
// Unique key identifying BranchProbabilityAnalysis in the new pass manager's
// analysis registry (its address, not value, is the identity).
AnalysisKey BranchProbabilityAnalysis::Key;
| 1508 | BranchProbabilityInfo |
| 1509 | BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { |
| 1510 | auto &LI = AM.getResult<LoopAnalysis>(IR&: F); |
| 1511 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
| 1512 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 1513 | auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F); |
| 1514 | BranchProbabilityInfo BPI; |
| 1515 | BPI.calculate(F, LoopI: LI, TLI: &TLI, DT: &DT, PDT: &PDT); |
| 1516 | return BPI; |
| 1517 | } |
| 1518 | |
| 1519 | PreservedAnalyses |
| 1520 | BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { |
| 1521 | OS << "Printing analysis 'Branch Probability Analysis' for function '" |
| 1522 | << F.getName() << "':\n" ; |
| 1523 | AM.getResult<BranchProbabilityAnalysis>(IR&: F).print(OS); |
| 1524 | return PreservedAnalyses::all(); |
| 1525 | } |
| 1526 | |