BlockFrequencyInfoImpl.cpp source code [llvm_projects/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp]

1	//===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Loops should be simplified before this analysis.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
14	#include "llvm/ADT/APInt.h"
15	#include "llvm/ADT/DenseMap.h"
16	#include "llvm/ADT/SCCIterator.h"
17	#include "llvm/ADT/SmallString.h"
18	#include "llvm/Config/llvm-config.h"
19	#include "llvm/IR/Function.h"
20	#include "llvm/Support/BlockFrequency.h"
21	#include "llvm/Support/BranchProbability.h"
22	#include "llvm/Support/Compiler.h"
23	#include "llvm/Support/Debug.h"
24	#include "llvm/Support/MathExtras.h"
25	#include "llvm/Support/ScaledNumber.h"
26	#include "llvm/Support/raw_ostream.h"
27	#include <algorithm>
28	#include <cassert>
29	#include <cstddef>
30	#include <cstdint>
31	#include <iterator>
32	#include <list>
33	#include <numeric>
34	#include <optional>
35	#include <utility>
36	#include <vector>
37
38	using namespace llvm;
39	using namespace llvm::bfi_detail;
40
41	#define DEBUG_TYPE "block-freq"
42
43	namespace llvm {
44	cl::opt<bool> CheckBFIUnknownBlockQueries(
45	"check-bfi-unknown-block-queries",
46	cl::init(Val: false), cl::Hidden,
47	cl::desc ("Check if block frequency is queried for an unknown block "
48	"for debugging missed BFI updates"));
49
50	cl::opt<bool> UseIterativeBFIInference(
51	"use-iterative-bfi-inference", cl::Hidden,
52	cl::desc ("Apply an iterative post-processing to infer correct BFI counts"));
53
54	cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock(
55	"iterative-bfi-max-iterations-per-block", cl::init(Val: `1000`), cl::Hidden,
56	cl::desc ("Iterative inference: maximum number of update iterations "
57	"per block"));
58
59	cl::opt<double> IterativeBFIPrecision(
60	"iterative-bfi-precision", cl::init(Val: `1e-12`), cl::Hidden,
61	cl::desc ("Iterative inference: delta convergence precision; smaller values "
62	"typically lead to better results at the cost of worsen runtime"));
63	} // namespace llvm
64
65	ScaledNumber<uint64_t> BlockMass::toScaled() const {
66	if (isFull())
67	return ScaledNumber<uint64_t>(`1`, `0`);
68	return ScaledNumber<uint64_t>(getMass() + `1`, -`64`);
69	}
70
71	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
72	LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
73	#endif
74
75	static char getHexDigit(int N) {
76	assert(N < `16`);
77	if (N < `10`)
78	return `'0'` + N;
79	return `'a'` + N - `10`;
80	}
81
82	raw_ostream &BlockMass::print(raw_ostream &OS) const {
83	for (int Digits = `0`; Digits < `16`; ++Digits)
84	OS << getHexDigit(N: Mass >> (`60` - Digits * `4`) & `0xf`);
85	return OS;
86	}
87
88	namespace {
89
90	using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
91	using Distribution = BlockFrequencyInfoImplBase::Distribution;
92	using WeightList = BlockFrequencyInfoImplBase::Distribution::WeightList;
93	using Scaled64 = BlockFrequencyInfoImplBase::Scaled64;
94	using LoopData = BlockFrequencyInfoImplBase::LoopData;
95	using Weight = BlockFrequencyInfoImplBase::Weight;
96	using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
97
98	/// Dithering mass distributer.
99	///
100	/// This class splits up a single mass into portions by weight, dithering to
101	/// spread out error. No mass is lost. The dithering precision depends on the
102	/// precision of the product of \a BlockMass and \a BranchProbability.
103	///
104	/// The distribution algorithm follows.
105	///
106	/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
107	/// mass to distribute in \a RemMass.
108	///
109	/// 2. For each portion:
110	///
111	/// 1. Construct a branch probability, P, as the portion's weight divided
112	/// by the current value of \a RemWeight.
113	/// 2. Calculate the portion's mass as \a RemMass times P.
114	/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
115	/// the current portion's weight and mass.
116	struct DitheringDistributer {
117	uint32_t RemWeight;
118	BlockMass RemMass;
119
120	DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
121
122	BlockMass takeMass(uint32_t Weight);
123	};
124
125	} // end anonymous namespace
126
127	DitheringDistributer::DitheringDistributer(Distribution &Dist,
128	const BlockMass &Mass) {
129	Dist.normalize();
130	RemWeight = Dist.Total;
131	RemMass = Mass;
132	}
133
134	BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
135	assert(Weight && "invalid weight");
136	assert(Weight <= RemWeight);
137	BlockMass Mass = RemMass * BranchProbability (Weight, RemWeight);
138
139	// Decrement totals (dither).
140	RemWeight -= Weight;
141	RemMass -= Mass;
142	return Mass;
143	}
144
145	void Distribution::add(const BlockNode &Node, uint64_t Amount,
146	Weight::DistType Type) {
147	assert(Amount && "invalid weight of 0");
148	uint64_t NewTotal = Total + Amount;
149
150	// Check for overflow. It should be impossible to overflow twice.
151	bool IsOverflow = NewTotal < Total;
152	assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
153	DidOverflow \|= IsOverflow;
154
155	// Update the total.
156	Total = NewTotal;
157
158	// Save the weight.
159	Weights.push_back(Elt: Weight (Type, Node, Amount));
160	}
161
162	static void combineWeight(Weight &W, const Weight &OtherW) {
163	assert(OtherW.TargetNode.isValid());
164	if (!W.Amount) {
165	W = OtherW;
166	return;
167	}
168	assert(W.Type == OtherW.Type);
169	assert(W.TargetNode == OtherW.TargetNode);
170	assert(OtherW.Amount && "Expected non-zero weight");
171	if (W.Amount > W.Amount + OtherW.Amount)
172	// Saturate on overflow.
173	W.Amount = UINT64_MAX;
174	else
175	W.Amount += OtherW.Amount;
176	}
177
178	static void combineWeightsBySorting(WeightList &Weights) {
179	// Sort so edges to the same node are adjacent.
180	llvm::sort(C&: Weights, Comp: [](const Weight &L, const Weight &R) {
181	return L.TargetNode < R.TargetNode;
182	});
183
184	// Combine adjacent edges.
185	WeightList::iterator O = Weights.begin();
186	for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
187	++O, (I = L)) {
188	O = I;
189
190	// Find the adjacent weights to the same node.
191	for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
192	combineWeight(W&: O, OtherW: L);
193	}
194
195	// Erase extra entries.
196	Weights.erase(CS: O, CE: Weights.end());
197	}
198
199	static void combineWeightsByHashing(WeightList &Weights) {
200	// Collect weights into a DenseMap.
201	using HashTable = DenseMap<BlockNode::IndexType, Weight>;
202
203	HashTable Combined(NextPowerOf2(A: `2` * Weights.size()));
204	for (const Weight &W : Weights)
205	combineWeight(W&: Combined [W.TargetNode.Index], OtherW: W);
206
207	// Check whether anything changed.
208	if (Weights.size() == Combined.size())
209	return;
210
211	// Fill in the new weights.
212	Weights.clear();
213	Weights.reserve(N: Combined.size());
214	for (const auto &I : Combined)
215	Weights.push_back(Elt: I.second);
216	}
217
218	static void combineWeights(WeightList &Weights) {
219	// Use a hash table for many successors to keep this linear.
220	if (Weights.size() > `128`) {
221	combineWeightsByHashing(Weights);
222	return;
223	}
224
225	combineWeightsBySorting(Weights);
226	}
227
228	static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
229	assert(Shift >= `0`);
230	assert(Shift < `64`);
231	if (!Shift)
232	return N;
233	return (N >> Shift) + (UINT64_C(`1`) & N >> (Shift - `1`));
234	}
235
236	void Distribution::normalize() {
237	// Early exit for termination nodes.
238	if (Weights.empty())
239	return;
240
241	// Only bother if there are multiple successors.
242	if (Weights.size() > `1`)
243	combineWeights(Weights);
244
245	// Early exit when combined into a single successor.
246	if (Weights.size() == `1`) {
247	Total = `1`;
248	Weights.front().Amount = `1`;
249	return;
250	}
251
252	// Determine how much to shift right so that the total fits into 32-bits.
253	//
254	// If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
255	// for each weight can cause a 32-bit overflow.
256	int Shift = `0`;
257	if (DidOverflow)
258	Shift = `33`;
259	else if (Total > UINT32_MAX)
260	Shift = `33` - llvm::countl_zero(Val: Total);
261
262	// Early exit if nothing needs to be scaled.
263	if (!Shift) {
264	// If we didn't overflow then combineWeights() shouldn't have changed the
265	// sum of the weights, but let's double-check.
266	assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(`0`),
267	[](uint64_t Sum, const Weight &W) {
268	return Sum + W.Amount;
269	}) &&
270	"Expected total to be correct");
271	return;
272	}
273
274	// Recompute the total through accumulation (rather than shifting it) so that
275	// it's accurate after shifting and any changes combineWeights() made above.
276	Total = `0`;
277
278	// Sum the weights to each node and shift right if necessary.
279	for (Weight &W : Weights) {
280	// Scale down below UINT32_MAX. Since Shift is larger than necessary, we
281	// can round here without concern about overflow.
282	assert(W.TargetNode.isValid());
283	W.Amount = std::max(UINT64_C(`1`), b: shiftRightAndRound(N: W.Amount, Shift));
284	assert(W.Amount <= UINT32_MAX);
285
286	// Update the total.
287	Total += W.Amount;
288	}
289	assert(Total <= UINT32_MAX);
290	}
291
292	void BlockFrequencyInfoImplBase::clear() {
293	// Swap with a default-constructed std::vector, since std::vector<>::clear()
294	// does not actually clear heap storage.
295	std::vector<FrequencyData>().swap(x&: Freqs);
296	IsIrrLoopHeader.clear();
297	std::vector<WorkingData>().swap(x&: Working);
298	Loops.clear();
299	}
300
301	/// Clear all memory not needed downstream.
302	///
303	/// Releases all memory not used downstream. In particular, saves Freqs.
304	static void cleanup(BlockFrequencyInfoImplBase &BFI) {
305	std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
306	SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
307	BFI.clear();
308	BFI.Freqs = std::move(SavedFreqs);
309	BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
310	}
311
312	bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
313	const LoopData *OuterLoop,
314	const BlockNode &Pred,
315	const BlockNode &Succ,
316	uint64_t Weight) {
317	if (!Weight)
318	Weight = `1`;
319
320	auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
321	return OuterLoop && OuterLoop->isHeader(Node);
322	};
323
324	BlockNode Resolved = Working [Succ.Index].getResolvedNode();
325
326	#ifndef NDEBUG
327	auto debugSuccessor = [&](const char *Type) {
328	dbgs() << " =>"
329	<< " [" << Type << "] weight = " << Weight;
330	if (!isLoopHeader(Resolved))
331	dbgs() << ", succ = " << getBlockName(Succ);
332	if (Resolved != Succ)
333	dbgs() << ", resolved = " << getBlockName(Resolved);
334	dbgs() << "\n";
335	};
336	(void)debugSuccessor;
337	#endif
338
339	if (isLoopHeader (Resolved)) {
340	LLVM_DEBUG(debugSuccessor("backedge"));
341	Dist.addBackedge(Node: Resolved, Amount: Weight);
342	return true;
343	}
344
345	if (Working [Resolved.Index].getContainingLoop() != OuterLoop) {
346	LLVM_DEBUG(debugSuccessor(" exit "));
347	Dist.addExit(Node: Resolved, Amount: Weight);
348	return true;
349	}
350
351	if (Resolved < Pred) {
352	if (!isLoopHeader (Pred)) {
353	// If OuterLoop is an irreducible loop, we can't actually handle this.
354	assert((!OuterLoop \|\| !OuterLoop->isIrreducible()) &&
355	"unhandled irreducible control flow");
356
357	// Irreducible backedge. Abort.
358	LLVM_DEBUG(debugSuccessor("abort!!!"));
359	return false;
360	}
361
362	// If "Pred" is a loop header, then this isn't really a backedge; rather,
363	// OuterLoop must be irreducible. These false backedges can come only from
364	// secondary loop headers.
365	assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
366	"unhandled irreducible control flow");
367	}
368
369	LLVM_DEBUG(debugSuccessor(" local "));
370	Dist.addLocal(Node: Resolved, Amount: Weight);
371	return true;
372	}
373
374	bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
375	const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
376	// Copy the exit map into Dist.
377	for (const auto &I : Loop.Exits)
378	if (!addToDist(Dist, OuterLoop, Pred: Loop.getHeader(), Succ: I.first,
379	Weight: I.second.getMass()))
380	// Irreducible backedge.
381	return false;
382
383	return true;
384	}
385
386	/// Compute the loop scale for a loop.
387	void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
388	// Compute loop scale.
389	LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
390
391	// Infinite loops need special handling. If we give the back edge an infinite
392	// mass, they may saturate all the other scales in the function down to 1,
393	// making all the other region temperatures look exactly the same. Choose an
394	// arbitrary scale to avoid these issues.
395	//
396	// FIXME: An alternate way would be to select a symbolic scale which is later
397	// replaced to be the maximum of all computed scales plus 1. This would
398	// appropriately describe the loop as having a large scale, without skewing
399	// the final frequency computation.
400	const Scaled64 InfiniteLoopScale(`1`, `12`);
401
402	// LoopScale == 1 / ExitMass
403	// ExitMass == HeadMass - BackedgeMass
404	BlockMass TotalBackedgeMass;
405	for (auto &Mass : Loop.BackedgeMass)
406	TotalBackedgeMass += Mass;
407	BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
408
409	// Block scale stores the inverse of the scale. If this is an infinite loop,
410	// its exit mass will be zero. In this case, use an arbitrary scale for the
411	// loop scale.
412	Loop.Scale =
413	ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
414
415	LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
416	<< BlockMass::getFull() << " - " << TotalBackedgeMass
417	<< ")\n"
418	<< " - scale = " << Loop.Scale << "\n");
419	}
420
421	/// Package up a loop.
422	void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
423	LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
424
425	// Clear the subloop exits to prevent quadratic memory usage.
426	for (const BlockNode &M : Loop.Nodes) {
427	if (auto *Loop = Working [M.Index].getPackagedLoop())
428	Loop->Exits.clear();
429	LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
430	}
431	Loop.IsPackaged = true;
432	}
433
434	#ifndef NDEBUG
435	static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
436	const DitheringDistributer &D, const BlockNode &T,
437	const BlockMass &M, const char *Desc) {
438	dbgs() << " => assign " << M << " (" << D.RemMass << ")";
439	if (Desc)
440	dbgs() << " [" << Desc << "]";
441	if (T.isValid())
442	dbgs() << " to " << BFI.getBlockName(T);
443	dbgs() << "\n";
444	}
445	#endif
446
447	void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
448	LoopData *OuterLoop,
449	Distribution &Dist) {
450	BlockMass Mass = Working [Source.Index].getMass();
451	LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n");
452
453	// Distribute mass to successors as laid out in Dist.
454	DitheringDistributer D(Dist, Mass);
455
456	for (const Weight &W : Dist.Weights) {
457	// Check for a local edge (non-backedge and non-exit).
458	BlockMass Taken = D.takeMass(Weight: W.Amount);
459	if (W.Type == Weight::Local) {
460	Working [W.TargetNode.Index].getMass() += Taken;
461	LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
462	continue;
463	}
464
465	// Backedges and exits only make sense if we're processing a loop.
466	assert(OuterLoop && "backedge or exit outside of loop");
467
468	// Check for a backedge.
469	if (W.Type == Weight::Backedge) {
470	OuterLoop->BackedgeMass [OuterLoop->getHeaderIndex(B: W.TargetNode)] += Taken;
471	LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
472	continue;
473	}
474
475	// This must be an exit.
476	assert(W.Type == Weight::Exit);
477	OuterLoop->Exits.push_back(Elt: std::make_pair(x: W.TargetNode, y&: Taken));
478	LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
479	}
480	}
481
482	static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
483	const Scaled64 &Min, const Scaled64 &Max) {
484	// Scale the Factor to a size that creates integers. If possible scale
485	// integers so that Max == UINT64_MAX so that they can be best differentiated.
486	// Is is possible that the range between min and max cannot be accurately
487	// represented in a 64bit integer without either loosing precision for small
488	// values (so small unequal numbers all map to 1) or saturaturing big numbers
489	// loosing precision for big numbers (so unequal big numbers may map to
490	// UINT64_MAX). We choose to loose precision for small numbers.
491	const unsigned MaxBits = sizeof(Scaled64::DigitsType) * CHAR_BIT;
492	// Users often add up multiple BlockFrequency values or multiply them with
493	// things like instruction costs. Leave some room to avoid saturating
494	// operations reaching UIN64_MAX too early.
495	const unsigned Slack = `10`;
496	Scaled64 ScalingFactor = Scaled64 (`1`, MaxBits - Slack) / Max;
497
498	// Translate the floats to integers.
499	LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
500	<< ", factor = " << ScalingFactor << "\n");
501	(void)Min;
502	for (size_t Index = `0`; Index < BFI.Freqs.size(); ++Index) {
503	Scaled64 Scaled = BFI.Freqs [Index].Scaled * ScalingFactor;
504	BFI.Freqs [Index].Integer = std::max(UINT64_C(`1`), b: Scaled.toInt<uint64_t>());
505	LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
506	<< BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
507	<< ", int = " << BFI.Freqs[Index].Integer << "\n");
508	}
509	}
510
511	/// Unwrap a loop package.
512	///
513	/// Visits all the members of a loop, adjusting their BlockData according to
514	/// the loop's pseudo-node.
515	static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
516	LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
517	<< ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
518	<< "\n");
519	Loop.Scale *= Loop.Mass.toScaled();
520	Loop.IsPackaged = false;
521	LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
522
523	// Propagate the head scale through the loop. Since members are visited in
524	// RPO, the head scale will be updated by the loop scale first, and then the
525	// final head scale will be used for updated the rest of the members.
526	for (const BlockNode &N : Loop.Nodes) {
527	const auto &Working = BFI.Working [N.Index];
528	Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
529	: BFI.Freqs [N.Index].Scaled;
530	Scaled64 New = Loop.Scale * F;
531	LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
532	<< New << "\n");
533	F = New;
534	}
535	}
536
537	void BlockFrequencyInfoImplBase::unwrapLoops() {
538	// Set initial frequencies from loop-local masses.
539	for (size_t Index = `0`; Index < Working.size(); ++Index)
540	Freqs [Index].Scaled = Working [Index].Mass.toScaled();
541
542	for (LoopData &Loop : Loops)
543	unwrapLoop(BFI&: *this, Loop);
544	}
545
546	void BlockFrequencyInfoImplBase::finalizeMetrics() {
547	// Unwrap loop packages in reverse post-order, tracking min and max
548	// frequencies.
549	auto Min = Scaled64::getLargest();
550	auto Max = Scaled64::getZero();
551	for (size_t Index = `0`; Index < Working.size(); ++Index) {
552	// Update min/max scale.
553	Min = std::min(a: Min, b: Freqs [Index].Scaled);
554	Max = std::max(a: Max, b: Freqs [Index].Scaled);
555	}
556
557	// Convert to integers.
558	convertFloatingToInteger(BFI&: *this, Min, Max);
559
560	// Clean up data structures.
561	cleanup(BFI&: *this);
562
563	// Print out the final stats.
564	LLVM_DEBUG(dump());
565	}
566
567	BlockFrequency
568	BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
569	if (!Node.isValid()) {
570	#ifndef NDEBUG
571	if (CheckBFIUnknownBlockQueries) {
572	SmallString<`256`> Msg;
573	raw_svector_ostream OS(Msg);
574	OS << "*** Detected BFI query for unknown block " << getBlockName(Node);
575	report_fatal_error(OS.str());
576	}
577	#endif
578	return BlockFrequency (`0`);
579	}
580	return BlockFrequency (Freqs [Node.Index].Integer);
581	}
582
583	std::optional<uint64_t>
584	BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
585	const BlockNode &Node,
586	bool AllowSynthetic) const {
587	return getProfileCountFromFreq(F, Freq: getBlockFreq(Node), AllowSynthetic);
588	}
589
590	std::optional<uint64_t> BlockFrequencyInfoImplBase::getProfileCountFromFreq(
591	const Function &F, BlockFrequency Freq, bool AllowSynthetic) const {
592	auto EntryCount = F.getEntryCount(AllowSynthetic);
593	if (!EntryCount)
594	return std::nullopt;
595	// Use 128 bit APInt to do the arithmetic to avoid overflow.
596	APInt BlockCount(`128`, EntryCount ->getCount());
597	APInt BlockFreq(`128`, Freq.getFrequency());
598	APInt EntryFreq(`128`, getEntryFreq().getFrequency());
599	BlockCount *= BlockFreq;
600	// Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
601	// lshr by 1 gives EntryFreq/2.
602	BlockCount = (BlockCount + EntryFreq.lshr(shiftAmt: `1`)).udiv(RHS: EntryFreq);
603	return BlockCount.getLimitedValue();
604	}
605
606	bool
607	BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) {
608	if (!Node.isValid())
609	return false;
610	return IsIrrLoopHeader.test(Idx: Node.Index);
611	}
612
613	Scaled64
614	BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
615	if (!Node.isValid())
616	return Scaled64::getZero();
617	return Freqs [Node.Index].Scaled;
618	}
619
620	void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
621	BlockFrequency Freq) {
622	assert(Node.isValid() && "Expected valid node");
623	assert(Node.Index < Freqs.size() && "Expected legal index");
624	Freqs [Node.Index].Integer = Freq.getFrequency();
625	}
626
627	std::string
628	BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
629	return {};
630	}
631
632	std::string
633	BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
634	return getBlockName(Node: Loop.getHeader()) + (Loop.isIrreducible() ? "*" : "");
635	}
636
637	void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
638	Start = OuterLoop.getHeader();
639	Nodes.reserve(n: OuterLoop.Nodes.size());
640	for (auto N : OuterLoop.Nodes)
641	addNode(Node: N);
642	indexNodes();
643	}
644
645	void IrreducibleGraph::addNodesInFunction() {
646	Start = `0`;
647	for (uint32_t Index = `0`; Index < BFI.Working.size(); ++Index)
648	if (!BFI.Working [Index].isPackaged())
649	addNode(Node: Index);
650	indexNodes();
651	}
652
653	void IrreducibleGraph::indexNodes() {
654	for (auto &I : Nodes)
655	Lookup [I.Node.Index] = &I;
656	}
657
658	void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
659	const BFIBase::LoopData *OuterLoop) {
660	if (OuterLoop && OuterLoop->isHeader(Node: Succ))
661	return;
662	auto L = Lookup.find(Val: Succ.Index);
663	if (L == Lookup.end())
664	return;
665	IrrNode &SuccIrr = *L ->second;
666	Irr.Edges.push_back(x: &SuccIrr);
667	SuccIrr.Edges.push_front(x: &Irr);
668	++SuccIrr.NumIn;
669	}
670
671	namespace llvm {
672
673	template <> struct GraphTraits<IrreducibleGraph> {
674	using GraphT = bfi_detail::IrreducibleGraph;
675	using NodeRef = const GraphT::IrrNode *;
676	using ChildIteratorType = GraphT::IrrNode::iterator;
677
678	static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
679	static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
680	static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
681	};
682
683	} // end namespace llvm
684
685	/// Find extra irreducible headers.
686	///
687	/// Find entry blocks and other blocks with backedges, which exist when \c G
688	/// contains irreducible sub-SCCs.
689	static void findIrreducibleHeaders(
690	const BlockFrequencyInfoImplBase &BFI,
691	const IrreducibleGraph &G,
692	const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
693	LoopData::NodeList &Headers, LoopData::NodeList &Others) {
694	// Map from nodes in the SCC to whether it's an entry block.
695	SmallDenseMap<const IrreducibleGraph::IrrNode , bool*, `8`> InSCC;
696
697	// InSCC also acts the set of nodes in the graph. Seed it.
698	for (const auto *I : SCC)
699	InSCC [I] = false;
700
701	for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
702	auto &Irr = *I ->first;
703	for (const auto *P : make_range(x: Irr.pred_begin(), y: Irr.pred_end())) {
704	if (InSCC.count(Val: P))
705	continue;
706
707	// This is an entry block.
708	I ->second = true;
709	Headers.push_back(Elt: Irr.Node);
710	LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node)
711	<< "\n");
712	break;
713	}
714	}
715	assert(Headers.size() >= `2` &&
716	"Expected irreducible CFG; -loop-info is likely invalid");
717	if (Headers.size() == InSCC.size()) {
718	// Every block is a header.
719	llvm::sort(C&: Headers);
720	return;
721	}
722
723	// Look for extra headers from irreducible sub-SCCs.
724	for (const auto &I : InSCC) {
725	// Entry blocks are already headers.
726	if (I.second)
727	continue;
728
729	auto &Irr = *I.first;
730	for (const auto *P : make_range(x: Irr.pred_begin(), y: Irr.pred_end())) {
731	// Skip forward edges.
732	if (P->Node < Irr.Node)
733	continue;
734
735	// Skip predecessors from entry blocks. These can have inverted
736	// ordering.
737	if (InSCC.lookup(Val: P))
738	continue;
739
740	// Store the extra header.
741	Headers.push_back(Elt: Irr.Node);
742	LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node)
743	<< "\n");
744	break;
745	}
746	if (Headers.back() == Irr.Node)
747	// Added this as a header.
748	continue;
749
750	// This is not a header.
751	Others.push_back(Elt: Irr.Node);
752	LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
753	}
754	llvm::sort(C&: Headers);
755	llvm::sort(C&: Others);
756	}
757
758	static void createIrreducibleLoop(
759	BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
760	LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
761	const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
762	// Translate the SCC into RPO.
763	LLVM_DEBUG(dbgs() << " - found-scc\n");
764
765	LoopData::NodeList Headers;
766	LoopData::NodeList Others;
767	findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
768
769	auto Loop = BFI.Loops.emplace(position: Insert, args&: OuterLoop, args: Headers.begin(),
770	args: Headers.end(), args: Others.begin(), args: Others.end());
771
772	// Update loop hierarchy.
773	for (const auto &N : Loop ->Nodes)
774	if (BFI.Working [N.Index].isLoopHeader())
775	BFI.Working [N.Index].Loop->Parent = &*Loop;
776	else
777	BFI.Working [N.Index].Loop = &*Loop;
778	}
779
780	iterator_range<std::list<LoopData>::iterator>
781	BlockFrequencyInfoImplBase::analyzeIrreducible(
782	const IrreducibleGraph &G, LoopData *OuterLoop,
783	std::list<LoopData>::iterator Insert) {
784	assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
785	auto Prev = OuterLoop ? std::prev(x: Insert) : Loops.end();
786
787	for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
788	if (I ->size() < `2`)
789	continue;
790
791	// Translate the SCC into RPO.
792	createIrreducibleLoop(BFI&: *this, G, OuterLoop, Insert, SCC: *I);
793	}
794
795	if (OuterLoop)
796	return make_range(x: std::next(x: Prev), y: Insert);
797	return make_range(x: Loops.begin(), y: Insert);
798	}
799
800	void
801	BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
802	OuterLoop.Exits.clear();
803	for (auto &Mass : OuterLoop.BackedgeMass)
804	Mass = BlockMass::getEmpty();
805	auto O = OuterLoop.Nodes.begin() + `1`;
806	for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
807	if (!Working [I->Index].isPackaged())
808	O++ = I;
809	OuterLoop.Nodes.erase(CS: O, CE: OuterLoop.Nodes.end());
810	}
811
812	void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
813	assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
814
815	// Since the loop has more than one header block, the mass flowing back into
816	// each header will be different. Adjust the mass in each header loop to
817	// reflect the masses flowing through back edges.
818	//
819	// To do this, we distribute the initial mass using the backedge masses
820	// as weights for the distribution.
821	BlockMass LoopMass = BlockMass::getFull();
822	Distribution Dist;
823
824	LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
825	for (uint32_t H = `0`; H < Loop.NumHeaders; ++H) {
826	auto &HeaderNode = Loop.Nodes [H];
827	auto &BackedgeMass = Loop.BackedgeMass [Loop.getHeaderIndex(B: HeaderNode)];
828	LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
829	<< getBlockName(HeaderNode) << ": " << BackedgeMass
830	<< "\n");
831	if (BackedgeMass.getMass() > `0`)
832	Dist.addLocal(Node: HeaderNode, Amount: BackedgeMass.getMass());
833	else
834	LLVM_DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
835	}
836
837	DitheringDistributer D(Dist, LoopMass);
838
839	LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
840	<< " to headers using above weights\n");
841	for (const Weight &W : Dist.Weights) {
842	BlockMass Taken = D.takeMass(Weight: W.Amount);
843	assert(W.Type == Weight::Local && "all weights should be local");
844	Working [W.TargetNode.Index].getMass() = Taken;
845	LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
846	}
847	}
848
849	void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) {
850	BlockMass LoopMass = BlockMass::getFull();
851	DitheringDistributer D(Dist, LoopMass);
852	for (const Weight &W : Dist.Weights) {
853	BlockMass Taken = D.takeMass(Weight: W.Amount);
854	assert(W.Type == Weight::Local && "all weights should be local");
855	Working [W.TargetNode.Index].getMass() = Taken;
856	LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
857	}
858	}
859

Browse the source code of llvm_projects/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp