//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a profile inference algorithm. Given incomplete and
// possibly imprecise block counts, the algorithm reconstructs realistic block
// and edge counts that satisfy flow conservation rules, while minimally
// modifying the input block counts.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <queue>
#include <set>
#include <stack>
#include <unordered_set>

using namespace llvm;
#define DEBUG_TYPE "sample-profile-inference"

namespace {

static cl::opt<bool> SampleProfileEvenFlowDistribution(
    "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."));

static cl::opt<bool> SampleProfileRebalanceUnknown(
    "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
    cl::desc("Evenly re-distribute flow among unknown subgraphs."));

static cl::opt<bool> SampleProfileJoinIslands(
    "sample-profile-join-islands", cl::init(true), cl::Hidden,
    cl::desc("Join isolated components having positive flow."));

static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
    "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
    cl::desc("The cost of increasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
    "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
    cl::desc("The cost of decreasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
    "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
    cl::desc("The cost of increasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
    "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
    cl::desc("The cost of decreasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
    "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
    cl::desc("The cost of increasing the count of a zero-weight block by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
    "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
    cl::desc("The cost of increasing an unknown block's count by one."));

/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
/// during the execution.
static constexpr int64_t INF = ((int64_t)1) << 50;

/// The minimum-cost maximum flow algorithm.
///
/// The algorithm finds the maximum flow of minimum cost on a given (directed)
/// network using a modified version of the classical Moore-Bellman-Ford
/// approach. The algorithm applies a number of augmentation iterations in which
/// flow is sent along paths of positive capacity from the source to the sink.
/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
/// m is the number of edges, n is the number of vertices, and v(f) is the
/// value of the maximum flow. However, the observed running time on typical
/// instances is sub-quadratic, that is, o(n^2).
///
/// The input is a set of edges with specified costs and capacities, and a pair
/// of nodes (source and sink). The output is the flow along each edge of the
/// minimum total cost respecting the given edge capacities.
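///
/// A minimal usage sketch (illustrative only; the node indices and edge
/// parameters below are made up):
///   MinCostMaxFlow Flow(Params);
///   Flow.initialize(/*NodeCount=*/4, /*SourceNode=*/0, /*SinkNode=*/3);
///   Flow.addEdge(0, 1, /*Capacity=*/10, /*Cost=*/1);
///   Flow.addEdge(1, 3, /*Capacity=*/10, /*Cost=*/1);
///   Flow.run();
///   int64_t SentFlow = Flow.getFlow(0, 1); // flow pushed along 0 -> 1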
class MinCostMaxFlow {
public:
  MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}

  // Initialize algorithm's data structures for a network of a given size.
  void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
    Source = SourceNode;
    Target = SinkNode;

    Nodes = std::vector<Node>(NodeCount);
    Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
    if (Params.EvenFlowDistribution)
      AugmentingEdges =
          std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
  }

  // Run the algorithm.
  int64_t run() {
    LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");

    // Iteratively find an augmentation path/dag in the network and send the
    // flow along its edges
    size_t AugmentationIters = applyFlowAugmentation();

    // Compute the total flow and its cost
    int64_t TotalCost = 0;
    int64_t TotalFlow = 0;
    for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
      for (auto &Edge : Edges[Src]) {
        if (Edge.Flow > 0) {
          TotalCost += Edge.Cost * Edge.Flow;
          if (Src == Source)
            TotalFlow += Edge.Flow;
        }
      }
    }
    LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
                      << " iterations with " << TotalFlow << " total flow"
                      << " of " << TotalCost << " cost\n");
    (void)TotalFlow;
    (void)AugmentationIters;
    return TotalCost;
  }

  /// Add an edge to the network with a specified capacity and a cost.
  /// Multiple edges between a pair of nodes are allowed, but self-edges
  /// are not supported.
  void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
    assert(Capacity > 0 && "adding an edge of zero capacity");
    assert(Src != Dst && "self-edges are not supported");

    Edge SrcEdge;
    SrcEdge.Dst = Dst;
    SrcEdge.Cost = Cost;
    SrcEdge.Capacity = Capacity;
    SrcEdge.Flow = 0;
    SrcEdge.RevEdgeIndex = Edges[Dst].size();

    Edge DstEdge;
    DstEdge.Dst = Src;
    DstEdge.Cost = -Cost;
    DstEdge.Capacity = 0;
    DstEdge.Flow = 0;
    DstEdge.RevEdgeIndex = Edges[Src].size();

    Edges[Src].push_back(SrcEdge);
    Edges[Dst].push_back(DstEdge);
  }

  /// Add an edge to the network of infinite capacity and a given cost.
  void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
    addEdge(Src, Dst, INF, Cost);
  }

  /// Get the total flow from a given source node.
  /// Returns a list of pairs (target node, amount of flow to the target).
  std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
    std::vector<std::pair<uint64_t, int64_t>> Flow;
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Flow > 0)
        Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
    }
    return Flow;
  }

  /// Get the total flow between a pair of nodes.
  int64_t getFlow(uint64_t Src, uint64_t Dst) const {
    int64_t Flow = 0;
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Dst == Dst) {
        Flow += Edge.Flow;
      }
    }
    return Flow;
  }

private:
  /// Iteratively find an augmentation path/dag in the network and send the
  /// flow along its edges. The method returns the number of applied iterations.
  size_t applyFlowAugmentation() {
    size_t AugmentationIters = 0;
    while (findAugmentingPath()) {
      uint64_t PathCapacity = computeAugmentingPathCapacity();
      while (PathCapacity > 0) {
        bool Progress = false;
        if (Params.EvenFlowDistribution) {
          // Identify node/edge candidates for augmentation
          identifyShortestEdges(PathCapacity);

          // Find an augmenting DAG
          auto AugmentingOrder = findAugmentingDAG();

          // Apply the DAG augmentation
          Progress = augmentFlowAlongDAG(AugmentingOrder);
          PathCapacity = computeAugmentingPathCapacity();
        }

        if (!Progress) {
          augmentFlowAlongPath(PathCapacity);
          PathCapacity = 0;
        }

        AugmentationIters++;
      }
    }
    return AugmentationIters;
  }

  /// Compute the capacity of the canonical augmenting path. If the path is
  /// saturated (that is, no flow can be sent along the path), then return 0.
  uint64_t computeAugmentingPathCapacity() {
    uint64_t PathCapacity = INF;
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

      assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
      uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
      PathCapacity = std::min(PathCapacity, EdgeCapacity);

      Now = Pred;
    }
    return PathCapacity;
  }

  /// Check for existence of an augmenting path with a positive capacity.
  bool findAugmentingPath() {
    // Initialize data structures
    for (auto &Node : Nodes) {
      Node.Distance = INF;
      Node.ParentNode = uint64_t(-1);
      Node.ParentEdgeIndex = uint64_t(-1);
      Node.Taken = false;
    }

    std::queue<uint64_t> Queue;
    Queue.push(Source);
    Nodes[Source].Distance = 0;
    Nodes[Source].Taken = true;
    while (!Queue.empty()) {
      uint64_t Src = Queue.front();
      Queue.pop();
      Nodes[Src].Taken = false;
      // Although the residual network contains edges with negative costs
      // (in particular, backward edges), it can be shown that there are no
      // negative-weight cycles and the following two invariants are maintained:
      // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
      // where Dist is the length of the shortest path between two nodes. This
      // allows us to prune the search-space of the path-finding algorithm using
      // the following early-stop criteria:
      // -- If we find a path with zero-distance from Source to Target, stop the
      //    search, as the path is the shortest since Dist[Source, Target] >= 0;
      // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
      //    process node V, as it is guaranteed _not_ to be on a shortest path
      //    from Source to Target; it follows from inequalities
      //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
      //                         >= Dist[Source, V]
      if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
        break;
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      // Process adjacent edges
      for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
        auto &Edge = Edges[Src][EdgeIdx];
        if (Edge.Flow < Edge.Capacity) {
          uint64_t Dst = Edge.Dst;
          int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
          if (Nodes[Dst].Distance > NewDistance) {
            // Update the distance and the parent node/edge
            Nodes[Dst].Distance = NewDistance;
            Nodes[Dst].ParentNode = Src;
            Nodes[Dst].ParentEdgeIndex = EdgeIdx;
            // Add the node to the queue, if it is not there yet
            if (!Nodes[Dst].Taken) {
              Queue.push(Dst);
              Nodes[Dst].Taken = true;
            }
          }
        }
      }
    }

    return Nodes[Target].Distance != INF;
  }

  /// Update the current flow along the augmenting path.
  void augmentFlowAlongPath(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting path");
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
      auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];

      Edge.Flow += PathCapacity;
      RevEdge.Flow -= PathCapacity;

      Now = Pred;
    }
  }

  /// Find an Augmenting DAG order using a modified version of DFS in which we
  /// can visit a node multiple times. In the DFS search, when scanning each
  /// edge out of a node, continue the search at the Edge.Dst endpoint if it has
  /// not been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
  /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
  /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
  /// that starts with Source and ends with Target.
  std::vector<uint64_t> findAugmentingDAG() {
    // We use a stack based implementation of DFS to avoid recursion.
    // Defining DFS data structures:
    // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
    //  - we are currently visiting Nodes[NodeIdx] and
    //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
    typedef std::pair<uint64_t, uint64_t> StackItemType;
    std::stack<StackItemType> Stack;
    std::vector<uint64_t> AugmentingOrder;

    // Phase 0: Initialize Node attributes and Time for DFS run
    for (auto &Node : Nodes) {
      Node.Discovery = 0;
      Node.Finish = 0;
      Node.NumCalls = 0;
      Node.Taken = false;
    }
    uint64_t Time = 0;
    // Mark Target as Taken
    // Taken attribute will be propagated backwards from Target towards Source
    Nodes[Target].Taken = true;

    // Phase 1: Start DFS traversal from Source
    Stack.emplace(Source, 0);
    Nodes[Source].Discovery = ++Time;
    while (!Stack.empty()) {
      auto NodeIdx = Stack.top().first;
      auto EdgeIdx = Stack.top().second;

      // If we haven't scanned all edges out of NodeIdx, continue scanning
      if (EdgeIdx < Edges[NodeIdx].size()) {
        auto &Edge = Edges[NodeIdx][EdgeIdx];
        auto &Dst = Nodes[Edge.Dst];
        Stack.top().second++;

        if (Edge.OnShortestPath) {
          // If we haven't seen Edge.Dst so far, continue DFS search there
          if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
            Dst.Discovery = ++Time;
            Stack.emplace(Edge.Dst, 0);
            Dst.NumCalls++;
          } else if (Dst.Taken && Dst.Finish != 0) {
            // Otherwise, if Edge.Dst already has a path to Target, then so
            // does NodeIdx
            Nodes[NodeIdx].Taken = true;
          }
        }
      } else {
        // If we are done scanning all edges out of NodeIdx
        Stack.pop();
        // If we haven't found a path from NodeIdx to Target, forget about it
        if (!Nodes[NodeIdx].Taken) {
          Nodes[NodeIdx].Discovery = 0;
        } else {
          // If we have found a path from NodeIdx to Target, then finish NodeIdx
          // and propagate Taken flag to DFS parent unless at the Source
          Nodes[NodeIdx].Finish = ++Time;
          // NodeIdx == Source if and only if the stack is empty
          if (NodeIdx != Source) {
            assert(!Stack.empty() && "empty stack while running dfs");
            Nodes[Stack.top().first].Taken = true;
          }
          AugmentingOrder.push_back(NodeIdx);
        }
      }
    }
    // Nodes are collected in decreasing Finish time, so the order is reversed
    std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());

    // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
    for (size_t Src : AugmentingOrder) {
      AugmentingEdges[Src].clear();
      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
            Nodes[Dst].Finish < Nodes[Src].Finish) {
          AugmentingEdges[Src].push_back(&Edge);
        }
      }
      assert((Src == Target || !AugmentingEdges[Src].empty()) &&
             "incorrectly constructed augmenting edges");
    }

    return AugmentingOrder;
  }

  /// Update the current flow along the given (acyclic) subgraph specified by
  /// the vertex order, AugmentingOrder. The objective is to send as much flow
  /// as possible while evenly distributing flow among successors of each node.
  /// After the update at least one edge is saturated.
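  /// For instance, a node with two augmenting successor edges sends half of
  /// its incoming flow along each of them, subject to residual capacities.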
  bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
    // Phase 0: Initialization
    for (uint64_t Src : AugmentingOrder) {
      Nodes[Src].FracFlow = 0;
      Nodes[Src].IntFlow = 0;
      for (auto &Edge : AugmentingEdges[Src]) {
        Edge->AugmentedFlow = 0;
      }
    }

    // Phase 1: Send a unit of fractional flow along the DAG
    uint64_t MaxFlowAmount = INF;
    Nodes[Source].FracFlow = 1.0;
    for (uint64_t Src : AugmentingOrder) {
      assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
             "incorrectly computed fractional flow");
      // Distribute flow evenly among successors of Src
      uint64_t Degree = AugmentingEdges[Src].size();
      for (auto &Edge : AugmentingEdges[Src]) {
        double EdgeFlow = Nodes[Src].FracFlow / Degree;
        Nodes[Edge->Dst].FracFlow += EdgeFlow;
        if (Edge->Capacity == INF)
          continue;
        uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
        MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
      }
    }
    // Stop early if we cannot send any (integral) flow from Source to Target
    if (MaxFlowAmount == 0)
      return false;

    // Phase 2: Send an integral flow of MaxFlowAmount
    Nodes[Source].IntFlow = MaxFlowAmount;
    for (uint64_t Src : AugmentingOrder) {
      if (Src == Target)
        break;
      // Distribute flow evenly among successors of Src, rounding up to make
      // sure all flow is sent
      uint64_t Degree = AugmentingEdges[Src].size();
      // We are guaranteed that Node[Src].IntFlow <= SuccFlow * Degree
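      // (e.g., IntFlow = 10 with Degree = 3 yields SuccFlow = 4)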
      uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
        EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
        Nodes[Dst].IntFlow += EdgeFlow;
        Nodes[Src].IntFlow -= EdgeFlow;
        Edge->AugmentedFlow += EdgeFlow;
      }
    }
    assert(Nodes[Target].IntFlow <= MaxFlowAmount);
    Nodes[Target].IntFlow = 0;

    // Phase 3: Send excess flow back traversing the nodes backwards.
    // Because of rounding, not all flow can be sent along the edges of Src.
    // Hence, sending the remaining flow back to maintain flow conservation
    for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
      uint64_t Src = AugmentingOrder[Idx - 1];
      // Try to send excess flow back along each edge.
      // Make sure we only send back flow we just augmented (AugmentedFlow).
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        if (Nodes[Dst].IntFlow == 0)
          continue;
        uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
        Nodes[Dst].IntFlow -= EdgeFlow;
        Nodes[Src].IntFlow += EdgeFlow;
        Edge->AugmentedFlow -= EdgeFlow;
      }
    }

    // Phase 4: Update flow values along all edges
    bool HasSaturatedEdges = false;
    for (uint64_t Src : AugmentingOrder) {
      // Verify that we have sent all the excess flow from the node
      assert(Src == Source || Nodes[Src].IntFlow == 0);
      for (auto &Edge : AugmentingEdges[Src]) {
        assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
        // Update flow values along the edge and its reverse copy
        auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
        Edge->Flow += Edge->AugmentedFlow;
        RevEdge.Flow -= Edge->AugmentedFlow;
        if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
          HasSaturatedEdges = true;
      }
    }

    // The augmentation is successful iff at least one edge becomes saturated
    return HasSaturatedEdges;
  }

  /// Identify candidate (shortest) edges for augmentation.
  void identifyShortestEdges(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
    // To make sure the augmentation DAG contains only edges with large residual
    // capacity, we prune all edges whose capacity is below a fraction of
    // the capacity of the augmented path.
    // (All edges of the path itself are always in the DAG)
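    // For instance, with PathCapacity = 7, only edges with a residual
    // capacity of at least 3 remain candidates.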
    uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));

    // Decide which edges are on a shortest path from Source to Target
    for (size_t Src = 0; Src < Nodes.size(); Src++) {
      // An edge cannot be augmenting if the endpoint has large distance
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        Edge.OnShortestPath =
            Src != Target && Dst != Source &&
            Nodes[Dst].Distance <= Nodes[Target].Distance &&
            Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
            Edge.Capacity > Edge.Flow &&
            uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
      }
    }
  }

  /// Maximum number of DFS iterations for DAG finding.
  static constexpr uint64_t MaxDfsCalls = 10;

  /// A node in a flow network.
  struct Node {
    /// The cost of the cheapest path from the source to the current node.
    int64_t Distance;
    /// The node preceding the current one in the path.
    uint64_t ParentNode;
    /// The index of the edge between ParentNode and the current node.
    uint64_t ParentEdgeIndex;
    /// An indicator of whether the current node is in a queue.
    bool Taken;

    /// Data fields utilized in DAG-augmentation:
    /// Fractional flow.
    double FracFlow;
    /// Integral flow.
    uint64_t IntFlow;
    /// Discovery time.
    uint64_t Discovery;
    /// Finish time.
    uint64_t Finish;
    /// The number of times the node has been visited during the DFS.
    uint64_t NumCalls;
  };

  /// An edge in a flow network.
  struct Edge {
    /// The cost of the edge.
    int64_t Cost;
    /// The capacity of the edge.
    int64_t Capacity;
    /// The current flow on the edge.
    int64_t Flow;
    /// The destination node of the edge.
    uint64_t Dst;
    /// The index of the reverse edge between Dst and the current node.
    uint64_t RevEdgeIndex;

    /// Data fields utilized in DAG-augmentation:
    /// Whether the edge is currently on a shortest path from Source to Target.
    bool OnShortestPath;
    /// Extra flow along the edge.
    uint64_t AugmentedFlow;
  };

  /// The set of network nodes.
  std::vector<Node> Nodes;
  /// The set of network edges.
  std::vector<std::vector<Edge>> Edges;
  /// Source node of the flow.
  uint64_t Source;
  /// Target (sink) node of the flow.
  uint64_t Target;
  /// Augmenting edges.
  std::vector<std::vector<Edge *>> AugmentingEdges;
  /// Params for flow computation.
  const ProfiParams &Params;
};

/// A post-processing adjustment of the control flow. It applies two steps by
/// rerouting some flow and making it more realistic:
///
/// - First, it removes all isolated components ("islands") with a positive
///   flow that are unreachable from the entry block. For every such component,
///   we find the shortest path from the entry to an exit passing through the
///   component, and increase the flow by one unit along the path.
///
/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
///   with no sampled counts. Then it rebalances the flow that goes through such
///   a subgraph so that each branch is taken with probability 50%.
///   An unknown subgraph is such that for every two nodes u and v:
///     - u dominates v and u is not unknown;
///     - v post-dominates u; and
///     - all inner-nodes of all (u,v)-paths are unknown.
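///   For example, if a known block u branches into two unknown blocks that
///   both jump to a known block v, the flow from u to v is split evenly
///   between the two branches.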
///
class FlowAdjuster {
public:
  FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
      : Params(Params), Func(Func) {}

  /// Apply the post-processing.
  void run() {
    if (Params.JoinIslands) {
      // Adjust the flow to get rid of isolated components
      joinIsolatedComponents();
    }

    if (Params.RebalanceUnknown) {
      // Rebalance the flow inside unknown subgraphs
      rebalanceUnknownSubgraphs();
    }
  }

private:
  void joinIsolatedComponents() {
    // Find blocks that are reachable from the source
    auto Visited = BitVector(NumBlocks(), false);
    findReachable(Func.Entry, Visited);

    // Iterate over all non-reachable blocks and adjust their weights
    for (uint64_t I = 0; I < NumBlocks(); I++) {
      auto &Block = Func.Blocks[I];
      if (Block.Flow > 0 && !Visited[I]) {
        // Find a path from the entry to an exit passing through the block I
        auto Path = findShortestPath(I);
        // Increase the flow along the path
        assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
               "incorrectly computed path adjusting control flow");
        Func.Blocks[Func.Entry].Flow += 1;
        for (auto &Jump : Path) {
          Jump->Flow += 1;
          Func.Blocks[Jump->Target].Flow += 1;
          // Update reachability
          findReachable(Jump->Target, Visited);
        }
      }
    }
  }

  /// Run BFS from a given block along the jumps with a positive flow and mark
  /// all reachable blocks.
  void findReachable(uint64_t Src, BitVector &Visited) {
    if (Visited[Src])
      return;
    std::queue<uint64_t> Queue;
    Queue.push(Src);
    Visited[Src] = true;
    while (!Queue.empty()) {
      Src = Queue.front();
      Queue.pop();
      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        if (Jump->Flow > 0 && !Visited[Dst]) {
          Queue.push(Dst);
          Visited[Dst] = true;
        }
      }
    }
  }

  /// Find the shortest path from the entry block to an exit block passing
  /// through a given block.
  std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
    // A path from the entry block to BlockIdx
    auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
    // A path from BlockIdx to an exit block
    auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);

    // Concatenate the two paths
    std::vector<FlowJump *> Result;
    Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
    Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
    return Result;
  }

  /// Apply the Dijkstra algorithm to find the shortest path from a given
  /// Source to a given Target block.
  /// If Target == -1, then the path ends at an exit block.
  std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
    // Quit early, if possible
    if (Source == Target)
      return std::vector<FlowJump *>();
    if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
      return std::vector<FlowJump *>();

    // Initialize data structures
    auto Distance = std::vector<int64_t>(NumBlocks(), INF);
    auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
    Distance[Source] = 0;
    std::set<std::pair<uint64_t, uint64_t>> Queue;
    Queue.insert(std::make_pair(Distance[Source], Source));

    // Run the Dijkstra algorithm
    while (!Queue.empty()) {
      uint64_t Src = Queue.begin()->second;
      Queue.erase(Queue.begin());
      // If we found a solution, quit early
      if (Src == Target ||
          (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
        break;

      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        int64_t JumpDist = jumpDistance(Jump);
        if (Distance[Dst] > Distance[Src] + JumpDist) {
          Queue.erase(std::make_pair(Distance[Dst], Dst));

          Distance[Dst] = Distance[Src] + JumpDist;
          Parent[Dst] = Jump;

          Queue.insert(std::make_pair(Distance[Dst], Dst));
        }
      }
    }
    // If Target is not provided, find the closest exit block
    if (Target == AnyExitBlock) {
      for (uint64_t I = 0; I < NumBlocks(); I++) {
        if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
          if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
            Target = I;
          }
        }
      }
    }
    assert(Parent[Target] != nullptr && "a path does not exist");

    // Extract the constructed path
    std::vector<FlowJump *> Result;
    uint64_t Now = Target;
    while (Now != Source) {
      assert(Now == Parent[Now]->Target && "incorrect parent jump");
      Result.push_back(Parent[Now]);
      Now = Parent[Now]->Source;
    }
    // Reverse the path, since it is extracted from Target to Source
    std::reverse(Result.begin(), Result.end());
    return Result;
  }

  /// A distance of a path for a given jump.
  /// In order to encourage the path to use blocks/jumps with large positive
  /// flow, and to avoid changing branch probabilities of outgoing edges
  /// drastically, set the jump distance so as:
  ///   - to minimize the number of unlikely jumps used and, subject to that,
  ///   - to minimize the number of Flow == 0 jumps used and, subject to that,
  ///   - to minimize the total multiplicative Flow increase for the remaining
  ///     edges.
  /// To capture this objective with integer distances, we round off fractional
  /// parts to a multiple of 1 / BaseDistance.
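  /// For instance, with BaseDistance = 10000, a jump with Flow == 100 gets
  /// distance 10000 + 10000 / 100 = 10100, while a zero-flow jump in a
  /// 10-block function gets 2 * 10000 * 11 = 220000; so a single zero-flow
  /// jump outweighs many positive-flow ones.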
  int64_t jumpDistance(FlowJump *Jump) const {
    if (Jump->IsUnlikely)
      return Params.CostUnlikely;
    uint64_t BaseDistance =
        std::max(FlowAdjuster::MinBaseDistance,
                 std::min(Func.Blocks[Func.Entry].Flow,
                          Params.CostUnlikely / (2 * (NumBlocks() + 1))));
    if (Jump->Flow > 0)
      return BaseDistance + BaseDistance / Jump->Flow;
    return 2 * BaseDistance * (NumBlocks() + 1);
  }

  uint64_t NumBlocks() const { return Func.Blocks.size(); }

  /// Rebalance unknown subgraphs so that the flow is split evenly across the
  /// outgoing branches of every block of the subgraph. The method iterates over
  /// blocks with known weight and identifies unknown subgraphs rooted at the
  /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
  void rebalanceUnknownSubgraphs() {
    // Try to find unknown subgraphs from each block
    for (const FlowBlock &SrcBlock : Func.Blocks) {
      // Verify if rebalancing rooted at SrcBlock is feasible
      if (!canRebalanceAtRoot(&SrcBlock))
        continue;

      // Find an unknown subgraph starting at SrcBlock. Along the way,
      // fill in known destinations and intermediate unknown blocks.
      std::vector<FlowBlock *> UnknownBlocks;
      std::vector<FlowBlock *> KnownDstBlocks;
      findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);

      // Verify if rebalancing of the subgraph is feasible. If the search is
      // successful, find the unique destination block (which can be null)
      FlowBlock *DstBlock = nullptr;
      if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
                                DstBlock))
        continue;

      // We cannot rebalance subgraphs containing cycles among unknown blocks
      if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
        continue;

      // Rebalance the flow
      rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
    }
  }

  /// Verify if rebalancing rooted at a given block is possible.
  bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
    // Do not attempt to find unknown subgraphs from an unknown or a
    // zero-flow block
    if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
      return false;

    // Do not attempt to process subgraphs from a block w/o unknown successors
    bool HasUnknownSuccs = false;
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (Func.Blocks[Jump->Target].HasUnknownWeight) {
        HasUnknownSuccs = true;
        break;
      }
    }
    if (!HasUnknownSuccs)
      return false;

    return true;
  }

  /// Find an unknown subgraph starting at block SrcBlock. The method sets
  /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
  void findUnknownSubgraph(const FlowBlock *SrcBlock,
                           std::vector<FlowBlock *> &KnownDstBlocks,
                           std::vector<FlowBlock *> &UnknownBlocks) {
    // Run BFS from SrcBlock and make sure all paths are going through unknown
    // blocks and end at a known DstBlock
    auto Visited = BitVector(NumBlocks(), false);
    std::queue<uint64_t> Queue;

    Queue.push(SrcBlock->Index);
    Visited[SrcBlock->Index] = true;
    while (!Queue.empty()) {
      auto &Block = Func.Blocks[Queue.front()];
      Queue.pop();
      // Process blocks reachable from Block
      for (auto *Jump : Block.SuccJumps) {
        // If Jump can be ignored, skip it
        if (ignoreJump(SrcBlock, nullptr, Jump))
          continue;

        uint64_t Dst = Jump->Target;
        // If Dst has been visited, skip Jump
        if (Visited[Dst])
          continue;
        // Process block Dst
        Visited[Dst] = true;
        if (!Func.Blocks[Dst].HasUnknownWeight) {
          KnownDstBlocks.push_back(&Func.Blocks[Dst]);
        } else {
          Queue.push(Dst);
          UnknownBlocks.push_back(&Func.Blocks[Dst]);
        }
      }
    }
  }

  /// Verify if rebalancing of the subgraph is feasible. If the checks are
  /// successful, set the unique destination block, DstBlock (can be null).
  bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
                            const std::vector<FlowBlock *> &KnownDstBlocks,
                            const std::vector<FlowBlock *> &UnknownBlocks,
                            FlowBlock *&DstBlock) {
    // If the list of unknown blocks is empty, we don't need rebalancing
    if (UnknownBlocks.empty())
      return false;

    // If there are multiple known sinks, we can't rebalance
    if (KnownDstBlocks.size() > 1)
      return false;
    DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();

    // Verify sinks of the subgraph
    for (auto *Block : UnknownBlocks) {
      if (Block->SuccJumps.empty()) {
        // If there are multiple (known and unknown) sinks, we can't rebalance
        if (DstBlock != nullptr)
          return false;
        continue;
      }
      size_t NumIgnoredJumps = 0;
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          NumIgnoredJumps++;
      }
      // If there is a non-sink block in UnknownBlocks with all jumps ignored,
      // then we can't rebalance
      if (NumIgnoredJumps == Block->SuccJumps.size())
        return false;
    }

    return true;
  }

  /// Decide whether the Jump is ignored while processing an unknown subgraph
  /// rooted at basic block SrcBlock with the destination block, DstBlock.
  bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                  const FlowJump *Jump) {
    // Ignore unlikely jumps with zero flow
    if (Jump->IsUnlikely && Jump->Flow == 0)
      return true;

    auto JumpSource = &Func.Blocks[Jump->Source];
    auto JumpTarget = &Func.Blocks[Jump->Target];

    // Do not ignore jumps coming into DstBlock
    if (DstBlock != nullptr && JumpTarget == DstBlock)
      return false;

    // Ignore jumps out of SrcBlock to known blocks
    if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
      return true;

    // Ignore jumps to known blocks with zero flow
    if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
      return true;

    return false;
  }

  /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
  /// UnknownBlocks in the topological order (so that all jumps are "forward").
  bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                         std::vector<FlowBlock *> &UnknownBlocks) {
    // Extract local in-degrees in the considered subgraph
    auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
    auto fillInDegree = [&](const FlowBlock *Block) {
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        LocalInDegree[Jump->Target]++;
      }
    };
    fillInDegree(SrcBlock);
    for (auto *Block : UnknownBlocks) {
      fillInDegree(Block);
    }
    // A loop containing SrcBlock
    if (LocalInDegree[SrcBlock->Index] > 0)
      return false;

    std::vector<FlowBlock *> AcyclicOrder;
    std::queue<uint64_t> Queue;
    Queue.push(SrcBlock->Index);
    while (!Queue.empty()) {
      FlowBlock *Block = &Func.Blocks[Queue.front()];
      Queue.pop();
      // Stop propagation once we reach DstBlock, if any
      if (DstBlock != nullptr && Block == DstBlock)
        break;

      // Keep an acyclic order of unknown blocks
      if (Block->HasUnknownWeight && Block != SrcBlock)
        AcyclicOrder.push_back(Block);

      // Add to the queue all successors with zero local in-degree
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        uint64_t Dst = Jump->Target;
        LocalInDegree[Dst]--;
        if (LocalInDegree[Dst] == 0) {
          Queue.push(Dst);
        }
      }
    }

    // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
    // of all blocks
    if (UnknownBlocks.size() != AcyclicOrder.size())
      return false;
    UnknownBlocks = AcyclicOrder;
    return true;
  }

  /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
  /// having UnknownBlocks intermediate blocks.
  void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
                                const FlowBlock *DstBlock,
                                const std::vector<FlowBlock *> &UnknownBlocks) {
    assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");

    // Distribute flow from the source block
    uint64_t BlockFlow = 0;
    // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockFlow += Jump->Flow;
    }
    rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

    // Distribute flow from the remaining blocks
    for (auto *Block : UnknownBlocks) {
      assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
      uint64_t BlockFlow = 0;
      // Block's flow is the sum of incoming flows
      for (auto *Jump : Block->PredJumps) {
        BlockFlow += Jump->Flow;
      }
      Block->Flow = BlockFlow;
      rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
    }
  }

  /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
  /// and ending at DstBlock.
  void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                      const FlowBlock *Block, uint64_t BlockFlow) {
    // Process all successor jumps and update corresponding flow values
    size_t BlockDegree = 0;
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockDegree++;
    }
    // If all successor jumps of the block are ignored, skip it
    if (DstBlock == nullptr && BlockDegree == 0)
      return;
    assert(BlockDegree > 0 && "all outgoing jumps are ignored");

    // Each of the Block's successors gets the following amount of flow.
    // Rounding the value up so that all flow is propagated
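    // (e.g., BlockFlow = 5 with BlockDegree = 2 gives SuccFlow = 3; the first
    // jump then receives 3 units and the second receives the remaining 2)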
    uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      uint64_t Flow = std::min(SuccFlow, BlockFlow);
      Jump->Flow = Flow;
      BlockFlow -= Flow;
    }
    assert(BlockFlow == 0 && "not all flow is propagated");
  }

  /// A constant indicating an arbitrary exit block of a function.
  static constexpr uint64_t AnyExitBlock = uint64_t(-1);
  /// Minimum BaseDistance for the jump distance values in island joining.
  static constexpr uint64_t MinBaseDistance = 10000;

  /// Params for flow computation.
  const ProfiParams &Params;
  /// The function.
  FlowFunction &Func;
};

std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block);
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump);

/// Initializing flow network for a given function.
///
/// Every block is split into two nodes that are responsible for (i) an
/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
/// reduction of the block weight.
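///
/// Concretely, a block with index B is modeled by the node pair (2*B, 2*B+1):
/// node 2*B collects the incoming flow and node 2*B+1 emits the outgoing flow;
/// e.g., block 3 maps to nodes 6 and 7.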
void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
                       FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  assert(NumBlocks > 1 && "Too few blocks in a function");
  uint64_t NumJumps = Func.Jumps.size();
  assert(NumJumps > 0 && "Too few jumps in a function");

  // Introducing dummy source/sink pairs to allow flow circulation.
  // The nodes corresponding to blocks of the function have indices in
  // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
  // next four values.
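  // (For instance, with NumBlocks = 5 the block nodes occupy indices 0..9 and
  // S, T, S1, T1 become 10, 11, 12, 13, respectively.)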
  uint64_t S = 2 * NumBlocks;
  uint64_t T = S + 1;
  uint64_t S1 = S + 2;
  uint64_t T1 = S + 3;

  Network.initialize(2 * NumBlocks + 4, S1, T1);

  // Initialize nodes of the flow network
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];

    // Split every block into two auxiliary nodes to allow
    // increase/reduction of the block count.
    uint64_t Bin = 2 * B;
    uint64_t Bout = 2 * B + 1;

    // Edges from S and to T
    if (Block.isEntry()) {
      Network.addEdge(S, Bin, 0);
    } else if (Block.isExit()) {
      Network.addEdge(Bout, T, 0);
    }

    // Assign costs for increasing/decreasing the block counts
    auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);

    // Add the corresponding edges to the network
    Network.addEdge(Bin, Bout, AuxCostInc);
    if (Block.Weight > 0) {
      Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
      Network.addEdge(S1, Bout, Block.Weight, 0);
      Network.addEdge(Bin, T1, Block.Weight, 0);
    }
  }

  // Initialize edges of the flow network
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];

    // Get the endpoints corresponding to the jump
    uint64_t Jin = 2 * Jump.Source + 1;
    uint64_t Jout = 2 * Jump.Target;

    // Assign costs for increasing/decreasing the jump counts
    auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);

    // Add the corresponding edges to the network
    Network.addEdge(Jin, Jout, AuxCostInc);
    if (Jump.Weight > 0) {
      Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
      Network.addEdge(S1, Jout, Jump.Weight, 0);
      Network.addEdge(Jin, T1, Jump.Weight, 0);
    }
  }

  // Make sure we have a valid flow circulation
  Network.addEdge(T, S, 0);
}

/// Assign costs for increasing/decreasing the block counts.
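/// When the costs come from the command-line flags declared above (as in the
/// parameterless applyFlowInference below), increasing a sampled block's count
/// costs 10 per unit by default and decreasing it costs 20; a zero-weight
/// block costs 11 per unit increase, and the entry block costs 40/10 per unit
/// increase/decrease.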
std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block) {
  // Modifying the weight of an unlikely block is expensive
  if (Block.IsUnlikely)
    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

  // Assign default values for the costs
  int64_t CostInc = Params.CostBlockInc;
  int64_t CostDec = Params.CostBlockDec;
  // Update the costs depending on the block metadata
  if (Block.HasUnknownWeight) {
    CostInc = Params.CostBlockUnknownInc;
    CostDec = 0;
  } else {
    // Increasing the count for "cold" blocks with zero initial count is more
    // expensive than for "hot" ones
    if (Block.Weight == 0)
      CostInc = Params.CostBlockZeroInc;
    // Modifying the count of the entry block is expensive
    if (Block.isEntry()) {
      CostInc = Params.CostBlockEntryInc;
      CostDec = Params.CostBlockEntryDec;
    }
  }
  return std::make_pair(CostInc, CostDec);
}

/// Assign costs for increasing/decreasing the jump counts.
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump) {
  // Modifying the weight of an unlikely jump is expensive
  if (Jump.IsUnlikely)
    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

  // Assign default values for the costs
  int64_t CostInc = Params.CostJumpInc;
  int64_t CostDec = Params.CostJumpDec;
  // Update the costs depending on the jump metadata
  if (Jump.Source + 1 == Jump.Target) {
    // Adjusting the fall-through branch
    CostInc = Params.CostJumpFTInc;
    CostDec = Params.CostJumpFTDec;
  }
  if (Jump.HasUnknownWeight) {
    // The cost is different for fall-through and non-fall-through branches
    if (Jump.Source + 1 == Jump.Target)
      CostInc = Params.CostJumpUnknownFTInc;
    else
      CostInc = Params.CostJumpUnknownInc;
    CostDec = 0;
  } else {
    assert(Jump.Weight > 0 && "found a zero-weight jump with a known weight");
  }
  return std::make_pair(CostInc, CostDec);
}

/// Extract resulting block and edge counts from the flow network.
void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
                    FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  uint64_t NumJumps = Func.Jumps.size();

  // Extract resulting jump counts
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];
    uint64_t SrcOut = 2 * Jump.Source + 1;
    uint64_t DstIn = 2 * Jump.Target;

    int64_t Flow = 0;
    int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
    if (Jump.Source != Jump.Target)
      Flow = int64_t(Jump.Weight) + AuxFlow;
    else
      Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);

    Jump.Flow = Flow;
    assert(Flow >= 0 && "negative jump flow");
  }

  // Extract resulting block counts
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];
    Block.Flow = std::max(OutFlow[B], InFlow[B]);
  }
}

#ifndef NDEBUG
/// Verify that the provided block/jump weights are as expected.
void verifyInput(const FlowFunction &Func) {
  // Verify entry and exit blocks
  assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
  size_t NumExitBlocks = 0;
  for (size_t I = 1; I < Func.Blocks.size(); I++) {
    assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
    if (Func.Blocks[I].isExit())
      NumExitBlocks++;
  }
  assert(NumExitBlocks > 0 && "cannot find exit blocks");

  // Verify that there are no parallel edges
  for (auto &Block : Func.Blocks) {
    std::unordered_set<uint64_t> UniqueSuccs;
    for (auto &Jump : Block.SuccJumps) {
      auto It = UniqueSuccs.insert(Jump->Target);
      assert(It.second && "input CFG contains parallel edges");
    }
  }
  // Verify CFG jumps
  for (auto &Block : Func.Blocks) {
    assert((!Block.isEntry() || !Block.isExit()) &&
           "a block cannot be an entry and an exit");
  }
  // Verify input block weights
  for (auto &Block : Func.Blocks) {
    assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
           "non-zero weight of a block w/o weight except for an entry");
  }
  // Verify input jump weights
  for (auto &Jump : Func.Jumps) {
    assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
           "non-zero weight of a jump w/o weight");
  }
}

/// Verify that the computed flow values satisfy flow conservation rules.
void verifyOutput(const FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (const auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }

  uint64_t TotalInFlow = 0;
  uint64_t TotalOutFlow = 0;
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      TotalInFlow += Block.Flow;
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
    } else if (Block.isExit()) {
      TotalOutFlow += Block.Flow;
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    } else {
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    }
  }
  assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");

  // Verify that there are no isolated flow components.
  // One could modify FlowFunction to hold edges indexed by the sources, which
  // would avoid creating this object here.
  auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
  for (const auto &Jump : Func.Jumps) {
    if (Jump.Flow > 0) {
      PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
    }
  }

  // Run BFS from the source along edges with positive flow
  std::queue<uint64_t> Queue;
  auto Visited = BitVector(NumBlocks, false);
  Queue.push(Func.Entry);
  Visited[Func.Entry] = true;
  while (!Queue.empty()) {
    uint64_t Src = Queue.front();
    Queue.pop();
    for (uint64_t Dst : PositiveFlowEdges[Src]) {
      if (!Visited[Dst]) {
        Queue.push(Dst);
        Visited[Dst] = true;
      }
    }
  }

  // Verify that every block that has a positive flow is reached from the
  // source along edges with a positive flow
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
  }
}
#endif

} // end of anonymous namespace

/// Apply the profile inference algorithm for a given function and the provided
/// profi options.
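///
/// A typical usage sketch: populate a FlowFunction with per-block Weight
/// values and FlowJump edges (setting HasUnknownWeight where no samples
/// exist), call applyFlowInference(), and then read the inferred counts from
/// Block.Flow and Jump.Flow.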
void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
  // Check if the function has samples and assign initial flow values
  bool HasSamples = false;
  for (FlowBlock &Block : Func.Blocks) {
    if (Block.Weight > 0)
      HasSamples = true;
    Block.Flow = Block.Weight;
  }
  for (FlowJump &Jump : Func.Jumps) {
    if (Jump.Weight > 0)
      HasSamples = true;
    Jump.Flow = Jump.Weight;
  }

  // Quit early for functions with a single block or ones w/o samples
  if (Func.Blocks.size() <= 1 || !HasSamples)
    return;

#ifndef NDEBUG
  // Verify the input data
  verifyInput(Func);
#endif

  // Create and apply an inference network model
  auto InferenceNetwork = MinCostMaxFlow(Params);
  initializeNetwork(Params, InferenceNetwork, Func);
  InferenceNetwork.run();

  // Extract flow values for every block and every edge
  extractWeights(Params, InferenceNetwork, Func);

  // Post-processing adjustments to the flow
  auto Adjuster = FlowAdjuster(Params, Func);
  Adjuster.run();

#ifndef NDEBUG
  // Verify the result
  verifyOutput(Func);
#endif
}

/// Apply the profile inference algorithm for a given flow function.
void llvm::applyFlowInference(FlowFunction &Func) {
  ProfiParams Params;
  // Set the params from the command-line flags.
  Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
  Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
  Params.JoinIslands = SampleProfileJoinIslands;
  Params.CostBlockInc = SampleProfileProfiCostBlockInc;
  Params.CostBlockDec = SampleProfileProfiCostBlockDec;
  Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
  Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
  Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
  Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;

  applyFlowInference(Params, Func);
}