//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a profile inference algorithm. Given incomplete and
// possibly imprecise block counts, the algorithm reconstructs realistic block
// and edge counts that satisfy flow conservation rules, while minimally
// modifying the input block counts.
//
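// For example (hypothetical counts): given a diamond CFG B0 -> {B1, B2} -> B3
// with sampled block counts [100, 10, 110, 100], flow conservation fails at
// B3 (10 + 110 != 100). The inference would repair the counts to, e.g.,
// [100, 10, 90, 100] and derive consistent edge counts B0->B1 = 10,
// B0->B2 = 90, B1->B3 = 10, B2->B3 = 90.
//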
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <queue>
#include <set>
#include <stack>
#include <unordered_set>

using namespace llvm;
#define DEBUG_TYPE "sample-profile-inference"

namespace {

static cl::opt<bool> SampleProfileEvenFlowDistribution(
    "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."));

static cl::opt<bool> SampleProfileRebalanceUnknown(
    "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
    cl::desc("Evenly re-distribute flow among unknown subgraphs."));

static cl::opt<bool> SampleProfileJoinIslands(
    "sample-profile-join-islands", cl::init(true), cl::Hidden,
    cl::desc("Join isolated components having positive flow."));

static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
    "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
    cl::desc("The cost of increasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
    "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
    cl::desc("The cost of decreasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
    "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
    cl::desc("The cost of increasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
    "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
    cl::desc("The cost of decreasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
    "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
    cl::desc("The cost of increasing the count of a zero-weight block by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
    "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
    cl::desc("The cost of increasing an unknown block's count by one."));

/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
/// during the execution.
static constexpr int64_t INF = ((int64_t)1) << 50;

/// The minimum-cost maximum flow algorithm.
///
/// The algorithm finds the maximum flow of minimum cost on a given (directed)
/// network using a modified version of the classical Moore-Bellman-Ford
/// approach. The algorithm applies a number of augmentation iterations in which
/// flow is sent along paths of positive capacity from the source to the sink.
/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
/// m is the number of edges, n is the number of vertices, and v(f) is the
/// value of the maximum flow. However, the observed running time on typical
/// instances is sub-quadratic, that is, o(n^2).
///
/// The input is a set of edges with specified costs and capacities, and a pair
/// of nodes (source and sink). The output is the flow along each edge of the
/// minimum total cost respecting the given edge capacities.
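///
/// A minimal usage sketch (hypothetical network): send flow from node 0 to
/// node 3 across a diamond, where each addEdge call takes (Src, Dst,
/// Capacity, Cost):
///
///   MinCostMaxFlow Flow(Params);
///   Flow.initialize(/*NodeCount=*/4, /*SourceNode=*/0, /*SinkNode=*/3);
///   Flow.addEdge(0, 1, /*Capacity=*/5, /*Cost=*/1);
///   Flow.addEdge(0, 2, /*Capacity=*/5, /*Cost=*/2);
///   Flow.addEdge(1, 3, /*Capacity=*/5, /*Cost=*/1);
///   Flow.addEdge(2, 3, /*Capacity=*/5, /*Cost=*/2);
///   int64_t TotalCost = Flow.run(); // maximum flow 10 of total cost 30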
class MinCostMaxFlow {
public:
  MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}

  // Initialize algorithm's data structures for a network of a given size.
  void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
    Source = SourceNode;
    Target = SinkNode;

    Nodes = std::vector<Node>(NodeCount);
    Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
    if (Params.EvenFlowDistribution)
      AugmentingEdges =
          std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
  }

  // Run the algorithm.
  int64_t run() {
    LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");

    // Iteratively find an augmentation path/dag in the network and send the
    // flow along its edges
    size_t AugmentationIters = applyFlowAugmentation();

    // Compute the total flow and its cost
    int64_t TotalCost = 0;
    int64_t TotalFlow = 0;
    for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
      for (auto &Edge : Edges[Src]) {
        if (Edge.Flow > 0) {
          TotalCost += Edge.Cost * Edge.Flow;
          if (Src == Source)
            TotalFlow += Edge.Flow;
        }
      }
    }
    LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
                      << " iterations with " << TotalFlow << " total flow"
                      << " of " << TotalCost << " cost\n");
    (void)TotalFlow;
    (void)AugmentationIters;
    return TotalCost;
  }

  /// Add an edge to the network with a specified capacity and a cost.
  /// Multiple edges between a pair of nodes are allowed, but self-edges
  /// are not supported.
  void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
    assert(Capacity > 0 && "adding an edge of zero capacity");
    assert(Src != Dst && "loop edges are not supported");

    Edge SrcEdge;
    SrcEdge.Dst = Dst;
    SrcEdge.Cost = Cost;
    SrcEdge.Capacity = Capacity;
    SrcEdge.Flow = 0;
    SrcEdge.RevEdgeIndex = Edges[Dst].size();

    Edge DstEdge;
    DstEdge.Dst = Src;
    DstEdge.Cost = -Cost;
    DstEdge.Capacity = 0;
    DstEdge.Flow = 0;
    DstEdge.RevEdgeIndex = Edges[Src].size();

    Edges[Src].push_back(SrcEdge);
    Edges[Dst].push_back(DstEdge);
  }

  /// Add an edge of infinite capacity and a given cost to the network.
  void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
    addEdge(Src, Dst, INF, Cost);
  }

  /// Get the total flow from a given source node.
  /// Returns a list of pairs (target node, amount of flow to the target).
  std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
    std::vector<std::pair<uint64_t, int64_t>> Flow;
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Flow > 0)
        Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
    }
    return Flow;
  }

  /// Get the total flow between a pair of nodes.
  int64_t getFlow(uint64_t Src, uint64_t Dst) const {
    int64_t Flow = 0;
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Dst == Dst) {
        Flow += Edge.Flow;
      }
    }
    return Flow;
  }

private:
  /// Iteratively find an augmentation path/dag in the network and send the
  /// flow along its edges. The method returns the number of applied iterations.
  size_t applyFlowAugmentation() {
    size_t AugmentationIters = 0;
    while (findAugmentingPath()) {
      uint64_t PathCapacity = computeAugmentingPathCapacity();
      while (PathCapacity > 0) {
        bool Progress = false;
        if (Params.EvenFlowDistribution) {
          // Identify node/edge candidates for augmentation
          identifyShortestEdges(PathCapacity);

          // Find an augmenting DAG
          auto AugmentingOrder = findAugmentingDAG();

          // Apply the DAG augmentation
          Progress = augmentFlowAlongDAG(AugmentingOrder);
          PathCapacity = computeAugmentingPathCapacity();
        }

        if (!Progress) {
          augmentFlowAlongPath(PathCapacity);
          PathCapacity = 0;
        }

        AugmentationIters++;
      }
    }
    return AugmentationIters;
  }

  /// Compute the capacity of the canonical augmenting path. If the path is
  /// saturated (that is, no flow can be sent along the path), then return 0.
  uint64_t computeAugmentingPathCapacity() {
    uint64_t PathCapacity = INF;
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

      assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
      uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
      PathCapacity = std::min(PathCapacity, EdgeCapacity);

      Now = Pred;
    }
    return PathCapacity;
  }

  /// Check for existence of an augmenting path with a positive capacity.
  bool findAugmentingPath() {
    // Initialize data structures
    for (auto &Node : Nodes) {
      Node.Distance = INF;
      Node.ParentNode = uint64_t(-1);
      Node.ParentEdgeIndex = uint64_t(-1);
      Node.Taken = false;
    }

    std::queue<uint64_t> Queue;
    Queue.push(Source);
    Nodes[Source].Distance = 0;
    Nodes[Source].Taken = true;
    while (!Queue.empty()) {
      uint64_t Src = Queue.front();
      Queue.pop();
      Nodes[Src].Taken = false;
      // Although the residual network contains edges with negative costs
      // (in particular, backward edges), it can be shown that there are no
      // negative-weight cycles and the following two invariants are maintained:
      // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
      // where Dist is the length of the shortest path between two nodes. This
      // allows us to prune the search-space of the path-finding algorithm using
      // the following early-stop criteria:
      // -- If we find a path with zero-distance from Source to Target, stop the
      //    search, as the path is the shortest since Dist[Source, Target] >= 0;
      // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
      //    process node V, as it is guaranteed _not_ to be on a shortest path
      //    from Source to Target; it follows from inequalities
      //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
      //                         >= Dist[Source, V]
      if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
        break;
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      // Process adjacent edges
      for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
        auto &Edge = Edges[Src][EdgeIdx];
        if (Edge.Flow < Edge.Capacity) {
          uint64_t Dst = Edge.Dst;
          int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
          if (Nodes[Dst].Distance > NewDistance) {
            // Update the distance and the parent node/edge
            Nodes[Dst].Distance = NewDistance;
            Nodes[Dst].ParentNode = Src;
            Nodes[Dst].ParentEdgeIndex = EdgeIdx;
            // Add the node to the queue, if it is not there yet
            if (!Nodes[Dst].Taken) {
              Queue.push(Dst);
              Nodes[Dst].Taken = true;
            }
          }
        }
      }
    }

    return Nodes[Target].Distance != INF;
  }

  /// Update the current flow along the augmenting path.
  void augmentFlowAlongPath(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting path");
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
      auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];

      Edge.Flow += PathCapacity;
      RevEdge.Flow -= PathCapacity;

      Now = Pred;
    }
  }

  /// Find an Augmenting DAG order using a modified version of DFS in which we
  /// can visit a node multiple times. In the DFS search, when scanning each
  /// edge out of a node, continue search at Edge.Dst endpoint if it has not
  /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
  /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
  /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
  /// that starts with Source and ends with Target.
  std::vector<uint64_t> findAugmentingDAG() {
    // We use a stack based implementation of DFS to avoid recursion.
    // Defining DFS data structures:
    // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
    //  - we are currently visiting Nodes[NodeIdx] and
    //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
    typedef std::pair<uint64_t, uint64_t> StackItemType;
    std::stack<StackItemType> Stack;
    std::vector<uint64_t> AugmentingOrder;

    // Phase 0: Initialize Node attributes and Time for DFS run
    for (auto &Node : Nodes) {
      Node.Discovery = 0;
      Node.Finish = 0;
      Node.NumCalls = 0;
      Node.Taken = false;
    }
    uint64_t Time = 0;
    // Mark Target as Taken
    // Taken attribute will be propagated backwards from Target towards Source
    Nodes[Target].Taken = true;

    // Phase 1: Start DFS traversal from Source
    Stack.emplace(Source, 0);
    Nodes[Source].Discovery = ++Time;
    while (!Stack.empty()) {
      auto NodeIdx = Stack.top().first;
      auto EdgeIdx = Stack.top().second;

      // If we haven't scanned all edges out of NodeIdx, continue scanning
      if (EdgeIdx < Edges[NodeIdx].size()) {
        auto &Edge = Edges[NodeIdx][EdgeIdx];
        auto &Dst = Nodes[Edge.Dst];
        Stack.top().second++;

        if (Edge.OnShortestPath) {
          // If we haven't seen Edge.Dst so far, continue DFS search there
          if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
            Dst.Discovery = ++Time;
            Stack.emplace(Edge.Dst, 0);
            Dst.NumCalls++;
          } else if (Dst.Taken && Dst.Finish != 0) {
            // Else, if Edge.Dst already has a path to Target, then NodeIdx
            // has one too
            Nodes[NodeIdx].Taken = true;
          }
        }
      } else {
        // If we are done scanning all edges out of NodeIdx
        Stack.pop();
        // If we haven't found a path from NodeIdx to Target, forget about it
        if (!Nodes[NodeIdx].Taken) {
          Nodes[NodeIdx].Discovery = 0;
        } else {
          // If we have found a path from NodeIdx to Target, then finish NodeIdx
          // and propagate Taken flag to DFS parent unless at the Source
          Nodes[NodeIdx].Finish = ++Time;
          // NodeIdx == Source if and only if the stack is empty
          if (NodeIdx != Source) {
            assert(!Stack.empty() && "empty stack while running dfs");
            Nodes[Stack.top().first].Taken = true;
          }
          AugmentingOrder.push_back(NodeIdx);
        }
      }
    }
    // Nodes are collected in decreasing Finish time, so the order is reversed
    std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());

    // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
    for (size_t Src : AugmentingOrder) {
      AugmentingEdges[Src].clear();
      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
            Nodes[Dst].Finish < Nodes[Src].Finish) {
          AugmentingEdges[Src].push_back(&Edge);
        }
      }
      assert((Src == Target || !AugmentingEdges[Src].empty()) &&
             "incorrectly constructed augmenting edges");
    }

    return AugmentingOrder;
  }

  /// Update the current flow along the given (acyclic) subgraph specified by
  /// the vertex order, AugmentingOrder. The objective is to send as much flow
  /// as possible while evenly distributing flow among successors of each node.
  /// After the update at least one edge is saturated.
  bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
    // Phase 0: Initialization
    for (uint64_t Src : AugmentingOrder) {
      Nodes[Src].FracFlow = 0;
      Nodes[Src].IntFlow = 0;
      for (auto &Edge : AugmentingEdges[Src]) {
        Edge->AugmentedFlow = 0;
      }
    }

    // Phase 1: Send a unit of fractional flow along the DAG
    uint64_t MaxFlowAmount = INF;
    Nodes[Source].FracFlow = 1.0;
    for (uint64_t Src : AugmentingOrder) {
      assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
             "incorrectly computed fractional flow");
      // Distribute flow evenly among successors of Src
      uint64_t Degree = AugmentingEdges[Src].size();
      for (auto &Edge : AugmentingEdges[Src]) {
        double EdgeFlow = Nodes[Src].FracFlow / Degree;
        Nodes[Edge->Dst].FracFlow += EdgeFlow;
        if (Edge->Capacity == INF)
          continue;
        uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
        MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
      }
    }
    // Stop early if we cannot send any (integral) flow from Source to Target
    if (MaxFlowAmount == 0)
      return false;

    // Phase 2: Send an integral flow of MaxFlowAmount
    Nodes[Source].IntFlow = MaxFlowAmount;
    for (uint64_t Src : AugmentingOrder) {
      if (Src == Target)
        break;
      // Distribute flow evenly among successors of Src, rounding up to make
      // sure all flow is sent
      uint64_t Degree = AugmentingEdges[Src].size();
      // We are guaranteed that Node[Src].IntFlow <= SuccFlow * Degree
      uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
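      // For instance (hypothetical numbers): IntFlow = 5 with Degree = 2
      // yields SuccFlow = 3, so the first successor gets 3 units and the
      // second the remaining 2, and no flow is lost to rounding.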
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
        EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
        Nodes[Dst].IntFlow += EdgeFlow;
        Nodes[Src].IntFlow -= EdgeFlow;
        Edge->AugmentedFlow += EdgeFlow;
      }
    }
    assert(Nodes[Target].IntFlow <= MaxFlowAmount);
    Nodes[Target].IntFlow = 0;

    // Phase 3: Send excess flow back traversing the nodes backwards.
    // Because of rounding, not all flow can be sent along the edges of Src.
    // Hence, sending the remaining flow back to maintain flow conservation
    for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
      uint64_t Src = AugmentingOrder[Idx - 1];
      // Try to send excess flow back along each edge.
      // Make sure we only send back flow we just augmented (AugmentedFlow).
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        if (Nodes[Dst].IntFlow == 0)
          continue;
        uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
        Nodes[Dst].IntFlow -= EdgeFlow;
        Nodes[Src].IntFlow += EdgeFlow;
        Edge->AugmentedFlow -= EdgeFlow;
      }
    }

    // Phase 4: Update flow values along all edges
    bool HasSaturatedEdges = false;
    for (uint64_t Src : AugmentingOrder) {
      // Verify that we have sent all the excess flow from the node
      assert(Src == Source || Nodes[Src].IntFlow == 0);
      for (auto &Edge : AugmentingEdges[Src]) {
        assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
        // Update flow values along the edge and its reverse copy
        auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
        Edge->Flow += Edge->AugmentedFlow;
        RevEdge.Flow -= Edge->AugmentedFlow;
        if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
          HasSaturatedEdges = true;
      }
    }

    // The augmentation is successful iff at least one edge becomes saturated
    return HasSaturatedEdges;
  }

  /// Identify candidate (shortest) edges for augmentation.
  void identifyShortestEdges(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
    // To make sure the augmentation DAG contains only edges with large residual
    // capacity, we prune all edges whose capacity is below a fraction of
    // the capacity of the augmented path.
    // (All edges of the path itself are always in the DAG)
    uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));

    // Decide which edges are on a shortest path from Source to Target
    for (size_t Src = 0; Src < Nodes.size(); Src++) {
      // An edge cannot be augmenting if the endpoint has large distance
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        Edge.OnShortestPath =
            Src != Target && Dst != Source &&
            Nodes[Dst].Distance <= Nodes[Target].Distance &&
            Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
            Edge.Capacity > Edge.Flow &&
            uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
      }
    }
  }

  /// Maximum number of DFS iterations for DAG finding.
  static constexpr uint64_t MaxDfsCalls = 10;

  /// A node in a flow network.
  struct Node {
    /// The cost of the cheapest path from the source to the current node.
    int64_t Distance;
    /// The node preceding the current one in the path.
    uint64_t ParentNode;
    /// The index of the edge between ParentNode and the current node.
    uint64_t ParentEdgeIndex;
    /// An indicator of whether the current node is in a queue.
    bool Taken;

    /// Data fields utilized in DAG-augmentation:
    /// Fractional flow.
    double FracFlow;
    /// Integral flow.
    uint64_t IntFlow;
    /// Discovery time.
    uint64_t Discovery;
    /// Finish time.
    uint64_t Finish;
    /// Number of DFS calls made on the node.
    uint64_t NumCalls;
  };

  /// An edge in a flow network.
  struct Edge {
    /// The cost of the edge.
    int64_t Cost;
    /// The capacity of the edge.
    int64_t Capacity;
    /// The current flow on the edge.
    int64_t Flow;
    /// The destination node of the edge.
    uint64_t Dst;
    /// The index of the reverse edge between Dst and the current node.
    uint64_t RevEdgeIndex;

    /// Data fields utilized in DAG-augmentation:
    /// Whether the edge is currently on a shortest path from Source to Target.
    bool OnShortestPath;
    /// Extra flow along the edge.
    uint64_t AugmentedFlow;
  };

  /// The set of network nodes.
  std::vector<Node> Nodes;
  /// The set of network edges.
  std::vector<std::vector<Edge>> Edges;
  /// Source node of the flow.
  uint64_t Source;
  /// Target (sink) node of the flow.
  uint64_t Target;
  /// Augmenting edges.
  std::vector<std::vector<Edge *>> AugmentingEdges;
  /// Params for flow computation.
  const ProfiParams &Params;
};

/// A post-processing adjustment of the control flow. It applies two steps that
/// reroute some flow to make it more realistic:
///
/// - First, it removes all isolated components ("islands") with a positive flow
///   that are unreachable from the entry block. For every such component, we
///   find the shortest path from the entry to an exit passing through the
///   component, and increase the flow by one unit along the path.
///
/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
///   with no sampled counts. Then it rebalances the flow that goes through such
///   a subgraph so that each branch is taken with probability 50%.
///   An unknown subgraph is such that for every two nodes u and v:
///     - u dominates v and u is not unknown;
///     - v post-dominates u; and
///     - all inner-nodes of all (u,v)-paths are unknown.
///
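/// A hypothetical illustration of the second step: if a known block u with
/// flow 100 branches into two chains of unknown blocks that re-join at a
/// known block v, rebalancing assigns 50 units of flow to each branch,
/// regardless of what the unsampled intermediate counts suggested before.
///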
class FlowAdjuster {
public:
  FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
      : Params(Params), Func(Func) {}

  /// Apply the post-processing.
  void run() {
    if (Params.JoinIslands) {
      // Adjust the flow to get rid of isolated components
      joinIsolatedComponents();
    }

    if (Params.RebalanceUnknown) {
      // Rebalance the flow inside unknown subgraphs
      rebalanceUnknownSubgraphs();
    }
  }

private:
  void joinIsolatedComponents() {
    // Find blocks that are reachable from the source
    auto Visited = BitVector(NumBlocks(), false);
    findReachable(Func.Entry, Visited);

    // Iterate over all non-reachable blocks and adjust their weights
    for (uint64_t I = 0; I < NumBlocks(); I++) {
      auto &Block = Func.Blocks[I];
      if (Block.Flow > 0 && !Visited[I]) {
        // Find a path from the entry to an exit passing through the block I
        auto Path = findShortestPath(I);
        // Increase the flow along the path
        assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
               "incorrectly computed path adjusting control flow");
        Func.Blocks[Func.Entry].Flow += 1;
        for (auto &Jump : Path) {
          Jump->Flow += 1;
          Func.Blocks[Jump->Target].Flow += 1;
          // Update reachability
          findReachable(Jump->Target, Visited);
        }
      }
    }
  }

  /// Run BFS from a given block along the jumps with a positive flow and mark
  /// all reachable blocks.
  void findReachable(uint64_t Src, BitVector &Visited) {
    if (Visited[Src])
      return;
    std::queue<uint64_t> Queue;
    Queue.push(Src);
    Visited[Src] = true;
    while (!Queue.empty()) {
      Src = Queue.front();
      Queue.pop();
      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        if (Jump->Flow > 0 && !Visited[Dst]) {
          Queue.push(Dst);
          Visited[Dst] = true;
        }
      }
    }
  }

  /// Find the shortest path from the entry block to an exit block passing
  /// through a given block.
  std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
    // A path from the entry block to BlockIdx
    auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
    // A path from BlockIdx to an exit block
    auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);

    // Concatenate the two paths
    std::vector<FlowJump *> Result;
    llvm::append_range(Result, ForwardPath);
    llvm::append_range(Result, BackwardPath);
    return Result;
  }

  /// Apply the Dijkstra algorithm to find the shortest path from a given
  /// Source to a given Target block.
  /// If Target == -1, then the path ends at an exit block.
  std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
    // Quit early, if possible
    if (Source == Target)
      return std::vector<FlowJump *>();
    if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
      return std::vector<FlowJump *>();

    // Initialize data structures
    auto Distance = std::vector<int64_t>(NumBlocks(), INF);
    auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
    Distance[Source] = 0;
    std::set<std::pair<uint64_t, uint64_t>> Queue;
    Queue.insert(std::make_pair(Distance[Source], Source));

    // Run the Dijkstra algorithm
    while (!Queue.empty()) {
      uint64_t Src = Queue.begin()->second;
      Queue.erase(Queue.begin());
      // If we found a solution, quit early
      if (Src == Target ||
          (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
        break;

      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        int64_t JumpDist = jumpDistance(Jump);
        if (Distance[Dst] > Distance[Src] + JumpDist) {
          Queue.erase(std::make_pair(Distance[Dst], Dst));

          Distance[Dst] = Distance[Src] + JumpDist;
          Parent[Dst] = Jump;

          Queue.insert(std::make_pair(Distance[Dst], Dst));
        }
      }
    }
    // If Target is not provided, find the closest exit block
    if (Target == AnyExitBlock) {
      for (uint64_t I = 0; I < NumBlocks(); I++) {
        if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
          if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
            Target = I;
          }
        }
      }
    }
    assert(Parent[Target] != nullptr && "a path does not exist");

    // Extract the constructed path
    std::vector<FlowJump *> Result;
    uint64_t Now = Target;
    while (Now != Source) {
      assert(Now == Parent[Now]->Target && "incorrect parent jump");
      Result.push_back(Parent[Now]);
      Now = Parent[Now]->Source;
    }
    // Reverse the path, since it is extracted from Target to Source
    std::reverse(Result.begin(), Result.end());
    return Result;
  }

  /// A distance of a path for a given jump.
  /// In order to encourage the path to use blocks/jumps with large positive
  /// flow, and to avoid changing branch probabilities of outgoing edges
  /// drastically, set the jump distance so as:
  ///   - to minimize the number of unlikely jumps used and, subject to that,
  ///   - to minimize the number of Flow == 0 jumps used and, subject to that,
  ///   - to minimize the total multiplicative Flow increase for the remaining
  ///     edges.
  /// To capture this objective with integer distances, we round off fractional
  /// parts to a multiple of 1 / BaseDistance.
  int64_t jumpDistance(FlowJump *Jump) const {
    if (Jump->IsUnlikely)
      return Params.CostUnlikely;
    uint64_t BaseDistance =
        std::max(FlowAdjuster::MinBaseDistance,
                 std::min(Func.Blocks[Func.Entry].Flow,
                          Params.CostUnlikely / (2 * (NumBlocks() + 1))));
    if (Jump->Flow > 0)
      return BaseDistance + BaseDistance / Jump->Flow;
    return 2 * BaseDistance * (NumBlocks() + 1);
  }
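
  // A hypothetical worked example: for a function with 9 blocks,
  // BaseDistance = max(10000, min(EntryFlow, CostUnlikely / 20)); a jump
  // with Flow = 4 then gets distance BaseDistance + BaseDistance / 4, while
  // a zero-flow jump costs 2 * BaseDistance * 10, so Dijkstra strongly
  // prefers jumps that already carry flow.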

  uint64_t NumBlocks() const { return Func.Blocks.size(); }

  /// Rebalance unknown subgraphs so that the flow is split evenly across the
  /// outgoing branches of every block of the subgraph. The method iterates over
  /// blocks with known weight and identifies unknown subgraphs rooted at the
  /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
  void rebalanceUnknownSubgraphs() {
    // Try to find unknown subgraphs from each block
    for (const FlowBlock &SrcBlock : Func.Blocks) {
      // Verify if rebalancing rooted at SrcBlock is feasible
      if (!canRebalanceAtRoot(&SrcBlock))
        continue;

      // Find an unknown subgraph starting at SrcBlock. Along the way,
      // fill in known destinations and intermediate unknown blocks.
      std::vector<FlowBlock *> UnknownBlocks;
      std::vector<FlowBlock *> KnownDstBlocks;
      findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);

      // Verify if rebalancing of the subgraph is feasible. If the search is
      // successful, find the unique destination block (which can be null)
      FlowBlock *DstBlock = nullptr;
      if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
                                DstBlock))
        continue;

      // We cannot rebalance subgraphs containing cycles among unknown blocks
      if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
        continue;

      // Rebalance the flow
      rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
    }
  }

  /// Verify if rebalancing rooted at a given block is possible.
  bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
    // Do not attempt to find unknown subgraphs from an unknown or a
    // zero-flow block
    if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
      return false;

    // Do not attempt to process subgraphs from a block w/o unknown successors
    bool HasUnknownSuccs = false;
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (Func.Blocks[Jump->Target].HasUnknownWeight) {
        HasUnknownSuccs = true;
        break;
      }
    }
    if (!HasUnknownSuccs)
      return false;

    return true;
  }

  /// Find an unknown subgraph starting at block SrcBlock. The method sets
  /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
  void findUnknownSubgraph(const FlowBlock *SrcBlock,
                           std::vector<FlowBlock *> &KnownDstBlocks,
                           std::vector<FlowBlock *> &UnknownBlocks) {
    // Run BFS from SrcBlock and make sure all paths are going through unknown
    // blocks and end at a known DstBlock
    auto Visited = BitVector(NumBlocks(), false);
    std::queue<uint64_t> Queue;

    Queue.push(SrcBlock->Index);
    Visited[SrcBlock->Index] = true;
    while (!Queue.empty()) {
      auto &Block = Func.Blocks[Queue.front()];
      Queue.pop();
      // Process blocks reachable from Block
      for (auto *Jump : Block.SuccJumps) {
        // If Jump can be ignored, skip it
        if (ignoreJump(SrcBlock, nullptr, Jump))
          continue;

        uint64_t Dst = Jump->Target;
        // If Dst has been visited, skip Jump
        if (Visited[Dst])
          continue;
        // Process block Dst
        Visited[Dst] = true;
        if (!Func.Blocks[Dst].HasUnknownWeight) {
          KnownDstBlocks.push_back(&Func.Blocks[Dst]);
        } else {
          Queue.push(Dst);
          UnknownBlocks.push_back(&Func.Blocks[Dst]);
        }
      }
    }
  }

  /// Verify if rebalancing of the subgraph is feasible. If the checks are
  /// successful, set the unique destination block, DstBlock (can be null).
  bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
                            const std::vector<FlowBlock *> &KnownDstBlocks,
                            const std::vector<FlowBlock *> &UnknownBlocks,
                            FlowBlock *&DstBlock) {
    // If the list of unknown blocks is empty, we don't need rebalancing
    if (UnknownBlocks.empty())
      return false;

    // If there are multiple known sinks, we can't rebalance
    if (KnownDstBlocks.size() > 1)
      return false;
    DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();

    // Verify sinks of the subgraph
    for (auto *Block : UnknownBlocks) {
      if (Block->SuccJumps.empty()) {
        // If there are multiple (known and unknown) sinks, we can't rebalance
        if (DstBlock != nullptr)
          return false;
        continue;
      }
      size_t NumIgnoredJumps = 0;
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          NumIgnoredJumps++;
      }
      // If there is a non-sink block in UnknownBlocks with all jumps ignored,
      // then we can't rebalance
      if (NumIgnoredJumps == Block->SuccJumps.size())
        return false;
    }

    return true;
  }

  /// Decide whether the Jump is ignored while processing an unknown subgraph
  /// rooted at basic block SrcBlock with the destination block, DstBlock.
  bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                  const FlowJump *Jump) {
    // Ignore unlikely jumps with zero flow
    if (Jump->IsUnlikely && Jump->Flow == 0)
      return true;

    auto JumpSource = &Func.Blocks[Jump->Source];
    auto JumpTarget = &Func.Blocks[Jump->Target];

    // Do not ignore jumps coming into DstBlock
    if (DstBlock != nullptr && JumpTarget == DstBlock)
      return false;

    // Ignore jumps out of SrcBlock to known blocks
    if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
      return true;

    // Ignore jumps to known blocks with zero flow
    if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
      return true;

    return false;
  }

  /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
  /// UnknownBlocks in the topological order (so that all jumps are "forward").
  bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                         std::vector<FlowBlock *> &UnknownBlocks) {
    // Extract local in-degrees in the considered subgraph
    auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
    auto fillInDegree = [&](const FlowBlock *Block) {
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        LocalInDegree[Jump->Target]++;
      }
    };
    fillInDegree(SrcBlock);
    for (auto *Block : UnknownBlocks) {
      fillInDegree(Block);
    }
    // A loop containing SrcBlock
    if (LocalInDegree[SrcBlock->Index] > 0)
      return false;

    std::vector<FlowBlock *> AcyclicOrder;
    std::queue<uint64_t> Queue;
    Queue.push(SrcBlock->Index);
    while (!Queue.empty()) {
      FlowBlock *Block = &Func.Blocks[Queue.front()];
      Queue.pop();
      // Stop propagation once we reach DstBlock, if any
      if (DstBlock != nullptr && Block == DstBlock)
        break;

      // Keep an acyclic order of unknown blocks
      if (Block->HasUnknownWeight && Block != SrcBlock)
        AcyclicOrder.push_back(Block);

      // Add to the queue all successors with zero local in-degree
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        uint64_t Dst = Jump->Target;
        LocalInDegree[Dst]--;
        if (LocalInDegree[Dst] == 0) {
          Queue.push(Dst);
        }
      }
    }

    // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
    // of all blocks
    if (UnknownBlocks.size() != AcyclicOrder.size())
      return false;
    UnknownBlocks = AcyclicOrder;
    return true;
  }

  /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
  /// having UnknownBlocks intermediate blocks.
  void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
                                const FlowBlock *DstBlock,
                                const std::vector<FlowBlock *> &UnknownBlocks) {
    assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");

    // Distribute flow from the source block
    uint64_t BlockFlow = 0;
    // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockFlow += Jump->Flow;
    }
    rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

    // Distribute flow from the remaining blocks
    for (auto *Block : UnknownBlocks) {
      assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
      uint64_t BlockFlow = 0;
      // Block's flow is the sum of incoming flows
      for (auto *Jump : Block->PredJumps) {
        BlockFlow += Jump->Flow;
      }
      Block->Flow = BlockFlow;
      rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
    }
  }

  /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
  /// and ending at DstBlock.
  void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                      const FlowBlock *Block, uint64_t BlockFlow) {
    // Process all successor jumps and update corresponding flow values
    size_t BlockDegree = 0;
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockDegree++;
    }
    // If all successor jumps of the block are ignored, skip it
    if (DstBlock == nullptr && BlockDegree == 0)
      return;
    assert(BlockDegree > 0 && "all outgoing jumps are ignored");

    // Each of the Block's successors gets the following amount of flow.
    // Rounding the value up so that all flow is propagated
    uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
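    // For instance (hypothetical numbers): BlockFlow = 7 with BlockDegree = 3
    // gives SuccFlow = 3, so the successors receive 3, 3, and 1 units and the
    // entire BlockFlow is propagated.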
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      uint64_t Flow = std::min(SuccFlow, BlockFlow);
      Jump->Flow = Flow;
      BlockFlow -= Flow;
    }
    assert(BlockFlow == 0 && "not all flow is propagated");
  }

  /// A constant indicating an arbitrary exit block of a function.
  static constexpr uint64_t AnyExitBlock = uint64_t(-1);
  /// Minimum BaseDistance for the jump distance values in island joining.
  static constexpr uint64_t MinBaseDistance = 10000;

  /// Params for flow computation.
  const ProfiParams &Params;
  /// The function.
  FlowFunction &Func;
};

std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block);
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump);

/// Initialize the flow network for a given function.
///
/// Every block is split into two nodes that are responsible for (i) an
/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
/// reduction of the block weight.
void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
                       FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  assert(NumBlocks > 1 && "Too few blocks in a function");
  uint64_t NumJumps = Func.Jumps.size();
  assert(NumJumps > 0 && "Too few jumps in a function");

  // Introducing dummy source/sink pairs to allow flow circulation.
  // The nodes corresponding to blocks of the function have indices in
  // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
  // next four values.
  uint64_t S = 2 * NumBlocks;
  uint64_t T = S + 1;
  uint64_t S1 = S + 2;
  uint64_t T1 = S + 3;
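  // For example (illustrative): with NumBlocks = 3, block B maps to nodes
  // 2*B and 2*B+1 (indices 0..5), and the dummy nodes are S = 6, T = 7,
  // S1 = 8, and T1 = 9.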

  Network.initialize(2 * NumBlocks + 4, S1, T1);

  // Initialize nodes of the flow network
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];

    // Split every block into two auxiliary nodes to allow
    // increase/reduction of the block count.
    uint64_t Bin = 2 * B;
    uint64_t Bout = 2 * B + 1;

    // Edges from S and to T
    if (Block.isEntry()) {
      Network.addEdge(S, Bin, 0);
    } else if (Block.isExit()) {
      Network.addEdge(Bout, T, 0);
    }

    // Assign costs for increasing/decreasing the block counts
    auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);

    // Add the corresponding edges to the network
    Network.addEdge(Bin, Bout, AuxCostInc);
    if (Block.Weight > 0) {
      Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
      Network.addEdge(S1, Bout, Block.Weight, 0);
      Network.addEdge(Bin, T1, Block.Weight, 0);
    }
  }

  // Initialize edges of the flow network
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];

    // Get the endpoints corresponding to the jump
    uint64_t Jin = 2 * Jump.Source + 1;
    uint64_t Jout = 2 * Jump.Target;

    // Assign costs for increasing/decreasing the jump counts
    auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);

    // Add the corresponding edges to the network
    Network.addEdge(Jin, Jout, AuxCostInc);
    if (Jump.Weight > 0) {
      Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
      Network.addEdge(S1, Jout, Jump.Weight, 0);
      Network.addEdge(Jin, T1, Jump.Weight, 0);
    }
  }

  // Make sure we have a valid flow circulation
  Network.addEdge(T, S, 0);
}

/// Assign costs for increasing/decreasing the block counts.
std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block) {
  // Modifying the weight of an unlikely block is expensive
  if (Block.IsUnlikely)
    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

  // Assign default values for the costs
  int64_t CostInc = Params.CostBlockInc;
  int64_t CostDec = Params.CostBlockDec;
  // Update the costs depending on the block metadata
  if (Block.HasUnknownWeight) {
    CostInc = Params.CostBlockUnknownInc;
    CostDec = 0;
  } else {
    // Increasing the count for "cold" blocks with zero initial count is more
    // expensive than for "hot" ones
    if (Block.Weight == 0)
      CostInc = Params.CostBlockZeroInc;
    // Modifying the count of the entry block is expensive
    if (Block.isEntry()) {
      CostInc = Params.CostBlockEntryInc;
      CostDec = Params.CostBlockEntryDec;
    }
  }
  return std::make_pair(CostInc, CostDec);
}
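
// With the default command-line costs defined above, for example, increasing
// a sampled block's count costs 10 per unit and decreasing it costs 20; the
// entry block costs 40 to increase and 10 to decrease, a zero-weight block
// costs 11 to increase, and an unknown-weight block is increased for free.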

/// Assign costs for increasing/decreasing the jump counts.
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump) {
  // Modifying the weight of an unlikely jump is expensive
  if (Jump.IsUnlikely)
    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

  // Assign default values for the costs
  int64_t CostInc = Params.CostJumpInc;
  int64_t CostDec = Params.CostJumpDec;
  // Update the costs depending on the jump metadata
  if (Jump.Source + 1 == Jump.Target) {
    // Adjusting the fall-through branch
    CostInc = Params.CostJumpFTInc;
    CostDec = Params.CostJumpFTDec;
  }
  if (Jump.HasUnknownWeight) {
    // The cost is different for fall-through and non-fall-through branches
    if (Jump.Source + 1 == Jump.Target)
      CostInc = Params.CostJumpUnknownFTInc;
    else
      CostInc = Params.CostJumpUnknownInc;
    CostDec = 0;
  } else {
    assert(Jump.Weight > 0 && "found a zero-weight jump with a known weight");
  }
  return std::make_pair(CostInc, CostDec);
}

/// Extract resulting block and edge counts from the flow network.
void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
                    FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  uint64_t NumJumps = Func.Jumps.size();

  // Extract resulting jump counts
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];
    uint64_t SrcOut = 2 * Jump.Source + 1;
    uint64_t DstIn = 2 * Jump.Target;

    int64_t Flow = 0;
    int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
    if (Jump.Source != Jump.Target)
      Flow = int64_t(Jump.Weight) + AuxFlow;
    else
      Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);

    Jump.Flow = Flow;
    assert(Flow >= 0 && "negative jump flow");
  }

  // Extract resulting block counts
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];
    Block.Flow = std::max(OutFlow[B], InFlow[B]);
  }
}

#ifndef NDEBUG
/// Verify that the provided block/jump weights are as expected.
void verifyInput(const FlowFunction &Func) {
  // Verify entry and exit blocks
  assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
  size_t NumExitBlocks = 0;
  for (size_t I = 1; I < Func.Blocks.size(); I++) {
    assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
    if (Func.Blocks[I].isExit())
      NumExitBlocks++;
  }
  assert(NumExitBlocks > 0 && "cannot find exit blocks");

  // Verify that there are no parallel edges
  for (auto &Block : Func.Blocks) {
    std::unordered_set<uint64_t> UniqueSuccs;
    for (auto &Jump : Block.SuccJumps) {
      auto It = UniqueSuccs.insert(Jump->Target);
      assert(It.second && "input CFG contains parallel edges");
    }
  }
  // Verify CFG jumps
  for (auto &Block : Func.Blocks) {
    assert((!Block.isEntry() || !Block.isExit()) &&
           "a block cannot be an entry and an exit");
  }
  // Verify input block weights
  for (auto &Block : Func.Blocks) {
    assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
           "non-zero weight of a block w/o weight except for an entry");
  }
  // Verify input jump weights
  for (auto &Jump : Func.Jumps) {
    assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
           "non-zero weight of a jump w/o weight");
  }
}

/// Verify that the computed flow values satisfy flow conservation rules.
void verifyOutput(const FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (const auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }

  uint64_t TotalInFlow = 0;
  uint64_t TotalOutFlow = 0;
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      TotalInFlow += Block.Flow;
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
    } else if (Block.isExit()) {
      TotalOutFlow += Block.Flow;
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    } else {
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    }
  }
  assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");

  // Verify that there are no isolated flow components
  // One could modify FlowFunction to hold edges indexed by the sources, which
  // would avoid creating this object
  auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
  for (const auto &Jump : Func.Jumps) {
    if (Jump.Flow > 0) {
      PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
    }
  }

  // Run BFS from the source along edges with positive flow
  std::queue<uint64_t> Queue;
  auto Visited = BitVector(NumBlocks, false);
  Queue.push(Func.Entry);
  Visited[Func.Entry] = true;
  while (!Queue.empty()) {
    uint64_t Src = Queue.front();
    Queue.pop();
    for (uint64_t Dst : PositiveFlowEdges[Src]) {
      if (!Visited[Dst]) {
        Queue.push(Dst);
        Visited[Dst] = true;
      }
    }
  }

  // Verify that every block that has a positive flow is reached from the source
  // along edges with a positive flow
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
  }
}
#endif

} // end of anonymous namespace

/// Apply the profile inference algorithm for a given function and provided
/// profi options
void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
  // Check if the function has samples and assign initial flow values
  bool HasSamples = false;
  for (FlowBlock &Block : Func.Blocks) {
    if (Block.Weight > 0)
      HasSamples = true;
    Block.Flow = Block.Weight;
  }
  for (FlowJump &Jump : Func.Jumps) {
    if (Jump.Weight > 0)
      HasSamples = true;
    Jump.Flow = Jump.Weight;
  }

  // Quit early for functions with a single block or ones w/o samples
  if (Func.Blocks.size() <= 1 || !HasSamples)
    return;

#ifndef NDEBUG
  // Verify the input data
  verifyInput(Func);
#endif

  // Create and apply an inference network model
  auto InferenceNetwork = MinCostMaxFlow(Params);
  initializeNetwork(Params, InferenceNetwork, Func);
  InferenceNetwork.run();

  // Extract flow values for every block and every edge
  extractWeights(Params, InferenceNetwork, Func);

  // Post-processing adjustments to the flow
  auto Adjuster = FlowAdjuster(Params, Func);
  Adjuster.run();

#ifndef NDEBUG
  // Verify the result
  verifyOutput(Func);
#endif
}

/// Apply the profile inference algorithm for a given flow function
void llvm::applyFlowInference(FlowFunction &Func) {
  ProfiParams Params;
  // Set the params from the command-line flags.
  Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
  Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
  Params.JoinIslands = SampleProfileJoinIslands;
  Params.CostBlockInc = SampleProfileProfiCostBlockInc;
  Params.CostBlockDec = SampleProfileProfiCostBlockDec;
  Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
  Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
  Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
  Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;

  applyFlowInference(Params, Func);
}
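
// A minimal illustrative driver (hypothetical; in LLVM the FlowFunction is
// normally constructed by the sample-profile loader). It builds a two-block
// function with a single unknown-weight jump and lets the inference
// propagate the sampled entry count:
//
//   FlowFunction Func;
//   Func.Blocks.resize(2);
//   Func.Blocks[0].Index = 0;
//   Func.Blocks[0].Weight = 100; // sampled entry count
//   Func.Blocks[0].HasUnknownWeight = false;
//   Func.Blocks[1].Index = 1;
//   Func.Blocks[1].HasUnknownWeight = true;
//   FlowJump Jump;
//   Jump.Source = 0;
//   Jump.Target = 1;
//   Jump.HasUnknownWeight = true;
//   Func.Jumps.push_back(Jump);
//   Func.Blocks[0].SuccJumps.push_back(&Func.Jumps[0]);
//   Func.Blocks[1].PredJumps.push_back(&Func.Jumps[0]);
//   Func.Entry = 0;
//   applyFlowInference(Func); // expect Func.Blocks[1].Flow == 100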