1 | //===- BalancedPartitioning.cpp -------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements BalancedPartitioning, a recursive balanced graph |
10 | // partitioning algorithm. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Support/BalancedPartitioning.h" |
15 | #include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS |
16 | #include "llvm/Support/Debug.h" |
17 | #include "llvm/Support/Format.h" |
18 | #include "llvm/Support/FormatVariadic.h" |
19 | #include "llvm/Support/ThreadPool.h" |
20 | |
21 | using namespace llvm; |
22 | #define DEBUG_TYPE "balanced-partitioning" |
23 | |
24 | void BPFunctionNode::dump(raw_ostream &OS) const { |
25 | OS << formatv(Fmt: "{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}" , Vals: Id, |
26 | Vals: make_range(x: UtilityNodes.begin(), y: UtilityNodes.end()), Vals: Bucket); |
27 | } |
28 | |
29 | template <typename Func> |
30 | void BalancedPartitioning::BPThreadPool::async(Func &&F) { |
31 | #if LLVM_ENABLE_THREADS |
32 | // This new thread could spawn more threads, so mark it as active |
33 | ++NumActiveThreads; |
34 | TheThreadPool.async([this, F]() { |
35 | // Run the task |
36 | F(); |
37 | |
38 | // This thread will no longer spawn new threads, so mark it as inactive |
39 | if (--NumActiveThreads == 0) { |
40 | // There are no more active threads, so mark as finished and notify |
41 | { |
42 | std::unique_lock<std::mutex> lock(mtx); |
43 | assert(!IsFinishedSpawning); |
44 | IsFinishedSpawning = true; |
45 | } |
46 | cv.notify_one(); |
47 | } |
48 | }); |
49 | #else |
50 | llvm_unreachable("threads are disabled" ); |
51 | #endif |
52 | } |
53 | |
54 | void BalancedPartitioning::BPThreadPool::wait() { |
55 | #if LLVM_ENABLE_THREADS |
56 | // TODO: We could remove the mutex and condition variable and use |
57 | // std::atomic::wait() instead, but that isn't available until C++20 |
58 | { |
59 | std::unique_lock<std::mutex> lock(mtx); |
60 | cv.wait(lock&: lock, p: [&]() { return IsFinishedSpawning; }); |
61 | assert(IsFinishedSpawning && NumActiveThreads == 0); |
62 | } |
63 | // Now we can call ThreadPool::wait() since all tasks have been submitted |
64 | TheThreadPool.wait(); |
65 | #else |
66 | llvm_unreachable("threads are disabled" ); |
67 | #endif |
68 | } |
69 | |
70 | BalancedPartitioning::BalancedPartitioning( |
71 | const BalancedPartitioningConfig &Config) |
72 | : Config(Config) { |
73 | // Pre-computing log2 values |
74 | Log2Cache[0] = 0.0; |
75 | for (unsigned I = 1; I < LOG_CACHE_SIZE; I++) |
76 | Log2Cache[I] = std::log2(x: I); |
77 | } |
78 | |
79 | void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const { |
80 | LLVM_DEBUG( |
81 | dbgs() << format( |
82 | "Partitioning %d nodes using depth %d and %d iterations per split\n" , |
83 | Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit)); |
84 | std::optional<BPThreadPool> TP; |
85 | #if LLVM_ENABLE_THREADS |
86 | DefaultThreadPool TheThreadPool; |
87 | if (Config.TaskSplitDepth > 1) |
88 | TP.emplace(args&: TheThreadPool); |
89 | #endif |
90 | |
91 | // Record the input order |
92 | for (unsigned I = 0; I < Nodes.size(); I++) |
93 | Nodes[I].InputOrderIndex = I; |
94 | |
95 | auto NodesRange = llvm::make_range(x: Nodes.begin(), y: Nodes.end()); |
96 | auto BisectTask = [this, NodesRange, &TP]() { |
97 | bisect(Nodes: NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP); |
98 | }; |
99 | if (TP) { |
100 | TP->async(F: std::move(BisectTask)); |
101 | TP->wait(); |
102 | } else { |
103 | BisectTask(); |
104 | } |
105 | |
106 | llvm::stable_sort(Range&: NodesRange, C: [](const auto &L, const auto &R) { |
107 | return L.Bucket < R.Bucket; |
108 | }); |
109 | |
110 | LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n" ); |
111 | } |
112 | |
113 | void BalancedPartitioning::bisect(const FunctionNodeRange Nodes, |
114 | unsigned RecDepth, unsigned RootBucket, |
115 | unsigned Offset, |
116 | std::optional<BPThreadPool> &TP) const { |
117 | unsigned NumNodes = std::distance(first: Nodes.begin(), last: Nodes.end()); |
118 | if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) { |
119 | // We've reach the lowest level of the recursion tree. Fall back to the |
120 | // original order and assign to buckets. |
121 | llvm::sort(C: Nodes, Comp: [](const auto &L, const auto &R) { |
122 | return L.InputOrderIndex < R.InputOrderIndex; |
123 | }); |
124 | for (auto &N : Nodes) |
125 | N.Bucket = Offset++; |
126 | return; |
127 | } |
128 | |
129 | LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n" , |
130 | NumNodes, RootBucket)); |
131 | |
132 | std::mt19937 RNG(RootBucket); |
133 | |
134 | unsigned LeftBucket = 2 * RootBucket; |
135 | unsigned RightBucket = 2 * RootBucket + 1; |
136 | |
137 | // Split into two and assign to the left and right buckets |
138 | split(Nodes, StartBucket: LeftBucket); |
139 | |
140 | runIterations(Nodes, LeftBucket, RightBucket, RNG); |
141 | |
142 | // Split nodes wrt the resulting buckets |
143 | auto NodesMid = |
144 | llvm::partition(Range: Nodes, P: [&](auto &N) { return N.Bucket == LeftBucket; }); |
145 | unsigned MidOffset = Offset + std::distance(first: Nodes.begin(), last: NodesMid); |
146 | |
147 | auto LeftNodes = llvm::make_range(x: Nodes.begin(), y: NodesMid); |
148 | auto RightNodes = llvm::make_range(x: NodesMid, y: Nodes.end()); |
149 | |
150 | auto LeftRecTask = [this, LeftNodes, RecDepth, LeftBucket, Offset, &TP]() { |
151 | bisect(Nodes: LeftNodes, RecDepth: RecDepth + 1, RootBucket: LeftBucket, Offset, TP); |
152 | }; |
153 | auto RightRecTask = [this, RightNodes, RecDepth, RightBucket, MidOffset, |
154 | &TP]() { |
155 | bisect(Nodes: RightNodes, RecDepth: RecDepth + 1, RootBucket: RightBucket, Offset: MidOffset, TP); |
156 | }; |
157 | |
158 | if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) { |
159 | TP->async(F: std::move(LeftRecTask)); |
160 | TP->async(F: std::move(RightRecTask)); |
161 | } else { |
162 | LeftRecTask(); |
163 | RightRecTask(); |
164 | } |
165 | } |
166 | |
167 | void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes, |
168 | unsigned LeftBucket, |
169 | unsigned RightBucket, |
170 | std::mt19937 &RNG) const { |
171 | unsigned NumNodes = std::distance(first: Nodes.begin(), last: Nodes.end()); |
172 | DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex; |
173 | for (auto &N : Nodes) |
174 | for (auto &UN : N.UtilityNodes) |
175 | ++UtilityNodeIndex[UN]; |
176 | // Remove utility nodes if they have just one edge or are connected to all |
177 | // functions |
178 | for (auto &N : Nodes) |
179 | llvm::erase_if(C&: N.UtilityNodes, P: [&](auto &UN) { |
180 | unsigned UNI = UtilityNodeIndex[UN]; |
181 | return UNI == 1 || UNI == NumNodes; |
182 | }); |
183 | |
184 | // Renumber utility nodes so they can be used to index into Signatures |
185 | UtilityNodeIndex.clear(); |
186 | for (auto &N : Nodes) |
187 | for (auto &UN : N.UtilityNodes) |
188 | UN = UtilityNodeIndex.insert(KV: {UN, UtilityNodeIndex.size()}).first->second; |
189 | |
190 | // Initialize signatures |
191 | SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size()); |
192 | for (auto &N : Nodes) { |
193 | for (auto &UN : N.UtilityNodes) { |
194 | assert(UN < Signatures.size()); |
195 | if (N.Bucket == LeftBucket) { |
196 | Signatures[UN].LeftCount++; |
197 | } else { |
198 | Signatures[UN].RightCount++; |
199 | } |
200 | } |
201 | } |
202 | |
203 | for (unsigned I = 0; I < Config.IterationsPerSplit; I++) { |
204 | unsigned NumMovedNodes = |
205 | runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG); |
206 | if (NumMovedNodes == 0) |
207 | break; |
208 | } |
209 | } |
210 | |
211 | unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes, |
212 | unsigned LeftBucket, |
213 | unsigned RightBucket, |
214 | SignaturesT &Signatures, |
215 | std::mt19937 &RNG) const { |
216 | // Init signature cost caches |
217 | for (auto &Signature : Signatures) { |
218 | if (Signature.CachedGainIsValid) |
219 | continue; |
220 | unsigned L = Signature.LeftCount; |
221 | unsigned R = Signature.RightCount; |
222 | assert((L > 0 || R > 0) && "incorrect signature" ); |
223 | float Cost = logCost(X: L, Y: R); |
224 | Signature.CachedGainLR = 0.f; |
225 | Signature.CachedGainRL = 0.f; |
226 | if (L > 0) |
227 | Signature.CachedGainLR = Cost - logCost(X: L - 1, Y: R + 1); |
228 | if (R > 0) |
229 | Signature.CachedGainRL = Cost - logCost(X: L + 1, Y: R - 1); |
230 | Signature.CachedGainIsValid = true; |
231 | } |
232 | |
233 | // Compute move gains |
234 | typedef std::pair<float, BPFunctionNode *> GainPair; |
235 | std::vector<GainPair> Gains; |
236 | for (auto &N : Nodes) { |
237 | bool FromLeftToRight = (N.Bucket == LeftBucket); |
238 | float Gain = moveGain(N, FromLeftToRight, Signatures); |
239 | Gains.push_back(x: std::make_pair(x&: Gain, y: &N)); |
240 | } |
241 | |
242 | // Collect left and right gains |
243 | auto LeftEnd = llvm::partition( |
244 | Range&: Gains, P: [&](const auto &GP) { return GP.second->Bucket == LeftBucket; }); |
245 | auto LeftRange = llvm::make_range(x: Gains.begin(), y: LeftEnd); |
246 | auto RightRange = llvm::make_range(x: LeftEnd, y: Gains.end()); |
247 | |
248 | // Sort gains in descending order |
249 | auto LargerGain = [](const auto &L, const auto &R) { |
250 | return L.first > R.first; |
251 | }; |
252 | llvm::stable_sort(Range&: LeftRange, C: LargerGain); |
253 | llvm::stable_sort(Range&: RightRange, C: LargerGain); |
254 | |
255 | unsigned NumMovedDataVertices = 0; |
256 | for (auto [LeftPair, RightPair] : llvm::zip(t&: LeftRange, u&: RightRange)) { |
257 | auto &[LeftGain, LeftNode] = LeftPair; |
258 | auto &[RightGain, RightNode] = RightPair; |
259 | // Stop when the gain is no longer beneficial |
260 | if (LeftGain + RightGain <= 0.f) |
261 | break; |
262 | // Try to exchange the nodes between buckets |
263 | if (moveFunctionNode(N&: *LeftNode, LeftBucket, RightBucket, Signatures, RNG)) |
264 | ++NumMovedDataVertices; |
265 | if (moveFunctionNode(N&: *RightNode, LeftBucket, RightBucket, Signatures, RNG)) |
266 | ++NumMovedDataVertices; |
267 | } |
268 | return NumMovedDataVertices; |
269 | } |
270 | |
271 | bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N, |
272 | unsigned LeftBucket, |
273 | unsigned RightBucket, |
274 | SignaturesT &Signatures, |
275 | std::mt19937 &RNG) const { |
276 | // Sometimes we skip the move. This helps to escape local optima |
277 | if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <= |
278 | Config.SkipProbability) |
279 | return false; |
280 | |
281 | bool FromLeftToRight = (N.Bucket == LeftBucket); |
282 | // Update the current bucket |
283 | N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket); |
284 | |
285 | // Update signatures and invalidate gain cache |
286 | if (FromLeftToRight) { |
287 | for (auto &UN : N.UtilityNodes) { |
288 | auto &Signature = Signatures[UN]; |
289 | Signature.LeftCount--; |
290 | Signature.RightCount++; |
291 | Signature.CachedGainIsValid = false; |
292 | } |
293 | } else { |
294 | for (auto &UN : N.UtilityNodes) { |
295 | auto &Signature = Signatures[UN]; |
296 | Signature.LeftCount++; |
297 | Signature.RightCount--; |
298 | Signature.CachedGainIsValid = false; |
299 | } |
300 | } |
301 | return true; |
302 | } |
303 | |
304 | void BalancedPartitioning::split(const FunctionNodeRange Nodes, |
305 | unsigned StartBucket) const { |
306 | unsigned NumNodes = std::distance(first: Nodes.begin(), last: Nodes.end()); |
307 | auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2; |
308 | |
309 | llvm::sort(Start: Nodes.begin(), End: Nodes.end(), Comp: [](auto &L, auto &R) { |
310 | return L.InputOrderIndex < R.InputOrderIndex; |
311 | }); |
312 | |
313 | for (auto &N : llvm::make_range(x: Nodes.begin(), y: NodesMid)) |
314 | N.Bucket = StartBucket; |
315 | for (auto &N : llvm::make_range(x: NodesMid, y: Nodes.end())) |
316 | N.Bucket = StartBucket + 1; |
317 | } |
318 | |
319 | float BalancedPartitioning::moveGain(const BPFunctionNode &N, |
320 | bool FromLeftToRight, |
321 | const SignaturesT &Signatures) { |
322 | float Gain = 0.f; |
323 | for (auto &UN : N.UtilityNodes) |
324 | Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR |
325 | : Signatures[UN].CachedGainRL); |
326 | return Gain; |
327 | } |
328 | |
329 | float BalancedPartitioning::logCost(unsigned X, unsigned Y) const { |
330 | return -(X * log2Cached(i: X + 1) + Y * log2Cached(i: Y + 1)); |
331 | } |
332 | |
333 | float BalancedPartitioning::log2Cached(unsigned i) const { |
334 | return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(x: i); |
335 | } |
336 | |