| 1 | //===- SpillPlacement.cpp - Optimal Spill Code Placement ------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the spill code placement analysis. |
| 10 | // |
| 11 | // Each edge bundle corresponds to a node in a Hopfield network. Constraints on |
| 12 | // basic blocks are weighted by the block frequency and added to become the node |
| 13 | // bias. |
| 14 | // |
| 15 | // Transparent basic blocks have the variable live through, but don't care if it |
| 16 | // is spilled or in a register. These blocks become connections in the Hopfield |
| 17 | // network, again weighted by block frequency. |
| 18 | // |
| 19 | // The Hopfield network minimizes (possibly locally) its energy function: |
| 20 | // |
| 21 | // E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b ) |
| 22 | // |
| 23 | // The energy function represents the expected spill code execution frequency, |
| 24 | // or the cost of spilling. This is a Lyapunov function which never increases |
| 25 | // when a node is updated. It is guaranteed to converge to a local minimum. |
| 26 | // |
| 27 | //===----------------------------------------------------------------------===// |
| 28 | |
| 29 | #include "llvm/CodeGen/SpillPlacement.h" |
| 30 | #include "llvm/ADT/BitVector.h" |
| 31 | #include "llvm/CodeGen/EdgeBundles.h" |
| 32 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 33 | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
| 34 | #include "llvm/CodeGen/MachineFunction.h" |
| 35 | #include "llvm/CodeGen/Passes.h" |
| 36 | #include "llvm/InitializePasses.h" |
| 37 | #include "llvm/Pass.h" |
| 38 | #include <algorithm> |
| 39 | #include <cassert> |
| 40 | #include <cstdint> |
| 41 | #include <utility> |
| 42 | |
| 43 | using namespace llvm; |
| 44 | |
| 45 | #define DEBUG_TYPE "spill-code-placement" |
| 46 | |
| 47 | char SpillPlacementWrapperLegacy::ID = 0; |
| 48 | |
| 49 | char &llvm::SpillPlacementID = SpillPlacementWrapperLegacy::ID; |
| 50 | |
| 51 | INITIALIZE_PASS_BEGIN(SpillPlacementWrapperLegacy, DEBUG_TYPE, |
| 52 | "Spill Code Placement Analysis" , true, true) |
| 53 | INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy) |
| 54 | INITIALIZE_PASS_END(SpillPlacementWrapperLegacy, DEBUG_TYPE, |
| 55 | "Spill Code Placement Analysis" , true, true) |
| 56 | |
| 57 | void SpillPlacementWrapperLegacy::getAnalysisUsage(AnalysisUsage &AU) const { |
| 58 | AU.setPreservesAll(); |
| 59 | AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); |
| 60 | AU.addRequiredTransitive<EdgeBundlesWrapperLegacy>(); |
| 61 | MachineFunctionPass::getAnalysisUsage(AU); |
| 62 | } |
| 63 | |
| 64 | /// Node - Each edge bundle corresponds to a Hopfield node. |
| 65 | /// |
| 66 | /// The node contains precomputed frequency data that only depends on the CFG, |
| 67 | /// but Bias and Links are computed each time placeSpills is called. |
| 68 | /// |
| 69 | /// The node Value is positive when the variable should be in a register. The |
| 70 | /// value can change when linked nodes change, but convergence is very fast |
| 71 | /// because all weights are positive. |
| 72 | struct SpillPlacement::Node { |
| 73 | /// BiasN - Sum of blocks that prefer a spill. |
| 74 | BlockFrequency BiasN; |
| 75 | |
| 76 | /// BiasP - Sum of blocks that prefer a register. |
| 77 | BlockFrequency BiasP; |
| 78 | |
| 79 | /// Value - Output value of this node computed from the Bias and links. |
| 80 | /// This is always on of the values {-1, 0, 1}. A positive number means the |
| 81 | /// variable should go in a register through this bundle. |
| 82 | int Value; |
| 83 | |
| 84 | using LinkVector = SmallVector<std::pair<BlockFrequency, unsigned>, 4>; |
| 85 | |
| 86 | /// Links - (Weight, BundleNo) for all transparent blocks connecting to other |
| 87 | /// bundles. The weights are all positive block frequencies. |
| 88 | LinkVector Links; |
| 89 | |
| 90 | /// SumLinkWeights - Cached sum of the weights of all links + ThresHold. |
| 91 | BlockFrequency SumLinkWeights; |
| 92 | |
| 93 | /// preferReg - Return true when this node prefers to be in a register. |
| 94 | bool preferReg() const { |
| 95 | // Undecided nodes (Value==0) go on the stack. |
| 96 | return Value > 0; |
| 97 | } |
| 98 | |
| 99 | /// mustSpill - Return True if this node is so biased that it must spill. |
| 100 | bool mustSpill() const { |
| 101 | // We must spill if Bias < -sum(weights) or the MustSpill flag was set. |
| 102 | // BiasN is saturated when MustSpill is set, make sure this still returns |
| 103 | // true when the RHS saturates. Note that SumLinkWeights includes Threshold. |
| 104 | return BiasN >= BiasP + SumLinkWeights; |
| 105 | } |
| 106 | |
| 107 | /// clear - Reset per-query data, but preserve frequencies that only depend on |
| 108 | /// the CFG. |
| 109 | void clear(BlockFrequency Threshold) { |
| 110 | BiasN = BlockFrequency(0); |
| 111 | BiasP = BlockFrequency(0); |
| 112 | Value = 0; |
| 113 | SumLinkWeights = Threshold; |
| 114 | Links.clear(); |
| 115 | } |
| 116 | |
| 117 | /// addLink - Add a link to bundle b with weight w. |
| 118 | void addLink(unsigned b, BlockFrequency w) { |
| 119 | // Update cached sum. |
| 120 | SumLinkWeights += w; |
| 121 | |
| 122 | // There can be multiple links to the same bundle, add them up. |
| 123 | for (std::pair<BlockFrequency, unsigned> &L : Links) |
| 124 | if (L.second == b) { |
| 125 | L.first += w; |
| 126 | return; |
| 127 | } |
| 128 | // This must be the first link to b. |
| 129 | Links.push_back(Elt: std::make_pair(x&: w, y&: b)); |
| 130 | } |
| 131 | |
| 132 | /// addBias - Bias this node. |
| 133 | void addBias(BlockFrequency freq, BorderConstraint direction) { |
| 134 | switch (direction) { |
| 135 | default: |
| 136 | break; |
| 137 | case PrefReg: |
| 138 | BiasP += freq; |
| 139 | break; |
| 140 | case PrefSpill: |
| 141 | BiasN += freq; |
| 142 | break; |
| 143 | case MustSpill: |
| 144 | BiasN = BlockFrequency::max(); |
| 145 | break; |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | /// update - Recompute Value from Bias and Links. Return true when node |
| 150 | /// preference changes. |
| 151 | bool update(const Node nodes[], BlockFrequency Threshold) { |
| 152 | // Compute the weighted sum of inputs. |
| 153 | BlockFrequency SumN = BiasN; |
| 154 | BlockFrequency SumP = BiasP; |
| 155 | for (std::pair<BlockFrequency, unsigned> &L : Links) { |
| 156 | if (nodes[L.second].Value == -1) |
| 157 | SumN += L.first; |
| 158 | else if (nodes[L.second].Value == 1) |
| 159 | SumP += L.first; |
| 160 | } |
| 161 | |
| 162 | // Each weighted sum is going to be less than the total frequency of the |
| 163 | // bundle. Ideally, we should simply set Value = sign(SumP - SumN), but we |
| 164 | // will add a dead zone around 0 for two reasons: |
| 165 | // |
| 166 | // 1. It avoids arbitrary bias when all links are 0 as is possible during |
| 167 | // initial iterations. |
| 168 | // 2. It helps tame rounding errors when the links nominally sum to 0. |
| 169 | // |
| 170 | bool Before = preferReg(); |
| 171 | if (SumN >= SumP + Threshold) |
| 172 | Value = -1; |
| 173 | else if (SumP >= SumN + Threshold) |
| 174 | Value = 1; |
| 175 | else |
| 176 | Value = 0; |
| 177 | return Before != preferReg(); |
| 178 | } |
| 179 | |
| 180 | void getDissentingNeighbors(SparseSet<unsigned> &List, |
| 181 | const Node nodes[]) const { |
| 182 | for (const auto &Elt : Links) { |
| 183 | unsigned n = Elt.second; |
| 184 | // Neighbors that already have the same value are not going to |
| 185 | // change because of this node changing. |
| 186 | if (Value != nodes[n].Value) |
| 187 | List.insert(Val: n); |
| 188 | } |
| 189 | } |
| 190 | }; |
| 191 | |
| 192 | bool SpillPlacementWrapperLegacy::runOnMachineFunction(MachineFunction &MF) { |
| 193 | auto *Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles(); |
| 194 | auto *MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); |
| 195 | |
| 196 | Impl.run(MF, Bundles, MBFI); |
| 197 | return false; |
| 198 | } |
| 199 | |
| 200 | AnalysisKey SpillPlacementAnalysis::Key; |
| 201 | |
| 202 | SpillPlacement |
| 203 | SpillPlacementAnalysis::run(MachineFunction &MF, |
| 204 | MachineFunctionAnalysisManager &MFAM) { |
| 205 | auto *Bundles = &MFAM.getResult<EdgeBundlesAnalysis>(IR&: MF); |
| 206 | auto *MBFI = &MFAM.getResult<MachineBlockFrequencyAnalysis>(IR&: MF); |
| 207 | SpillPlacement Impl; |
| 208 | Impl.run(MF, Bundles, MBFI); |
| 209 | return Impl; |
| 210 | } |
| 211 | |
| 212 | bool SpillPlacementAnalysis::Result::invalidate( |
| 213 | MachineFunction &MF, const PreservedAnalyses &PA, |
| 214 | MachineFunctionAnalysisManager::Invalidator &Inv) { |
| 215 | auto PAC = PA.getChecker<SpillPlacementAnalysis>(); |
| 216 | if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<MachineFunction>>()) |
| 217 | return true; |
| 218 | // Check dependencies. |
| 219 | return Inv.invalidate<EdgeBundlesAnalysis>(IR&: MF, PA) || |
| 220 | Inv.invalidate<MachineBlockFrequencyAnalysis>(IR&: MF, PA); |
| 221 | } |
| 222 | |
| 223 | SpillPlacement::SpillPlacement() = default; |
| 224 | SpillPlacement::~SpillPlacement() = default; |
| 225 | SpillPlacement::SpillPlacement(SpillPlacement &&) = default; |
| 226 | |
| 227 | void SpillPlacement::releaseMemory() { |
| 228 | nodes.reset(); |
| 229 | TodoList.clear(); |
| 230 | } |
| 231 | |
| 232 | void SpillPlacement::run(MachineFunction &mf, EdgeBundles *Bundles, |
| 233 | MachineBlockFrequencyInfo *MBFI) { |
| 234 | MF = &mf; |
| 235 | this->bundles = Bundles; |
| 236 | this->MBFI = MBFI; |
| 237 | |
| 238 | assert(!nodes && "Leaking node array" ); |
| 239 | nodes.reset(p: new Node[bundles->getNumBundles()]); |
| 240 | TodoList.clear(); |
| 241 | TodoList.setUniverse(bundles->getNumBundles()); |
| 242 | |
| 243 | // Compute total ingoing and outgoing block frequencies for all bundles. |
| 244 | BlockFrequencies.resize(N: mf.getNumBlockIDs()); |
| 245 | setThreshold(MBFI->getEntryFreq()); |
| 246 | for (auto &I : mf) { |
| 247 | unsigned Num = I.getNumber(); |
| 248 | BlockFrequencies[Num] = MBFI->getBlockFreq(MBB: &I); |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | /// activate - mark node n as active if it wasn't already. |
| 253 | void SpillPlacement::activate(unsigned n) { |
| 254 | TodoList.insert(Val: n); |
| 255 | if (ActiveNodes->test(Idx: n)) |
| 256 | return; |
| 257 | ActiveNodes->set(n); |
| 258 | nodes[n].clear(Threshold); |
| 259 | |
| 260 | // Very large bundles usually come from big switches, indirect branches, |
| 261 | // landing pads, or loops with many 'continue' statements. It is difficult to |
| 262 | // allocate registers when so many different blocks are involved. |
| 263 | // |
| 264 | // Give a small negative bias to large bundles such that a substantial |
| 265 | // fraction of the connected blocks need to be interested before we consider |
| 266 | // expanding the region through the bundle. This helps compile time by |
| 267 | // limiting the number of blocks visited and the number of links in the |
| 268 | // Hopfield network. |
| 269 | if (bundles->getBlocks(Bundle: n).size() > 100) { |
| 270 | nodes[n].BiasP = BlockFrequency(0); |
| 271 | BlockFrequency BiasN = MBFI->getEntryFreq(); |
| 272 | BiasN >>= 4; |
| 273 | nodes[n].BiasN = BiasN; |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | /// Set the threshold for a given entry frequency. |
| 278 | /// |
| 279 | /// Set the threshold relative to \c Entry. Since the threshold is used as a |
| 280 | /// bound on the open interval (-Threshold;Threshold), 1 is the minimum |
| 281 | /// threshold. |
| 282 | void SpillPlacement::setThreshold(BlockFrequency Entry) { |
| 283 | // Apparently 2 is a good threshold when Entry==2^14, but we need to scale |
| 284 | // it. Divide by 2^13, rounding as appropriate. |
| 285 | uint64_t Freq = Entry.getFrequency(); |
| 286 | uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12)); |
| 287 | Threshold = BlockFrequency(std::max(UINT64_C(1), b: Scaled)); |
| 288 | } |
| 289 | |
| 290 | /// addConstraints - Compute node biases and weights from a set of constraints. |
| 291 | /// Set a bit in NodeMask for each active node. |
| 292 | void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { |
| 293 | for (const BlockConstraint &LB : LiveBlocks) { |
| 294 | BlockFrequency Freq = BlockFrequencies[LB.Number]; |
| 295 | |
| 296 | // Live-in to block? |
| 297 | if (LB.Entry != DontCare) { |
| 298 | unsigned ib = bundles->getBundle(N: LB.Number, Out: false); |
| 299 | activate(n: ib); |
| 300 | nodes[ib].addBias(freq: Freq, direction: LB.Entry); |
| 301 | } |
| 302 | |
| 303 | // Live-out from block? |
| 304 | if (LB.Exit != DontCare) { |
| 305 | unsigned ob = bundles->getBundle(N: LB.Number, Out: true); |
| 306 | activate(n: ob); |
| 307 | nodes[ob].addBias(freq: Freq, direction: LB.Exit); |
| 308 | } |
| 309 | } |
| 310 | } |
| 311 | |
| 312 | /// addPrefSpill - Same as addConstraints(PrefSpill) |
| 313 | void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { |
| 314 | for (unsigned B : Blocks) { |
| 315 | BlockFrequency Freq = BlockFrequencies[B]; |
| 316 | if (Strong) |
| 317 | Freq += Freq; |
| 318 | unsigned ib = bundles->getBundle(N: B, Out: false); |
| 319 | unsigned ob = bundles->getBundle(N: B, Out: true); |
| 320 | activate(n: ib); |
| 321 | activate(n: ob); |
| 322 | nodes[ib].addBias(freq: Freq, direction: PrefSpill); |
| 323 | nodes[ob].addBias(freq: Freq, direction: PrefSpill); |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { |
| 328 | for (unsigned Number : Links) { |
| 329 | unsigned ib = bundles->getBundle(N: Number, Out: false); |
| 330 | unsigned ob = bundles->getBundle(N: Number, Out: true); |
| 331 | |
| 332 | // Ignore self-loops. |
| 333 | if (ib == ob) |
| 334 | continue; |
| 335 | activate(n: ib); |
| 336 | activate(n: ob); |
| 337 | BlockFrequency Freq = BlockFrequencies[Number]; |
| 338 | nodes[ib].addLink(b: ob, w: Freq); |
| 339 | nodes[ob].addLink(b: ib, w: Freq); |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | bool SpillPlacement::scanActiveBundles() { |
| 344 | RecentPositive.clear(); |
| 345 | for (unsigned n : ActiveNodes->set_bits()) { |
| 346 | update(n); |
| 347 | // A node that must spill, or a node without any links is not going to |
| 348 | // change its value ever again, so exclude it from iterations. |
| 349 | if (nodes[n].mustSpill()) |
| 350 | continue; |
| 351 | if (nodes[n].preferReg()) |
| 352 | RecentPositive.push_back(Elt: n); |
| 353 | } |
| 354 | return !RecentPositive.empty(); |
| 355 | } |
| 356 | |
| 357 | bool SpillPlacement::update(unsigned n) { |
| 358 | if (!nodes[n].update(nodes: nodes.get(), Threshold)) |
| 359 | return false; |
| 360 | nodes[n].getDissentingNeighbors(List&: TodoList, nodes: nodes.get()); |
| 361 | return true; |
| 362 | } |
| 363 | |
| 364 | /// iterate - Repeatedly update the Hopfield nodes until stability or the |
| 365 | /// maximum number of iterations is reached. |
| 366 | void SpillPlacement::iterate() { |
| 367 | // We do not need to push those node in the todolist. |
| 368 | // They are already been proceeded as part of the previous iteration. |
| 369 | RecentPositive.clear(); |
| 370 | |
| 371 | // Since the last iteration, the todolist have been augmented by calls |
| 372 | // to addConstraints, addLinks, and co. |
| 373 | // Update the network energy starting at this new frontier. |
| 374 | // The call to ::update will add the nodes that changed into the todolist. |
| 375 | unsigned Limit = bundles->getNumBundles() * 10; |
| 376 | while(Limit-- > 0 && !TodoList.empty()) { |
| 377 | unsigned n = TodoList.pop_back_val(); |
| 378 | if (!update(n)) |
| 379 | continue; |
| 380 | if (nodes[n].preferReg()) |
| 381 | RecentPositive.push_back(Elt: n); |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | void SpillPlacement::prepare(BitVector &RegBundles) { |
| 386 | RecentPositive.clear(); |
| 387 | TodoList.clear(); |
| 388 | // Reuse RegBundles as our ActiveNodes vector. |
| 389 | ActiveNodes = &RegBundles; |
| 390 | ActiveNodes->clear(); |
| 391 | ActiveNodes->resize(N: bundles->getNumBundles()); |
| 392 | } |
| 393 | |
| 394 | bool |
| 395 | SpillPlacement::finish() { |
| 396 | assert(ActiveNodes && "Call prepare() first" ); |
| 397 | |
| 398 | // Write preferences back to ActiveNodes. |
| 399 | bool Perfect = true; |
| 400 | for (unsigned n : ActiveNodes->set_bits()) |
| 401 | if (!nodes[n].preferReg()) { |
| 402 | ActiveNodes->reset(Idx: n); |
| 403 | Perfect = false; |
| 404 | } |
| 405 | ActiveNodes = nullptr; |
| 406 | return Perfect; |
| 407 | } |
| 408 | |
| 409 | void SpillPlacement::BlockConstraint::print(raw_ostream &OS) const { |
| 410 | auto toString = [](BorderConstraint C) -> StringRef { |
| 411 | switch(C) { |
| 412 | case DontCare: return "DontCare" ; |
| 413 | case PrefReg: return "PrefReg" ; |
| 414 | case PrefSpill: return "PrefSpill" ; |
| 415 | case PrefBoth: return "PrefBoth" ; |
| 416 | case MustSpill: return "MustSpill" ; |
| 417 | }; |
| 418 | llvm_unreachable("uncovered switch" ); |
| 419 | }; |
| 420 | |
| 421 | dbgs() << "{" << Number << ", " |
| 422 | << toString(Entry) << ", " |
| 423 | << toString(Exit) << ", " |
| 424 | << (ChangesValue ? "changes" : "no change" ) << "}" ; |
| 425 | } |
| 426 | |
| 427 | void SpillPlacement::BlockConstraint::dump() const { |
| 428 | print(OS&: dbgs()); |
| 429 | dbgs() << "\n" ; |
| 430 | } |
| 431 | |