| 1 | //===---------------------- AMDGPUNextUseAnalysis.h ----------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements Next Use Analysis. |
| 10 | // |
| 11 | // For each register it goes over all uses and returns the estimated distance of |
| 12 | // the nearest use. This will be used for selecting which registers to spill |
| 13 | // before register allocation. |
| 14 | // |
| 15 | // This is based on ideas from the paper: |
| 16 | // "Register Spilling and Live-Range Splitting for SSA-Form Programs" |
| 17 | // Matthias Braun and Sebastian Hack, CC'09 |
| 18 | // |
| 19 | //===----------------------------------------------------------------------===// |
| 20 | |
| 21 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H |
| 22 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H |
| 23 | |
| 24 | #include "SIInstrInfo.h" |
| 25 | #include "SIRegisterInfo.h" |
| 26 | #include "llvm/CodeGen/LiveIntervals.h" |
| 27 | #include "llvm/CodeGen/LiveVariables.h" |
| 28 | #include "llvm/CodeGen/MachineLoopInfo.h" |
| 29 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 30 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 31 | #include "llvm/IR/PassManager.h" |
| 32 | #include "llvm/Support/Format.h" |
| 33 | #include "llvm/Support/JSON.h" |
| 34 | #include <limits> |
| 35 | #include <optional> |
| 36 | |
| 37 | namespace llvm { |
| 38 | |
| 39 | class AMDGPUNextUseAnalysisImpl; |
| 40 | |
| 41 | //============================================================================== |
| 42 | // NextUseDistance - Represents a distance in the next-use analysis. Currently |
| 43 | // wraps a 64-bit int with special encoding for loop depth and unreachable |
| 44 | // distances. |
| 45 | //============================================================================== |
| 46 | class NextUseDistance { |
| 47 | public: |
| 48 | constexpr static NextUseDistance unreachable() { |
| 49 | return NextUseDistance(std::numeric_limits<int64_t>::max()); |
| 50 | } |
| 51 | |
| 52 | constexpr static NextUseDistance fromSize(unsigned Size, unsigned Depth) { |
| 53 | return NextUseDistance(Size).applyLoopWeight(Depth); |
| 54 | } |
| 55 | |
| 56 | constexpr NextUseDistance(unsigned V) : Value(V) {} |
| 57 | constexpr NextUseDistance(int V) : Value(V) {} |
| 58 | constexpr NextUseDistance(const NextUseDistance &B) : Value(B.Value) {} |
| 59 | |
| 60 | constexpr bool isUnreachable() const { return *this == unreachable(); } |
| 61 | constexpr bool isReachable() const { return !isUnreachable(); } |
| 62 | |
| 63 | //---------------------------------------------------------------------------- |
| 64 | // Assignment |
| 65 | //---------------------------------------------------------------------------- |
| 66 | constexpr NextUseDistance &operator=(const NextUseDistance &B) { |
| 67 | Value = B.Value; |
| 68 | return *this; |
| 69 | } |
| 70 | |
| 71 | constexpr NextUseDistance &operator=(unsigned V) { |
| 72 | Value = V; |
| 73 | return *this; |
| 74 | } |
| 75 | |
| 76 | constexpr NextUseDistance &operator=(int V) { |
| 77 | Value = V; |
| 78 | return *this; |
| 79 | } |
| 80 | |
| 81 | //---------------------------------------------------------------------------- |
| 82 | // Arithmetic operators |
| 83 | //---------------------------------------------------------------------------- |
| 84 | constexpr NextUseDistance &operator+=(const NextUseDistance &B) { |
| 85 | Value += B.Value; |
| 86 | return *this; |
| 87 | } |
| 88 | |
| 89 | constexpr NextUseDistance &operator-=(const NextUseDistance &B) { |
| 90 | Value -= B.Value; |
| 91 | return *this; |
| 92 | } |
| 93 | |
| 94 | constexpr NextUseDistance operator-() const { |
| 95 | return NextUseDistance(-Value); |
| 96 | } |
| 97 | |
| 98 | constexpr NextUseDistance applyLoopWeight() const { |
| 99 | NextUseDistance W = fromLoopDepth(Depth: 1); |
| 100 | if (W.isUnreachable()) |
| 101 | return unreachable(); |
| 102 | constexpr int64_t MaxVal = std::numeric_limits<int64_t>::max(); |
| 103 | if (Value != 0 && W.Value > MaxVal / Value) |
| 104 | return unreachable(); |
| 105 | return NextUseDistance(Value * W.Value); |
| 106 | } |
| 107 | |
| 108 | //---------------------------------------------------------------------------- |
| 109 | // Comparison operators |
| 110 | //---------------------------------------------------------------------------- |
| 111 | constexpr bool operator<(const NextUseDistance &B) const { |
| 112 | return Value < B.Value; |
| 113 | } |
| 114 | |
| 115 | constexpr bool operator>(const NextUseDistance &B) const { |
| 116 | return Value > B.Value; |
| 117 | } |
| 118 | |
| 119 | constexpr bool operator<=(const NextUseDistance &B) const { |
| 120 | return Value <= B.Value; |
| 121 | } |
| 122 | |
| 123 | constexpr bool operator>=(const NextUseDistance &B) const { |
| 124 | return Value >= B.Value; |
| 125 | } |
| 126 | |
| 127 | constexpr bool operator==(const NextUseDistance &B) const { |
| 128 | return Value == B.Value; |
| 129 | } |
| 130 | |
| 131 | constexpr bool operator!=(const NextUseDistance &B) const { |
| 132 | return Value != B.Value; |
| 133 | } |
| 134 | |
| 135 | //---------------------------------------------------------------------------- |
| 136 | // Debugging |
| 137 | //---------------------------------------------------------------------------- |
| 138 | format_object<int64_t> fmt() const { return format(Fmt: "%ld" , Vals: Value); } |
| 139 | |
| 140 | void print(raw_ostream &OS) const { |
| 141 | if (isUnreachable()) |
| 142 | OS << "<unreachable>" ; |
| 143 | else |
| 144 | OS << fmt(); |
| 145 | } |
| 146 | |
| 147 | json::Value toJsonValue() const { |
| 148 | if (isUnreachable()) |
| 149 | return "<unreachable>" ; |
| 150 | return Value; |
| 151 | } |
| 152 | |
| 153 | std::string toString() const { |
| 154 | std::string Str; |
| 155 | llvm::raw_string_ostream OS(Str); |
| 156 | print(OS); |
| 157 | return OS.str(); |
| 158 | } |
| 159 | |
| 160 | constexpr int64_t getRawValue() const { return Value; } |
| 161 | using RawValueType = int64_t; |
| 162 | |
| 163 | private: |
| 164 | friend class AMDGPUNextUseAnalysisImpl; |
| 165 | int64_t Value; |
| 166 | constexpr explicit NextUseDistance(int64_t V) : Value(V) {} |
| 167 | |
| 168 | constexpr static NextUseDistance fromLoopDepth(unsigned Depth) { |
| 169 | const unsigned Shift = 7 * Depth; |
| 170 | |
| 171 | // Saturate? |
| 172 | if (Shift >= 63) |
| 173 | return unreachable(); |
| 174 | |
| 175 | // This implementation is multiplicative (f(a+b) == f(a) * f(b)) which we |
| 176 | // take advantage of below in applyLoopWeight(Depth). |
| 177 | return NextUseDistance(int64_t(1) << Shift); |
| 178 | } |
| 179 | |
| 180 | // Semantically: apply fromLoopDepth(1) Depth times (compositional). |
| 181 | // |
| 182 | // Optimized to take advantage of multiplicative implementation of |
| 183 | // fromLoopDepth - a single multiply by fromLoopDepth(Depth) gives the same |
| 184 | // result. If fromLoopDepth is changed to a non-multiplicative formula, |
| 185 | // replace the body with something like: |
| 186 | // |
| 187 | // NextUseDistance D = *this; |
| 188 | // for (unsigned I = 0; I < Depth; ++I) { |
| 189 | // D = D.applyLoopWeight(); |
| 190 | // if (D.isUnreachable()) |
| 191 | // return unreachable(); |
| 192 | // } |
| 193 | // return D; |
| 194 | // |
| 195 | constexpr NextUseDistance applyLoopWeight(unsigned Depth) const { |
| 196 | if (!Depth) |
| 197 | return *this; |
| 198 | NextUseDistance W = fromLoopDepth(Depth); |
| 199 | if (W.isUnreachable()) |
| 200 | return unreachable(); |
| 201 | constexpr int64_t MaxVal = std::numeric_limits<int64_t>::max(); |
| 202 | if (Value != 0 && W.Value > MaxVal / Value) |
| 203 | return unreachable(); |
| 204 | return NextUseDistance(Value * W.Value); |
| 205 | } |
| 206 | }; |
| 207 | |
| 208 | constexpr inline NextUseDistance operator+(NextUseDistance A, |
| 209 | const NextUseDistance &B) { |
| 210 | return A += B; |
| 211 | } |
| 212 | |
| 213 | constexpr inline NextUseDistance operator-(NextUseDistance A, |
| 214 | const NextUseDistance &B) { |
| 215 | return A -= B; |
| 216 | } |
| 217 | |
| 218 | constexpr inline NextUseDistance min(NextUseDistance A, NextUseDistance B) { |
| 219 | return A < B ? A : B; |
| 220 | } |
| 221 | |
| 222 | constexpr inline NextUseDistance max(NextUseDistance A, NextUseDistance B) { |
| 223 | return A > B ? A : B; |
| 224 | } |
| 225 | |
| 226 | //============================================================================== |
| 227 | // AMDGPUNextUseAnalysis - Provides next-use distances for live registers or |
| 228 | // sub-registers at a given MachineInstruction suitable for making spilling |
| 229 | // decisions. |
| 230 | //============================================================================== |
| 231 | class AMDGPUNextUseAnalysis { |
| 232 | friend class AMDGPUNextUseAnalysisLegacyPass; |
| 233 | friend class AMDGPUNextUseAnalysisPrinterLegacyPass; |
| 234 | friend class AMDGPUNextUseAnalysisPass; |
| 235 | friend class AMDGPUNextUseAnalysisPrinterPass; |
| 236 | |
| 237 | std::unique_ptr<AMDGPUNextUseAnalysisImpl> Impl; |
| 238 | |
| 239 | AMDGPUNextUseAnalysis(const MachineFunction *, const MachineLoopInfo *); |
| 240 | |
| 241 | public: |
| 242 | AMDGPUNextUseAnalysis(AMDGPUNextUseAnalysis &&Other); |
| 243 | ~AMDGPUNextUseAnalysis(); |
| 244 | |
| 245 | AMDGPUNextUseAnalysis &operator=(AMDGPUNextUseAnalysis &&Other); |
| 246 | |
| 247 | // Configuration flags for controlling the distance model. Defaults correspond |
| 248 | // to the Graphics preset. |
| 249 | struct Config { |
| 250 | // Count PHI instructions as having non-zero cost (distance and block |
| 251 | // size). When false, all PHIs share ID 0 and don't contribute to block |
| 252 | // size. |
| 253 | bool CountPhis = true; |
| 254 | |
| 255 | // Restrict inter-block distances to forward-reachable paths only. |
| 256 | // When false, distances through back-edges are also considered. |
| 257 | bool ForwardOnly = true; |
| 258 | |
| 259 | // Model PHI uses as belonging to their incoming edge's block, and apply |
| 260 | // full loop-aware reachability filtering including intermediate-def |
| 261 | // checks. When false, a simple same-block / forward-reachable check is |
| 262 | // used. |
| 263 | bool PreciseUseModeling = false; |
| 264 | |
| 265 | // Promote uses that are inside a loop not yet entered or inside a directly |
| 266 | // nested inner loop to the end of that loop's preheader. This models the |
| 267 | // assumption that a spilled value will be reloaded at the preheader rather |
| 268 | // than at the actual use site. When false, direct shortest distance to the |
| 269 | // use is used instead. |
| 270 | bool = false; |
| 271 | |
| 272 | /// Named presets. See note in AMDGPUNextUseAnalysis.cpp associated with |
| 273 | /// 'amdgpu-next-use-analysis-config' regarding the historical context for |
| 274 | /// these. |
| 275 | static Config Graphics() { return {}; } |
| 276 | static Config Compute() { |
| 277 | Config Cfg; |
| 278 | Cfg.CountPhis = false; |
| 279 | Cfg.ForwardOnly = false; |
| 280 | Cfg.PreciseUseModeling = true; |
| 281 | Cfg.PromoteToPreheader = true; |
| 282 | return Cfg; |
| 283 | } |
| 284 | }; |
| 285 | |
| 286 | Config getConfig() const; |
| 287 | void setConfig(Config); |
| 288 | |
| 289 | void getReachableUses(Register LiveReg, LaneBitmask LaneMask, |
| 290 | const MachineInstr &MI, |
| 291 | SmallVector<const MachineOperand *> &Uses) const; |
| 292 | |
| 293 | /// \Returns the shortest next-use distance from \p CurMI for \p LiveReg. |
| 294 | NextUseDistance |
| 295 | getShortestDistance(Register LiveReg, const MachineInstr &CurMI, |
| 296 | const SmallVector<const MachineOperand *> &Uses, |
| 297 | const MachineOperand **ShortestUseOut = nullptr, |
| 298 | SmallVector<NextUseDistance> *Distances = nullptr) const; |
| 299 | |
| 300 | struct UseDistancePair { |
| 301 | const MachineOperand *Use = nullptr; |
| 302 | NextUseDistance Dist = 0; |
| 303 | UseDistancePair() = default; |
| 304 | UseDistancePair(const MachineOperand *Use, NextUseDistance Dist) |
| 305 | : Use(Use), Dist(Dist) {} |
| 306 | }; |
| 307 | |
| 308 | void getNextUseDistances(const DenseMap<unsigned, LaneBitmask> &LiveRegs, |
| 309 | const MachineInstr &MI, UseDistancePair &Furthest, |
| 310 | UseDistancePair *FurthestSubreg = nullptr, |
| 311 | DenseMap<const MachineOperand *, UseDistancePair> |
| 312 | *RelevantUses = nullptr) const; |
| 313 | }; |
| 314 | |
| 315 | //============================================================================== |
| 316 | // AMDGPUNextUseAnalysisLegacyPass - Legacy and New pass wrapper around |
| 317 | // AMDGPUNextUseAnalysis |
| 318 | //============================================================================== |
| 319 | class AMDGPUNextUseAnalysisLegacyPass : public MachineFunctionPass { |
| 320 | |
| 321 | public: |
| 322 | static char ID; |
| 323 | |
| 324 | AMDGPUNextUseAnalysisLegacyPass(); |
| 325 | |
| 326 | AMDGPUNextUseAnalysis &getNextUseAnalysis() { return *NUA; } |
| 327 | const AMDGPUNextUseAnalysis &getNextUseAnalysis() const { return *NUA; } |
| 328 | StringRef getPassName() const override; |
| 329 | |
| 330 | protected: |
| 331 | bool runOnMachineFunction(MachineFunction &) override; |
| 332 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
| 333 | |
| 334 | private: |
| 335 | std::unique_ptr<AMDGPUNextUseAnalysis> NUA; |
| 336 | }; |
| 337 | |
| 338 | class AMDGPUNextUseAnalysisPass |
| 339 | : public AnalysisInfoMixin<AMDGPUNextUseAnalysisPass> { |
| 340 | friend AnalysisInfoMixin<AMDGPUNextUseAnalysisPass>; |
| 341 | static AnalysisKey Key; |
| 342 | |
| 343 | public: |
| 344 | using Result = AMDGPUNextUseAnalysis; |
| 345 | Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); |
| 346 | }; |
| 347 | |
| 348 | //============================================================================== |
| 349 | // AMDGPUNextUseAnalysisPrinterLegacyPass - Legacy Pass for printing |
| 350 | // AMDGPUNextUseAnalysis results as JSON. |
| 351 | //============================================================================== |
| 352 | class AMDGPUNextUseAnalysisPrinterLegacyPass : public MachineFunctionPass { |
| 353 | |
| 354 | public: |
| 355 | static char ID; |
| 356 | |
| 357 | AMDGPUNextUseAnalysisPrinterLegacyPass(); |
| 358 | |
| 359 | StringRef getPassName() const override; |
| 360 | |
| 361 | protected: |
| 362 | bool runOnMachineFunction(MachineFunction &) override; |
| 363 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
| 364 | }; |
| 365 | |
| 366 | class AMDGPUNextUseAnalysisPrinterPass |
| 367 | : public RequiredPassInfoMixin<AMDGPUNextUseAnalysisPrinterPass> { |
| 368 | raw_ostream &OS; |
| 369 | |
| 370 | public: |
| 371 | explicit AMDGPUNextUseAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} |
| 372 | PreservedAnalyses run(MachineFunction &MF, |
| 373 | MachineFunctionAnalysisManager &MFAM); |
| 374 | }; |
| 375 | |
| 376 | } // namespace llvm |
| 377 | #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H |
| 378 | |