1//===---------------------- AMDGPUNextUseAnalysis.h ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements Next Use Analysis.
10//
11// For each register it goes over all uses and returns the estimated distance of
12// the nearest use. This will be used for selecting which registers to spill
13// before register allocation.
14//
15// This is based on ideas from the paper:
16// "Register Spilling and Live-Range Splitting for SSA-Form Programs"
17// Matthias Braun and Sebastian Hack, CC'09
18//
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H
22#define LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H
23
24#include "SIInstrInfo.h"
25#include "SIRegisterInfo.h"
26#include "llvm/CodeGen/LiveIntervals.h"
27#include "llvm/CodeGen/LiveVariables.h"
28#include "llvm/CodeGen/MachineLoopInfo.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/TargetRegisterInfo.h"
31#include "llvm/IR/PassManager.h"
32#include "llvm/Support/Format.h"
33#include "llvm/Support/JSON.h"
34#include <limits>
35#include <optional>
36
37namespace llvm {
38
39class AMDGPUNextUseAnalysisImpl;
40
41//==============================================================================
42// NextUseDistance - Represents a distance in the next-use analysis. Currently
43// wraps a 64-bit int with special encoding for loop depth and unreachable
44// distances.
45//==============================================================================
46class NextUseDistance {
47public:
48 constexpr static NextUseDistance unreachable() {
49 return NextUseDistance(std::numeric_limits<int64_t>::max());
50 }
51
52 constexpr static NextUseDistance fromSize(unsigned Size, unsigned Depth) {
53 return NextUseDistance(Size).applyLoopWeight(Depth);
54 }
55
56 constexpr NextUseDistance(unsigned V) : Value(V) {}
57 constexpr NextUseDistance(int V) : Value(V) {}
58 constexpr NextUseDistance(const NextUseDistance &B) : Value(B.Value) {}
59
60 constexpr bool isUnreachable() const { return *this == unreachable(); }
61 constexpr bool isReachable() const { return !isUnreachable(); }
62
63 //----------------------------------------------------------------------------
64 // Assignment
65 //----------------------------------------------------------------------------
66 constexpr NextUseDistance &operator=(const NextUseDistance &B) {
67 Value = B.Value;
68 return *this;
69 }
70
71 constexpr NextUseDistance &operator=(unsigned V) {
72 Value = V;
73 return *this;
74 }
75
76 constexpr NextUseDistance &operator=(int V) {
77 Value = V;
78 return *this;
79 }
80
81 //----------------------------------------------------------------------------
82 // Arithmetic operators
83 //----------------------------------------------------------------------------
84 constexpr NextUseDistance &operator+=(const NextUseDistance &B) {
85 Value += B.Value;
86 return *this;
87 }
88
89 constexpr NextUseDistance &operator-=(const NextUseDistance &B) {
90 Value -= B.Value;
91 return *this;
92 }
93
94 constexpr NextUseDistance operator-() const {
95 return NextUseDistance(-Value);
96 }
97
98 constexpr NextUseDistance applyLoopWeight() const {
99 NextUseDistance W = fromLoopDepth(Depth: 1);
100 if (W.isUnreachable())
101 return unreachable();
102 constexpr int64_t MaxVal = std::numeric_limits<int64_t>::max();
103 if (Value != 0 && W.Value > MaxVal / Value)
104 return unreachable();
105 return NextUseDistance(Value * W.Value);
106 }
107
108 //----------------------------------------------------------------------------
109 // Comparison operators
110 //----------------------------------------------------------------------------
111 constexpr bool operator<(const NextUseDistance &B) const {
112 return Value < B.Value;
113 }
114
115 constexpr bool operator>(const NextUseDistance &B) const {
116 return Value > B.Value;
117 }
118
119 constexpr bool operator<=(const NextUseDistance &B) const {
120 return Value <= B.Value;
121 }
122
123 constexpr bool operator>=(const NextUseDistance &B) const {
124 return Value >= B.Value;
125 }
126
127 constexpr bool operator==(const NextUseDistance &B) const {
128 return Value == B.Value;
129 }
130
131 constexpr bool operator!=(const NextUseDistance &B) const {
132 return Value != B.Value;
133 }
134
135 //----------------------------------------------------------------------------
136 // Debugging
137 //----------------------------------------------------------------------------
138 format_object<int64_t> fmt() const { return format(Fmt: "%ld", Vals: Value); }
139
140 void print(raw_ostream &OS) const {
141 if (isUnreachable())
142 OS << "<unreachable>";
143 else
144 OS << fmt();
145 }
146
147 json::Value toJsonValue() const {
148 if (isUnreachable())
149 return "<unreachable>";
150 return Value;
151 }
152
153 std::string toString() const {
154 std::string Str;
155 llvm::raw_string_ostream OS(Str);
156 print(OS);
157 return OS.str();
158 }
159
160 constexpr int64_t getRawValue() const { return Value; }
161 using RawValueType = int64_t;
162
163private:
164 friend class AMDGPUNextUseAnalysisImpl;
165 int64_t Value;
166 constexpr explicit NextUseDistance(int64_t V) : Value(V) {}
167
168 constexpr static NextUseDistance fromLoopDepth(unsigned Depth) {
169 const unsigned Shift = 7 * Depth;
170
171 // Saturate?
172 if (Shift >= 63)
173 return unreachable();
174
175 // This implementation is multiplicative (f(a+b) == f(a) * f(b)) which we
176 // take advantage of below in applyLoopWeight(Depth).
177 return NextUseDistance(int64_t(1) << Shift);
178 }
179
180 // Semantically: apply fromLoopDepth(1) Depth times (compositional).
181 //
182 // Optimized to take advantage of multiplicative implementation of
183 // fromLoopDepth - a single multiply by fromLoopDepth(Depth) gives the same
184 // result. If fromLoopDepth is changed to a non-multiplicative formula,
185 // replace the body with something like:
186 //
187 // NextUseDistance D = *this;
188 // for (unsigned I = 0; I < Depth; ++I) {
189 // D = D.applyLoopWeight();
190 // if (D.isUnreachable())
191 // return unreachable();
192 // }
193 // return D;
194 //
195 constexpr NextUseDistance applyLoopWeight(unsigned Depth) const {
196 if (!Depth)
197 return *this;
198 NextUseDistance W = fromLoopDepth(Depth);
199 if (W.isUnreachable())
200 return unreachable();
201 constexpr int64_t MaxVal = std::numeric_limits<int64_t>::max();
202 if (Value != 0 && W.Value > MaxVal / Value)
203 return unreachable();
204 return NextUseDistance(Value * W.Value);
205 }
206};
207
208constexpr inline NextUseDistance operator+(NextUseDistance A,
209 const NextUseDistance &B) {
210 return A += B;
211}
212
213constexpr inline NextUseDistance operator-(NextUseDistance A,
214 const NextUseDistance &B) {
215 return A -= B;
216}
217
218constexpr inline NextUseDistance min(NextUseDistance A, NextUseDistance B) {
219 return A < B ? A : B;
220}
221
222constexpr inline NextUseDistance max(NextUseDistance A, NextUseDistance B) {
223 return A > B ? A : B;
224}
225
226//==============================================================================
227// AMDGPUNextUseAnalysis - Provides next-use distances for live registers or
228// sub-registers at a given MachineInstruction suitable for making spilling
229// decisions.
230//==============================================================================
231class AMDGPUNextUseAnalysis {
232 friend class AMDGPUNextUseAnalysisLegacyPass;
233 friend class AMDGPUNextUseAnalysisPrinterLegacyPass;
234 friend class AMDGPUNextUseAnalysisPass;
235 friend class AMDGPUNextUseAnalysisPrinterPass;
236
237 std::unique_ptr<AMDGPUNextUseAnalysisImpl> Impl;
238
239 AMDGPUNextUseAnalysis(const MachineFunction *, const MachineLoopInfo *);
240
241public:
242 AMDGPUNextUseAnalysis(AMDGPUNextUseAnalysis &&Other);
243 ~AMDGPUNextUseAnalysis();
244
245 AMDGPUNextUseAnalysis &operator=(AMDGPUNextUseAnalysis &&Other);
246
247 // Configuration flags for controlling the distance model. Defaults correspond
248 // to the Graphics preset.
249 struct Config {
250 // Count PHI instructions as having non-zero cost (distance and block
251 // size). When false, all PHIs share ID 0 and don't contribute to block
252 // size.
253 bool CountPhis = true;
254
255 // Restrict inter-block distances to forward-reachable paths only.
256 // When false, distances through back-edges are also considered.
257 bool ForwardOnly = true;
258
259 // Model PHI uses as belonging to their incoming edge's block, and apply
260 // full loop-aware reachability filtering including intermediate-def
261 // checks. When false, a simple same-block / forward-reachable check is
262 // used.
263 bool PreciseUseModeling = false;
264
265 // Promote uses that are inside a loop not yet entered or inside a directly
266 // nested inner loop to the end of that loop's preheader. This models the
267 // assumption that a spilled value will be reloaded at the preheader rather
268 // than at the actual use site. When false, direct shortest distance to the
269 // use is used instead.
270 bool PromoteToPreheader = false;
271
272 /// Named presets. See note in AMDGPUNextUseAnalysis.cpp associated with
273 /// 'amdgpu-next-use-analysis-config' regarding the historical context for
274 /// these.
275 static Config Graphics() { return {}; }
276 static Config Compute() {
277 Config Cfg;
278 Cfg.CountPhis = false;
279 Cfg.ForwardOnly = false;
280 Cfg.PreciseUseModeling = true;
281 Cfg.PromoteToPreheader = true;
282 return Cfg;
283 }
284 };
285
286 Config getConfig() const;
287 void setConfig(Config);
288
289 void getReachableUses(Register LiveReg, LaneBitmask LaneMask,
290 const MachineInstr &MI,
291 SmallVector<const MachineOperand *> &Uses) const;
292
293 /// \Returns the shortest next-use distance from \p CurMI for \p LiveReg.
294 NextUseDistance
295 getShortestDistance(Register LiveReg, const MachineInstr &CurMI,
296 const SmallVector<const MachineOperand *> &Uses,
297 const MachineOperand **ShortestUseOut = nullptr,
298 SmallVector<NextUseDistance> *Distances = nullptr) const;
299
300 struct UseDistancePair {
301 const MachineOperand *Use = nullptr;
302 NextUseDistance Dist = 0;
303 UseDistancePair() = default;
304 UseDistancePair(const MachineOperand *Use, NextUseDistance Dist)
305 : Use(Use), Dist(Dist) {}
306 };
307
308 void getNextUseDistances(const DenseMap<unsigned, LaneBitmask> &LiveRegs,
309 const MachineInstr &MI, UseDistancePair &Furthest,
310 UseDistancePair *FurthestSubreg = nullptr,
311 DenseMap<const MachineOperand *, UseDistancePair>
312 *RelevantUses = nullptr) const;
313};
314
315//==============================================================================
316// AMDGPUNextUseAnalysisLegacyPass - Legacy and New pass wrapper around
317// AMDGPUNextUseAnalysis
318//==============================================================================
319class AMDGPUNextUseAnalysisLegacyPass : public MachineFunctionPass {
320
321public:
322 static char ID;
323
324 AMDGPUNextUseAnalysisLegacyPass();
325
326 AMDGPUNextUseAnalysis &getNextUseAnalysis() { return *NUA; }
327 const AMDGPUNextUseAnalysis &getNextUseAnalysis() const { return *NUA; }
328 StringRef getPassName() const override;
329
330protected:
331 bool runOnMachineFunction(MachineFunction &) override;
332 void getAnalysisUsage(AnalysisUsage &AU) const override;
333
334private:
335 std::unique_ptr<AMDGPUNextUseAnalysis> NUA;
336};
337
338class AMDGPUNextUseAnalysisPass
339 : public AnalysisInfoMixin<AMDGPUNextUseAnalysisPass> {
340 friend AnalysisInfoMixin<AMDGPUNextUseAnalysisPass>;
341 static AnalysisKey Key;
342
343public:
344 using Result = AMDGPUNextUseAnalysis;
345 Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
346};
347
348//==============================================================================
349// AMDGPUNextUseAnalysisPrinterLegacyPass - Legacy Pass for printing
350// AMDGPUNextUseAnalysis results as JSON.
351//==============================================================================
352class AMDGPUNextUseAnalysisPrinterLegacyPass : public MachineFunctionPass {
353
354public:
355 static char ID;
356
357 AMDGPUNextUseAnalysisPrinterLegacyPass();
358
359 StringRef getPassName() const override;
360
361protected:
362 bool runOnMachineFunction(MachineFunction &) override;
363 void getAnalysisUsage(AnalysisUsage &AU) const override;
364};
365
366class AMDGPUNextUseAnalysisPrinterPass
367 : public RequiredPassInfoMixin<AMDGPUNextUseAnalysisPrinterPass> {
368 raw_ostream &OS;
369
370public:
371 explicit AMDGPUNextUseAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
372 PreservedAnalyses run(MachineFunction &MF,
373 MachineFunctionAnalysisManager &MFAM);
374};
375
376} // namespace llvm
377#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H
378