1//===- GCNRegPressure.h -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the GCNRegPressure class, which tracks registry pressure
11/// by bookkeeping number of SGPR/VGPRs used, weights for large SGPR/VGPRs. It
12/// also implements a compare function, which compares different register
13/// pressures, and declares one with max occupancy as winner.
14///
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
18#define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
19
20#include "GCNSubtarget.h"
21#include "llvm/CodeGen/LiveIntervals.h"
22#include "llvm/CodeGen/RegisterPressure.h"
23#include <algorithm>
24
25namespace llvm {
26
27class MachineRegisterInfo;
28class raw_ostream;
29class SlotIndex;
30
31struct GCNRegPressure {
32 enum RegKind { SGPR, VGPR, AGPR, TOTAL_KINDS };
33
34 GCNRegPressure() {
35 clear();
36 }
37
38 bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR]; }
39
40 void clear() { std::fill(first: &Value[0], last: &Value[ValueArraySize], value: 0); }
41
42 /// \returns the SGPR32 pressure
43 unsigned getSGPRNum() const { return Value[SGPR]; }
44 /// \returns the aggregated ArchVGPR32, AccVGPR32 pressure dependent upon \p
45 /// UnifiedVGPRFile
46 unsigned getVGPRNum(bool UnifiedVGPRFile) const {
47 if (UnifiedVGPRFile) {
48 return Value[AGPR] ? getUnifiedVGPRNum(NumArchVGPRs: Value[VGPR], NumAGPRs: Value[AGPR])
49 : Value[VGPR];
50 }
51 return std::max(a: Value[VGPR], b: Value[AGPR]);
52 }
53
54 /// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs
55 /// and \p NumAGPRs AGPRS, for a target with a unified VGPR file.
56 inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
57 unsigned NumAGPRs) {
58 return alignTo(Value: NumArchVGPRs, Align: AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
59 NumAGPRs;
60 }
61
62 /// \returns the ArchVGPR32 pressure
63 unsigned getArchVGPRNum() const { return Value[VGPR]; }
64 /// \returns the AccVGPR32 pressure
65 unsigned getAGPRNum() const { return Value[AGPR]; }
66
67 unsigned getVGPRTuplesWeight() const {
68 return std::max(a: Value[TOTAL_KINDS + VGPR], b: Value[TOTAL_KINDS + AGPR]);
69 }
70 unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
71
72 unsigned getOccupancy(const GCNSubtarget &ST,
73 unsigned DynamicVGPRBlockSize) const {
74 return std::min(a: ST.getOccupancyWithNumSGPRs(SGPRs: getSGPRNum()),
75 b: ST.getOccupancyWithNumVGPRs(VGPRs: getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()),
76 DynamicVGPRBlockSize));
77 }
78
79 void inc(unsigned Reg,
80 LaneBitmask PrevMask,
81 LaneBitmask NewMask,
82 const MachineRegisterInfo &MRI);
83
84 bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
85 unsigned DynamicVGPRBlockSize) const {
86 return getOccupancy(ST, DynamicVGPRBlockSize) >
87 O.getOccupancy(ST, DynamicVGPRBlockSize);
88 }
89
90 /// Compares \p this GCNRegpressure to \p O, returning true if \p this is
91 /// less. Since GCNRegpressure contains different types of pressures, and due
92 /// to target-specific pecularities (e.g. we care about occupancy rather than
93 /// raw register usage), we determine if \p this GCNRegPressure is less than
94 /// \p O based on the following tiered comparisons (in order order of
95 /// precedence):
96 /// 1. Better occupancy
97 /// 2. Less spilling (first preference to VGPR spills, then to SGPR spills)
98 /// 3. Less tuple register pressure (first preference to VGPR tuples if we
99 /// determine that SGPR pressure is not important)
100 /// 4. Less raw register pressure (first preference to VGPR tuples if we
101 /// determine that SGPR pressure is not important)
102 bool less(const MachineFunction &MF, const GCNRegPressure &O,
103 unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
104
105 bool operator==(const GCNRegPressure &O) const {
106 return std::equal(first1: &Value[0], last1: &Value[ValueArraySize], first2: O.Value);
107 }
108
109 bool operator!=(const GCNRegPressure &O) const {
110 return !(*this == O);
111 }
112
113 GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
114 for (unsigned I = 0; I < ValueArraySize; ++I)
115 Value[I] += RHS.Value[I];
116 return *this;
117 }
118
119 GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
120 for (unsigned I = 0; I < ValueArraySize; ++I)
121 Value[I] -= RHS.Value[I];
122 return *this;
123 }
124
125 void dump() const;
126
127private:
128 static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2;
129
130 /// Pressure for all register kinds (first all regular registers kinds, then
131 /// all tuple register kinds).
132 unsigned Value[ValueArraySize];
133
134 static unsigned getRegKind(const TargetRegisterClass *RC,
135 const SIRegisterInfo *STI);
136
137 friend GCNRegPressure max(const GCNRegPressure &P1,
138 const GCNRegPressure &P2);
139
140 friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST,
141 unsigned DynamicVGPRBlockSize);
142};
143
144inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
145 GCNRegPressure Res;
146 for (unsigned I = 0; I < GCNRegPressure::ValueArraySize; ++I)
147 Res.Value[I] = std::max(a: P1.Value[I], b: P2.Value[I]);
148 return Res;
149}
150
151inline GCNRegPressure operator+(const GCNRegPressure &P1,
152 const GCNRegPressure &P2) {
153 GCNRegPressure Sum = P1;
154 Sum += P2;
155 return Sum;
156}
157
158inline GCNRegPressure operator-(const GCNRegPressure &P1,
159 const GCNRegPressure &P2) {
160 GCNRegPressure Diff = P1;
161 Diff -= P2;
162 return Diff;
163}
164
165////////////////////////////////////////////////////////////////////////////////
166// GCNRPTarget
167
168/// Models a register pressure target, allowing to evaluate and track register
169/// savings against that target from a starting \ref GCNRegPressure.
170class GCNRPTarget {
171public:
172 /// Sets up the target such that the register pressure starting at \p RP does
173 /// not show register spilling on function \p MF (w.r.t. the function's
174 /// mininum target occupancy).
175 GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
176 bool CombineVGPRSavings = false);
177
178 /// Sets up the target such that the register pressure starting at \p RP does
179 /// not use more than \p NumSGPRs SGPRs and \p NumVGPRs VGPRs on function \p
180 /// MF.
181 GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs, const MachineFunction &MF,
182 const GCNRegPressure &RP, bool CombineVGPRSavings = false);
183
184 /// Sets up the target such that the register pressure starting at \p RP does
185 /// not prevent achieving an occupancy of at least \p Occupancy on function
186 /// \p MF.
187 GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
188 const GCNRegPressure &RP, bool CombineVGPRSavings = false);
189
190 const GCNRegPressure &getCurrentRP() const { return RP; }
191
192 void setRP(const GCNRegPressure &NewRP) { RP = NewRP; }
193
194 /// Determines whether saving virtual register \p Reg will be beneficial
195 /// towards achieving the RP target.
196 bool isSaveBeneficial(Register Reg, const MachineRegisterInfo &MRI) const;
197
198 /// Saves virtual register \p Reg with lanemask \p Mask.
199 void saveReg(Register Reg, LaneBitmask Mask, const MachineRegisterInfo &MRI) {
200 RP.inc(Reg, PrevMask: Mask, NewMask: LaneBitmask::getNone(), MRI);
201 }
202
203 /// Whether the current RP is at or below the defined pressure target.
204 bool satisfied() const;
205
206#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
207 friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
208 OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs
209 << " SGPRs, " << Target.RP.getArchVGPRNum() << '/' << Target.MaxVGPRs
210 << " ArchVGPRs, " << Target.RP.getAGPRNum() << '/' << Target.MaxVGPRs
211 << " AGPRs";
212
213 if (Target.MaxUnifiedVGPRs) {
214 OS << ", " << Target.RP.getVGPRNum(true) << '/' << Target.MaxUnifiedVGPRs
215 << " VGPRs (unified)";
216 } else if (Target.CombineVGPRSavings) {
217 OS << ", " << Target.RP.getArchVGPRNum() + Target.RP.getAGPRNum() << '/'
218 << 2 * Target.MaxVGPRs << " VGPRs (combined target)";
219 }
220 return OS;
221 }
222#endif
223
224private:
225 /// Current register pressure.
226 GCNRegPressure RP;
227
228 /// Target number of SGPRs.
229 unsigned MaxSGPRs;
230 /// Target number of ArchVGPRs and AGPRs.
231 unsigned MaxVGPRs;
232 /// Target number of overall VGPRs for subtargets with unified RFs. Always 0
233 /// for subtargets with non-unified RFs.
234 unsigned MaxUnifiedVGPRs;
235 /// Whether we consider that the register allocator will be able to swap
236 /// between ArchVGPRs and AGPRs by copying them to a super register class.
237 /// Concretely, this allows savings in one of the VGPR banks to help toward
238 /// savings in the other VGPR bank.
239 bool CombineVGPRSavings;
240
241 inline bool satisifiesVGPRBanksTarget() const {
242 assert(CombineVGPRSavings && "only makes sense with combined savings");
243 return RP.getArchVGPRNum() + RP.getAGPRNum() <= 2 * MaxVGPRs;
244 }
245
246 /// Always satisified when the subtarget doesn't have a unified RF.
247 inline bool satisfiesUnifiedTarget() const {
248 return !MaxUnifiedVGPRs || RP.getVGPRNum(UnifiedVGPRFile: true) <= MaxUnifiedVGPRs;
249 }
250
251 inline bool isVGPRBankSaveBeneficial(unsigned NumVGPRs) const {
252 return NumVGPRs > MaxVGPRs || !satisfiesUnifiedTarget() ||
253 (CombineVGPRSavings && !satisifiesVGPRBanksTarget());
254 }
255
256 void setRegLimits(unsigned MaxSGPRs, unsigned MaxVGPRs,
257 const MachineFunction &MF);
258};
259
260///////////////////////////////////////////////////////////////////////////////
261// GCNRPTracker
262
263class GCNRPTracker {
264public:
265 using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
266
267protected:
268 const LiveIntervals &LIS;
269 LiveRegSet LiveRegs;
270 GCNRegPressure CurPressure, MaxPressure;
271 const MachineInstr *LastTrackedMI = nullptr;
272 mutable const MachineRegisterInfo *MRI = nullptr;
273
274 GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {}
275
276 void reset(const MachineInstr &MI, const LiveRegSet *LiveRegsCopy,
277 bool After);
278
279 /// Mostly copy/paste from CodeGen/RegisterPressure.cpp
280 void bumpDeadDefs(ArrayRef<VRegMaskOrUnit> DeadDefs);
281
282 LaneBitmask getLastUsedLanes(Register RegUnit, SlotIndex Pos) const;
283
284public:
285 // reset tracker and set live register set to the specified value.
286 void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
287 // live regs for the current state
288 const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
289 const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
290
291 void clearMaxPressure() { MaxPressure.clear(); }
292
293 GCNRegPressure getPressure() const { return CurPressure; }
294
295 decltype(LiveRegs) moveLiveRegs() {
296 return std::move(LiveRegs);
297 }
298};
299
300GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
301 const MachineRegisterInfo &MRI);
302
303////////////////////////////////////////////////////////////////////////////////
304// GCNUpwardRPTracker
305
306class GCNUpwardRPTracker : public GCNRPTracker {
307public:
308 GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
309
310 using GCNRPTracker::reset;
311
312 /// reset tracker at the specified slot index \p SI.
313 void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
314 GCNRPTracker::reset(MRI_: MRI, LiveRegs_: llvm::getLiveRegs(SI, LIS, MRI));
315 }
316
317 /// reset tracker to the end of the \p MBB.
318 void reset(const MachineBasicBlock &MBB) {
319 reset(MRI: MBB.getParent()->getRegInfo(),
320 SI: LIS.getSlotIndexes()->getMBBEndIdx(mbb: &MBB));
321 }
322
323 /// reset tracker to the point just after \p MI (in program order).
324 void reset(const MachineInstr &MI) {
325 reset(MRI: MI.getMF()->getRegInfo(), SI: LIS.getInstructionIndex(Instr: MI).getDeadSlot());
326 }
327
328 /// Move to the state of RP just before the \p MI . If \p UseInternalIterator
329 /// is set, also update the internal iterators. Setting \p UseInternalIterator
330 /// to false allows for an externally managed iterator / program order.
331 void recede(const MachineInstr &MI);
332
333 /// \p returns whether the tracker's state after receding MI corresponds
334 /// to reported by LIS.
335 bool isValid() const;
336
337 const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
338
339 void resetMaxPressure() { MaxPressure = CurPressure; }
340
341 GCNRegPressure getMaxPressureAndReset() {
342 GCNRegPressure RP = MaxPressure;
343 resetMaxPressure();
344 return RP;
345 }
346};
347
348////////////////////////////////////////////////////////////////////////////////
349// GCNDownwardRPTracker
350
351class GCNDownwardRPTracker : public GCNRPTracker {
352 // Last position of reset or advanceBeforeNext
353 MachineBasicBlock::const_iterator NextMI;
354
355 MachineBasicBlock::const_iterator MBBEnd;
356
357public:
358 GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
359
360 using GCNRPTracker::reset;
361
362 MachineBasicBlock::const_iterator getNext() const { return NextMI; }
363
364 /// \p return MaxPressure and clear it.
365 GCNRegPressure moveMaxPressure() {
366 auto Res = MaxPressure;
367 MaxPressure.clear();
368 return Res;
369 }
370
371 /// Reset tracker to the point before the \p MI
372 /// filling \p LiveRegs upon this point using LIS.
373 /// \p returns false if block is empty except debug values.
374 bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
375
376 /// Move to the state right before the next MI or after the end of MBB.
377 /// \p returns false if reached end of the block.
378 /// If \p UseInternalIterator is true, then internal iterators are used and
379 /// set to process in program order. If \p UseInternalIterator is false, then
380 /// it is assumed that the tracker is using an externally managed iterator,
381 /// and advance* calls will not update the state of the iterator. In such
382 /// cases, the tracker will move to the state right before the provided \p MI
383 /// and use LIS for RP calculations.
384 bool advanceBeforeNext(MachineInstr *MI = nullptr,
385 bool UseInternalIterator = true);
386
387 /// Move to the state at the MI, advanceBeforeNext has to be called first.
388 /// If \p UseInternalIterator is true, then internal iterators are used and
389 /// set to process in program order. If \p UseInternalIterator is false, then
390 /// it is assumed that the tracker is using an externally managed iterator,
391 /// and advance* calls will not update the state of the iterator. In such
392 /// cases, the tracker will move to the state at the provided \p MI .
393 void advanceToNext(MachineInstr *MI = nullptr,
394 bool UseInternalIterator = true);
395
396 /// Move to the state at the next MI. \p returns false if reached end of
397 /// block. If \p UseInternalIterator is true, then internal iterators are used
398 /// and set to process in program order. If \p UseInternalIterator is false,
399 /// then it is assumed that the tracker is using an externally managed
400 /// iterator, and advance* calls will not update the state of the iterator. In
401 /// such cases, the tracker will move to the state right before the provided
402 /// \p MI and use LIS for RP calculations.
403 bool advance(MachineInstr *MI = nullptr, bool UseInternalIterator = true);
404
405 /// Advance instructions until before \p End.
406 bool advance(MachineBasicBlock::const_iterator End);
407
408 /// Reset to \p Begin and advance to \p End.
409 bool advance(MachineBasicBlock::const_iterator Begin,
410 MachineBasicBlock::const_iterator End,
411 const LiveRegSet *LiveRegsCopy = nullptr);
412
413 /// Mostly copy/paste from CodeGen/RegisterPressure.cpp
414 /// Calculate the impact \p MI will have on CurPressure and \return the
415 /// speculated pressure. In order to support RP Speculation, this does not
416 /// rely on the implicit program ordering in the LiveIntervals.
417 GCNRegPressure bumpDownwardPressure(const MachineInstr *MI,
418 const SIRegisterInfo *TRI) const;
419};
420
421/// \returns the LaneMask of live lanes of \p Reg at position \p SI. Only the
422/// active lanes of \p LaneMaskFilter will be set in the return value. This is
423/// used, for example, to limit the live lanes to a specific subreg when
424/// calculating use masks.
425LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI,
426 const LiveIntervals &LIS,
427 const MachineRegisterInfo &MRI,
428 LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
429
430LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
431 const MachineRegisterInfo &MRI,
432 LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
433
434GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
435 const MachineRegisterInfo &MRI);
436
437/// creates a map MachineInstr -> LiveRegSet
438/// R - range of iterators on instructions
439/// After - upon entry or exit of every instruction
440/// Note: there is no entry in the map for instructions with empty live reg set
441/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
442template <typename Range>
443DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
444getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
445 std::vector<SlotIndex> Indexes;
446 Indexes.reserve(n: std::distance(R.begin(), R.end()));
447 auto &SII = *LIS.getSlotIndexes();
448 for (MachineInstr *I : R) {
449 auto SI = SII.getInstructionIndex(MI: *I);
450 Indexes.push_back(x: After ? SI.getDeadSlot() : SI.getBaseIndex());
451 }
452 llvm::sort(C&: Indexes);
453
454 auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo();
455 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> LiveRegMap;
456 SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
457 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
458 auto Reg = Register::index2VirtReg(Index: I);
459 if (!LIS.hasInterval(Reg))
460 continue;
461 auto &LI = LIS.getInterval(Reg);
462 LiveIdxs.clear();
463 if (!LI.findIndexesLiveAt(R&: Indexes, O: std::back_inserter(x&: LiveIdxs)))
464 continue;
465 if (!LI.hasSubRanges()) {
466 for (auto SI : LiveIdxs)
467 LiveRegMap[SII.getInstructionFromIndex(index: SI)][Reg] =
468 MRI.getMaxLaneMaskForVReg(Reg);
469 } else
470 for (const auto &S : LI.subranges()) {
471 // constrain search for subranges by indexes live at main range
472 SRLiveIdxs.clear();
473 S.findIndexesLiveAt(R&: LiveIdxs, O: std::back_inserter(x&: SRLiveIdxs));
474 for (auto SI : SRLiveIdxs)
475 LiveRegMap[SII.getInstructionFromIndex(index: SI)][Reg] |= S.LaneMask;
476 }
477 }
478 return LiveRegMap;
479}
480
481inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
482 const LiveIntervals &LIS) {
483 return getLiveRegs(SI: LIS.getInstructionIndex(Instr: MI).getDeadSlot(), LIS,
484 MRI: MI.getParent()->getParent()->getRegInfo());
485}
486
487inline GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI,
488 const LiveIntervals &LIS) {
489 return getLiveRegs(SI: LIS.getInstructionIndex(Instr: MI).getBaseIndex(), LIS,
490 MRI: MI.getParent()->getParent()->getRegInfo());
491}
492
493template <typename Range>
494GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
495 Range &&LiveRegs) {
496 GCNRegPressure Res;
497 for (const auto &RM : LiveRegs)
498 Res.inc(Reg: RM.first, PrevMask: LaneBitmask::getNone(), NewMask: RM.second, MRI);
499 return Res;
500}
501
502bool isEqual(const GCNRPTracker::LiveRegSet &S1,
503 const GCNRPTracker::LiveRegSet &S2);
504
505Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr,
506 unsigned DynamicVGPRBlockSize = 0);
507
508Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
509 const MachineRegisterInfo &MRI);
510
511Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
512 const GCNRPTracker::LiveRegSet &TrackedL,
513 const TargetRegisterInfo *TRI, StringRef Pfx = " ");
514
515struct GCNRegPressurePrinter : public MachineFunctionPass {
516 static char ID;
517
518public:
519 GCNRegPressurePrinter() : MachineFunctionPass(ID) {}
520
521 bool runOnMachineFunction(MachineFunction &MF) override;
522
523 void getAnalysisUsage(AnalysisUsage &AU) const override {
524 AU.addRequired<LiveIntervalsWrapperPass>();
525 AU.setPreservesAll();
526 MachineFunctionPass::getAnalysisUsage(AU);
527 }
528};
529
530} // end namespace llvm
531
532#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
533