1//===- GCNRegPressure.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the GCNRegPressure class.
11///
12//===----------------------------------------------------------------------===//
13
14#include "GCNRegPressure.h"
15#include "AMDGPU.h"
16#include "SIMachineFunctionInfo.h"
17#include "llvm/CodeGen/MachineLoopInfo.h"
18#include "llvm/CodeGen/RegisterPressure.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "machine-scheduler"
23
24bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
25 const GCNRPTracker::LiveRegSet &S2) {
26 if (S1.size() != S2.size())
27 return false;
28
29 for (const auto &P : S1) {
30 auto I = S2.find(Val: P.first);
31 if (I == S2.end() || I->second != P.second)
32 return false;
33 }
34 return true;
35}
36
37///////////////////////////////////////////////////////////////////////////////
38// GCNRegPressure
39
40unsigned GCNRegPressure::getRegKind(const TargetRegisterClass *RC,
41 const SIRegisterInfo *STI) {
42 return STI->isSGPRClass(RC)
43 ? SGPR
44 : (STI->isAGPRClass(RC)
45 ? AGPR
46 : (STI->isVectorSuperClass(RC) ? AVGPR : VGPR));
47}
48
49void GCNRegPressure::inc(unsigned Reg,
50 LaneBitmask PrevMask,
51 LaneBitmask NewMask,
52 const MachineRegisterInfo &MRI) {
53 unsigned NewNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(LM: NewMask);
54 unsigned PrevNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(LM: PrevMask);
55 if (NewNumCoveredRegs == PrevNumCoveredRegs)
56 return;
57
58 int Sign = 1;
59 if (NewMask < PrevMask) {
60 std::swap(a&: NewMask, b&: PrevMask);
61 std::swap(a&: NewNumCoveredRegs, b&: PrevNumCoveredRegs);
62 Sign = -1;
63 }
64 assert(PrevMask < NewMask && PrevNumCoveredRegs < NewNumCoveredRegs &&
65 "prev mask should always be lesser than new");
66
67 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
68 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
69 const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
70 unsigned RegKind = getRegKind(RC, STI);
71 if (TRI->getRegSizeInBits(RC: *RC) != 32) {
72 // Reg is from a tuple register class.
73 if (PrevMask.none()) {
74 unsigned TupleIdx = TOTAL_KINDS + RegKind;
75 Value[TupleIdx] += Sign * TRI->getRegClassWeight(RC).RegWeight;
76 }
77 // Pressure scales with number of new registers covered by the new mask.
78 // Note when true16 is enabled, we can no longer safely use the following
79 // approach to calculate the difference in the number of 32-bit registers
80 // between two masks:
81 //
82 // Sign *= SIRegisterInfo::getNumCoveredRegs(~PrevMask & NewMask);
83 //
84 // The issue is that the mask calculation `~PrevMask & NewMask` doesn't
85 // properly account for partial usage of a 32-bit register when dealing with
86 // 16-bit registers.
87 //
88 // Consider this example:
89 // Assume PrevMask = 0b0010 and NewMask = 0b1111. Here, the correct register
90 // usage difference should be 1, because even though PrevMask uses only half
91 // of a 32-bit register, it should still be counted as a full register use.
92 // However, the mask calculation yields `~PrevMask & NewMask = 0b1101`, and
93 // calling `getNumCoveredRegs` returns 2 instead of 1. This incorrect
94 // calculation can lead to integer overflow when Sign = -1.
95 Sign *= NewNumCoveredRegs - PrevNumCoveredRegs;
96 }
97 Value[RegKind] += Sign;
98}
99
100namespace {
101struct RegExcess {
102 unsigned SGPR = 0;
103 unsigned VGPR = 0;
104 unsigned ArchVGPR = 0;
105 unsigned AGPR = 0;
106
107 bool anyExcess() const { return SGPR || VGPR || ArchVGPR || AGPR; }
108 bool hasVectorRegisterExcess() const { return VGPR || ArchVGPR || AGPR; }
109
110 RegExcess(const MachineFunction &MF, const GCNRegPressure &RP)
111 : RegExcess(MF, RP, GCNRPTarget(MF, RP)) {}
112 RegExcess(const MachineFunction &MF, const GCNRegPressure &RP,
113 const GCNRPTarget &Target) {
114 unsigned MaxSGPRs = Target.getMaxSGPRs();
115 unsigned MaxVGPRs = Target.getMaxVGPRs();
116
117 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
118 SGPR = std::max(a: static_cast<int>(RP.getSGPRNum() - MaxSGPRs), b: 0);
119
120 // The number of virtual VGPRs required to handle excess SGPR
121 unsigned WaveSize = ST.getWavefrontSize();
122 unsigned VGPRForSGPRSpills = divideCeil(Numerator: SGPR, Denominator: WaveSize);
123
124 unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
125
126 // Unified excess pressure conditions, accounting for VGPRs used for SGPR
127 // spills
128 VGPR = std::max(a: static_cast<int>(RP.getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()) +
129 VGPRForSGPRSpills - MaxVGPRs),
130 b: 0);
131
132 unsigned ArchVGPRLimit = ST.hasGFX90AInsts() ? MaxArchVGPRs : MaxVGPRs;
133 // Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
134 // spills
135 ArchVGPR = std::max(a: static_cast<int>(RP.getArchVGPRNum() +
136 VGPRForSGPRSpills - ArchVGPRLimit),
137 b: 0);
138
139 // AGPR excess pressure conditions
140 AGPR = std::max(a: static_cast<int>(RP.getAGPRNum() - ArchVGPRLimit), b: 0);
141 }
142};
143} // namespace
144
145bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
146 unsigned MaxOccupancy) const {
147 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
148 unsigned DynamicVGPRBlockSize =
149 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
150
151 const auto SGPROcc = std::min(a: MaxOccupancy,
152 b: ST.getOccupancyWithNumSGPRs(SGPRs: getSGPRNum()));
153 const auto VGPROcc = std::min(
154 a: MaxOccupancy, b: ST.getOccupancyWithNumVGPRs(VGPRs: getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()),
155 DynamicVGPRBlockSize));
156 const auto OtherSGPROcc = std::min(a: MaxOccupancy,
157 b: ST.getOccupancyWithNumSGPRs(SGPRs: O.getSGPRNum()));
158 const auto OtherVGPROcc =
159 std::min(a: MaxOccupancy,
160 b: ST.getOccupancyWithNumVGPRs(VGPRs: O.getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()),
161 DynamicVGPRBlockSize));
162
163 const auto Occ = std::min(a: SGPROcc, b: VGPROcc);
164 const auto OtherOcc = std::min(a: OtherSGPROcc, b: OtherVGPROcc);
165
166 // Give first precedence to the better occupancy.
167 if (Occ != OtherOcc)
168 return Occ > OtherOcc;
169
170 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
171
172 RegExcess Excess(MF, *this);
173 RegExcess OtherExcess(MF, O);
174
175 unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
176
177 bool ExcessRP = Excess.anyExcess();
178 bool OtherExcessRP = OtherExcess.anyExcess();
179
180 // Give second precedence to the reduced number of spills to hold the register
181 // pressure.
182 if (ExcessRP || OtherExcessRP) {
183 // The difference in excess VGPR pressure, after including VGPRs used for
184 // SGPR spills
185 int VGPRDiff =
186 ((OtherExcess.VGPR + OtherExcess.ArchVGPR + OtherExcess.AGPR) -
187 (Excess.VGPR + Excess.ArchVGPR + Excess.AGPR));
188
189 int SGPRDiff = OtherExcess.SGPR - Excess.SGPR;
190
191 if (VGPRDiff != 0)
192 return VGPRDiff > 0;
193 if (SGPRDiff != 0) {
194 unsigned PureExcessVGPR =
195 std::max(a: static_cast<int>(getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()) - MaxVGPRs),
196 b: 0) +
197 std::max(a: static_cast<int>(getVGPRNum(UnifiedVGPRFile: false) - MaxArchVGPRs), b: 0);
198 unsigned OtherPureExcessVGPR =
199 std::max(
200 a: static_cast<int>(O.getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()) - MaxVGPRs),
201 b: 0) +
202 std::max(a: static_cast<int>(O.getVGPRNum(UnifiedVGPRFile: false) - MaxArchVGPRs), b: 0);
203
204 // If we have a special case where there is a tie in excess VGPR, but one
205 // of the pressures has VGPR usage from SGPR spills, prefer the pressure
206 // with SGPR spills.
207 if (PureExcessVGPR != OtherPureExcessVGPR)
208 return SGPRDiff < 0;
209 // If both pressures have the same excess pressure before and after
210 // accounting for SGPR spills, prefer fewer SGPR spills.
211 return SGPRDiff > 0;
212 }
213 }
214
215 bool SGPRImportant = SGPROcc < VGPROcc;
216 const bool OtherSGPRImportant = OtherSGPROcc < OtherVGPROcc;
217
218 // If both pressures disagree on what is more important compare vgprs.
219 if (SGPRImportant != OtherSGPRImportant) {
220 SGPRImportant = false;
221 }
222
223 // Give third precedence to lower register tuple pressure.
224 bool SGPRFirst = SGPRImportant;
225 for (int I = 2; I > 0; --I, SGPRFirst = !SGPRFirst) {
226 if (SGPRFirst) {
227 auto SW = getSGPRTuplesWeight();
228 auto OtherSW = O.getSGPRTuplesWeight();
229 if (SW != OtherSW)
230 return SW < OtherSW;
231 } else {
232 auto VW = getVGPRTuplesWeight();
233 auto OtherVW = O.getVGPRTuplesWeight();
234 if (VW != OtherVW)
235 return VW < OtherVW;
236 }
237 }
238
239 // Give final precedence to lower general RP.
240 return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
241 (getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()) <
242 O.getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()));
243}
244
245Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
246 unsigned DynamicVGPRBlockSize) {
247 return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
248 OS << "VGPRs: " << RP.getArchVGPRNum() << ' '
249 << "AGPRs: " << RP.getAGPRNum();
250 if (ST)
251 OS << "(O"
252 << ST->getOccupancyWithNumVGPRs(VGPRs: RP.getVGPRNum(UnifiedVGPRFile: ST->hasGFX90AInsts()),
253 DynamicVGPRBlockSize)
254 << ')';
255 OS << ", SGPRs: " << RP.getSGPRNum();
256 if (ST)
257 OS << "(O" << ST->getOccupancyWithNumSGPRs(SGPRs: RP.getSGPRNum()) << ')';
258 OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
259 << ", LSGPR WT: " << RP.getSGPRTuplesWeight();
260 if (ST)
261 OS << " -> Occ: " << RP.getOccupancy(ST: *ST, DynamicVGPRBlockSize);
262 OS << '\n';
263 });
264}
265
266static LaneBitmask getDefRegMask(const MachineOperand &MO,
267 const MachineRegisterInfo &MRI) {
268 assert(MO.isDef() && MO.isReg() && MO.getReg().isVirtual());
269
270 // We don't rely on read-undef flag because in case of tentative schedule
271 // tracking it isn't set correctly yet. This works correctly however since
272 // use mask has been tracked before using LIS.
273 return MO.getSubReg() == 0 ?
274 MRI.getMaxLaneMaskForVReg(Reg: MO.getReg()) :
275 MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubIdx: MO.getSubReg());
276}
277
278static void
279collectVirtualRegUses(SmallVectorImpl<VRegMaskOrUnit> &VRegMaskOrUnits,
280 const MachineInstr &MI, const LiveIntervals &LIS,
281 const MachineRegisterInfo &MRI) {
282
283 auto &TRI = *MRI.getTargetRegisterInfo();
284 for (const auto &MO : MI.operands()) {
285 if (!MO.isReg() || !MO.getReg().isVirtual())
286 continue;
287 if (!MO.isUse() || !MO.readsReg())
288 continue;
289
290 Register Reg = MO.getReg();
291 auto I = llvm::find_if(Range&: VRegMaskOrUnits, P: [Reg](const VRegMaskOrUnit &RM) {
292 return RM.VRegOrUnit.asVirtualReg() == Reg;
293 });
294
295 auto &P = I == VRegMaskOrUnits.end()
296 ? VRegMaskOrUnits.emplace_back(Args: VirtRegOrUnit(Reg),
297 Args: LaneBitmask::getNone())
298 : *I;
299
300 P.LaneMask |= MO.getSubReg() ? TRI.getSubRegIndexLaneMask(SubIdx: MO.getSubReg())
301 : MRI.getMaxLaneMaskForVReg(Reg);
302 }
303
304 SlotIndex InstrSI;
305 for (auto &P : VRegMaskOrUnits) {
306 auto &LI = LIS.getInterval(Reg: P.VRegOrUnit.asVirtualReg());
307 if (!LI.hasSubRanges())
308 continue;
309
310 // For a tentative schedule LIS isn't updated yet but livemask should
311 // remain the same on any schedule. Subreg defs can be reordered but they
312 // all must dominate uses anyway.
313 if (!InstrSI)
314 InstrSI = LIS.getInstructionIndex(Instr: MI).getBaseIndex();
315
316 P.LaneMask = getLiveLaneMask(LI, SI: InstrSI, MRI, LaneMaskFilter: P.LaneMask);
317 }
318}
319
320/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
321static LaneBitmask getLanesWithProperty(
322 const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
323 bool TrackLaneMasks, Register Reg, SlotIndex Pos,
324 function_ref<bool(const LiveRange &LR, SlotIndex Pos)> Property) {
325 assert(Reg.isVirtual());
326 const LiveInterval &LI = LIS.getInterval(Reg);
327 LaneBitmask Result;
328 if (TrackLaneMasks && LI.hasSubRanges()) {
329 for (const LiveInterval::SubRange &SR : LI.subranges()) {
330 if (Property(SR, Pos))
331 Result |= SR.LaneMask;
332 }
333 } else if (Property(LI, Pos)) {
334 Result =
335 TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(Reg) : LaneBitmask::getAll();
336 }
337
338 return Result;
339}
340
341/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
342/// Helper to find a vreg use between two indices {PriorUseIdx, NextUseIdx}.
343/// The query starts with a lane bitmask which gets lanes/bits removed for every
344/// use we find.
345static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
346 SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
347 const MachineRegisterInfo &MRI,
348 const SIRegisterInfo *TRI,
349 const LiveIntervals *LIS,
350 bool Upward = false) {
351 for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
352 if (MO.isUndef())
353 continue;
354 const MachineInstr *MI = MO.getParent();
355 SlotIndex InstSlot = LIS->getInstructionIndex(Instr: *MI).getRegSlot();
356 bool InRange = Upward ? (InstSlot > PriorUseIdx && InstSlot <= NextUseIdx)
357 : (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx);
358 if (!InRange)
359 continue;
360
361 unsigned SubRegIdx = MO.getSubReg();
362 LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubIdx: SubRegIdx);
363 LastUseMask &= ~UseMask;
364 if (LastUseMask.none())
365 return LaneBitmask::getNone();
366 }
367 return LastUseMask;
368}
369
370////////////////////////////////////////////////////////////////////////////////
371// GCNRPTarget
372
373GCNRPTarget::GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP)
374 : GCNRPTarget(RP, MF) {
375 const Function &F = MF.getFunction();
376 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
377 setTarget(NumSGPRs: ST.getMaxNumSGPRs(F), NumVGPRs: ST.getMaxNumVGPRs(F));
378}
379
380GCNRPTarget::GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs,
381 const MachineFunction &MF, const GCNRegPressure &RP)
382 : GCNRPTarget(RP, MF) {
383 setTarget(NumSGPRs, NumVGPRs);
384}
385
386GCNRPTarget::GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
387 const GCNRegPressure &RP)
388 : GCNRPTarget(RP, MF) {
389 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
390 unsigned DynamicVGPRBlockSize =
391 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
392 setTarget(NumSGPRs: ST.getMaxNumSGPRs(WavesPerEU: Occupancy, /*Addressable=*/false),
393 NumVGPRs: ST.getMaxNumVGPRs(WavesPerEU: Occupancy, DynamicVGPRBlockSize));
394}
395
396void GCNRPTarget::setTarget(unsigned NumSGPRs, unsigned NumVGPRs) {
397 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
398 MaxSGPRs = std::min(a: ST.getAddressableNumSGPRs(), b: NumSGPRs);
399 MaxVGPRs = std::min(a: ST.getAddressableNumArchVGPRs(), b: NumVGPRs);
400 if (UnifiedRF) {
401 unsigned DynamicVGPRBlockSize =
402 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
403 MaxUnifiedVGPRs =
404 std::min(a: ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), b: NumVGPRs);
405 } else {
406 MaxUnifiedVGPRs = 0;
407 }
408}
409
410bool GCNRPTarget::isSaveBeneficial(Register Reg) const {
411 const MachineRegisterInfo &MRI = MF.getRegInfo();
412 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
413 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
414 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
415
416 RegExcess Excess(MF, RP, *this);
417
418 if (SRI->isSGPRClass(RC))
419 return Excess.SGPR;
420
421 if (SRI->isAGPRClass(RC))
422 return (UnifiedRF && Excess.VGPR) || Excess.AGPR;
423
424 return (UnifiedRF && Excess.VGPR) || Excess.ArchVGPR;
425}
426
427unsigned GCNRPTarget::getNumRegsBenefit(const GCNRegPressure &SaveRP) const {
428 RegExcess Excess(MF, RP, *this);
429 const unsigned NumVGPRAboveAddrLimit =
430 std::min(a: Excess.ArchVGPR, b: SaveRP.getArchVGPRNum()) +
431 std::min(a: Excess.AGPR, b: SaveRP.getAGPRNum());
432 unsigned NumRegsSaved =
433 std::min(a: Excess.SGPR, b: SaveRP.getSGPRNum()) + NumVGPRAboveAddrLimit;
434
435 if (UnifiedRF && Excess.VGPR) {
436 // We have already accounted for excess pressure above addressive limits for
437 // the individual VGPR classes. However for targets with unified RFs there
438 // is also a unified VGPR pressure (ArchVGPR + AGPR combination) limit to
439 // honor that may be more restrictive that the per-VGPR-class limits. We
440 // must also be careful not to double-count VGPR saves that may contribute
441 // to lowering pressure both above the addressable limit in their respective
442 // class as well as in the unified VGPR limit.
443 const unsigned VGPRSave = SaveRP.getArchVGPRNum() + SaveRP.getAGPRNum();
444 if (NumVGPRAboveAddrLimit < VGPRSave)
445 NumRegsSaved += std::min(a: Excess.VGPR, b: VGPRSave - NumVGPRAboveAddrLimit);
446 }
447
448 return NumRegsSaved;
449}
450
451bool GCNRPTarget::satisfied(const GCNRegPressure &TestRP) const {
452 if (TestRP.getSGPRNum() > MaxSGPRs || TestRP.getVGPRNum(UnifiedVGPRFile: false) > MaxVGPRs)
453 return false;
454 if (UnifiedRF && TestRP.getVGPRNum(UnifiedVGPRFile: true) > MaxUnifiedVGPRs)
455 return false;
456 return true;
457}
458
459bool GCNRPTarget::hasVectorRegisterExcess() const {
460 RegExcess Excess(MF, RP, *this);
461 return Excess.hasVectorRegisterExcess();
462}
463
464///////////////////////////////////////////////////////////////////////////////
465// GCNRPTracker
466
467LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
468 const LiveIntervals &LIS,
469 const MachineRegisterInfo &MRI,
470 LaneBitmask LaneMaskFilter) {
471 return getLiveLaneMask(LI: LIS.getInterval(Reg), SI, MRI, LaneMaskFilter);
472}
473
474LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
475 const MachineRegisterInfo &MRI,
476 LaneBitmask LaneMaskFilter) {
477 LaneBitmask LiveMask;
478 if (LI.hasSubRanges()) {
479 for (const auto &S : LI.subranges())
480 if ((S.LaneMask & LaneMaskFilter).any() && S.liveAt(index: SI)) {
481 LiveMask |= S.LaneMask;
482 assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
483 }
484 } else if (LI.liveAt(index: SI)) {
485 LiveMask = MRI.getMaxLaneMaskForVReg(Reg: LI.reg());
486 }
487 LiveMask &= LaneMaskFilter;
488 return LiveMask;
489}
490
491GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
492 const LiveIntervals &LIS,
493 const MachineRegisterInfo &MRI,
494 GCNRegPressure::RegKind RegKind) {
495 GCNRPTracker::LiveRegSet LiveRegs;
496 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
497 auto Reg = Register::index2VirtReg(Index: I);
498 if (RegKind != GCNRegPressure::TOTAL_KINDS &&
499 GCNRegPressure::getRegKind(Reg, MRI) != RegKind)
500 continue;
501 if (!LIS.hasInterval(Reg))
502 continue;
503 auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
504 if (LiveMask.any())
505 LiveRegs[Reg] = LiveMask;
506 }
507 return LiveRegs;
508}
509
510void GCNRPTracker::reset(const MachineInstr &MI,
511 const LiveRegSet *LiveRegsCopy,
512 bool After) {
513 const MachineFunction &MF = *MI.getMF();
514 MRI = &MF.getRegInfo();
515 if (LiveRegsCopy) {
516 if (&LiveRegs != LiveRegsCopy)
517 LiveRegs = *LiveRegsCopy;
518 } else {
519 LiveRegs = After ? getLiveRegsAfter(MI, LIS)
520 : getLiveRegsBefore(MI, LIS);
521 }
522
523 MaxPressure = CurPressure = getRegPressure(MRI: *MRI, LiveRegs);
524}
525
526void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
527 const LiveRegSet &LiveRegs_) {
528 MRI = &MRI_;
529 LiveRegs = LiveRegs_;
530 LastTrackedMI = nullptr;
531 MaxPressure = CurPressure = getRegPressure(MRI: MRI_, LiveRegs: LiveRegs_);
532}
533
534/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
535LaneBitmask GCNRPTracker::getLastUsedLanes(Register Reg, SlotIndex Pos) const {
536 return getLanesWithProperty(
537 LIS, MRI: *MRI, TrackLaneMasks: true, Reg, Pos: Pos.getBaseIndex(),
538 Property: [](const LiveRange &LR, SlotIndex Pos) {
539 const LiveRange::Segment *S = LR.getSegmentContaining(Idx: Pos);
540 return S != nullptr && S->end == Pos.getRegSlot();
541 });
542}
543
544////////////////////////////////////////////////////////////////////////////////
545// GCNUpwardRPTracker
546
547void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
548 assert(MRI && "call reset first");
549
550 LastTrackedMI = &MI;
551
552 if (MI.isDebugInstr())
553 return;
554
555 // Kill all defs.
556 GCNRegPressure DefPressure, ECDefPressure;
557 bool HasECDefs = false;
558 for (const MachineOperand &MO : MI.all_defs()) {
559 if (!MO.getReg().isVirtual())
560 continue;
561
562 Register Reg = MO.getReg();
563 LaneBitmask DefMask = getDefRegMask(MO, MRI: *MRI);
564
565 // Treat a def as fully live at the moment of definition: keep a record.
566 if (MO.isEarlyClobber()) {
567 ECDefPressure.inc(Reg, PrevMask: LaneBitmask::getNone(), NewMask: DefMask, MRI: *MRI);
568 HasECDefs = true;
569 } else
570 DefPressure.inc(Reg, PrevMask: LaneBitmask::getNone(), NewMask: DefMask, MRI: *MRI);
571
572 auto I = LiveRegs.find(Val: Reg);
573 if (I == LiveRegs.end())
574 continue;
575
576 LaneBitmask &LiveMask = I->second;
577 LaneBitmask PrevMask = LiveMask;
578 LiveMask &= ~DefMask;
579 CurPressure.inc(Reg, PrevMask, NewMask: LiveMask, MRI: *MRI);
580 if (LiveMask.none())
581 LiveRegs.erase(I);
582 }
583
584 // Update MaxPressure with defs pressure.
585 DefPressure += CurPressure;
586 if (HasECDefs)
587 DefPressure += ECDefPressure;
588 MaxPressure = max(P1: DefPressure, P2: MaxPressure);
589
590 // Make uses alive.
591 SmallVector<VRegMaskOrUnit, 8> RegUses;
592 collectVirtualRegUses(VRegMaskOrUnits&: RegUses, MI, LIS, MRI: *MRI);
593 for (const VRegMaskOrUnit &U : RegUses) {
594 LaneBitmask &LiveMask = LiveRegs[U.VRegOrUnit.asVirtualReg()];
595 LaneBitmask PrevMask = LiveMask;
596 LiveMask |= U.LaneMask;
597 CurPressure.inc(Reg: U.VRegOrUnit.asVirtualReg(), PrevMask, NewMask: LiveMask, MRI: *MRI);
598 }
599
600 // Update MaxPressure with uses plus early-clobber defs pressure.
601 MaxPressure = HasECDefs ? max(P1: CurPressure + ECDefPressure, P2: MaxPressure)
602 : max(P1: CurPressure, P2: MaxPressure);
603
604 assert(CurPressure == getRegPressure(*MRI, LiveRegs));
605}
606
607////////////////////////////////////////////////////////////////////////////////
608// GCNDownwardRPTracker
609
610bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
611 const LiveRegSet *LiveRegsCopy) {
612 MRI = &MI.getMF()->getRegInfo();
613 LastTrackedMI = nullptr;
614 MBBEnd = MI.getParent()->end();
615 NextMI = &MI;
616 NextMI = skipDebugInstructionsForward(It: NextMI, End: MBBEnd);
617 if (NextMI == MBBEnd)
618 return false;
619 GCNRPTracker::reset(MI: *NextMI, LiveRegsCopy, After: false);
620 return true;
621}
622
623bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
624 bool UseInternalIterator) {
625 assert(MRI && "call reset first");
626 SlotIndex SI;
627 const MachineInstr *CurrMI;
628 if (UseInternalIterator) {
629 if (!LastTrackedMI)
630 return NextMI == MBBEnd;
631
632 assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
633 CurrMI = LastTrackedMI;
634
635 SI = NextMI == MBBEnd
636 ? LIS.getInstructionIndex(Instr: *LastTrackedMI).getDeadSlot()
637 : LIS.getInstructionIndex(Instr: *NextMI).getBaseIndex();
638 } else { //! UseInternalIterator
639 SI = LIS.getInstructionIndex(Instr: *MI).getBaseIndex();
640 CurrMI = MI;
641 }
642
643 assert(SI.isValid());
644
645 // Remove dead registers or mask bits.
646 SmallSet<Register, 8> SeenRegs;
647 for (auto &MO : CurrMI->operands()) {
648 if (!MO.isReg() || !MO.getReg().isVirtual())
649 continue;
650 if (MO.isUse() && !MO.readsReg())
651 continue;
652 if (!UseInternalIterator && MO.isDef())
653 continue;
654 if (!SeenRegs.insert(V: MO.getReg()).second)
655 continue;
656 const LiveInterval &LI = LIS.getInterval(Reg: MO.getReg());
657 if (LI.hasSubRanges()) {
658 auto It = LiveRegs.end();
659 for (const auto &S : LI.subranges()) {
660 if (!S.liveAt(index: SI)) {
661 if (It == LiveRegs.end()) {
662 It = LiveRegs.find(Val: MO.getReg());
663 if (It == LiveRegs.end())
664 llvm_unreachable("register isn't live");
665 }
666 auto PrevMask = It->second;
667 It->second &= ~S.LaneMask;
668 CurPressure.inc(Reg: MO.getReg(), PrevMask, NewMask: It->second, MRI: *MRI);
669 }
670 }
671 if (It != LiveRegs.end() && It->second.none())
672 LiveRegs.erase(I: It);
673 } else if (!LI.liveAt(index: SI)) {
674 auto It = LiveRegs.find(Val: MO.getReg());
675 if (It == LiveRegs.end())
676 llvm_unreachable("register isn't live");
677 CurPressure.inc(Reg: MO.getReg(), PrevMask: It->second, NewMask: LaneBitmask::getNone(), MRI: *MRI);
678 LiveRegs.erase(I: It);
679 }
680 }
681
682 MaxPressure = max(P1: MaxPressure, P2: CurPressure);
683
684 LastTrackedMI = nullptr;
685
686 return UseInternalIterator && (NextMI == MBBEnd);
687}
688
689void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
690 bool UseInternalIterator) {
691 if (UseInternalIterator) {
692 LastTrackedMI = &*NextMI++;
693 NextMI = skipDebugInstructionsForward(It: NextMI, End: MBBEnd);
694 } else {
695 LastTrackedMI = MI;
696 }
697
698 const MachineInstr *CurrMI = LastTrackedMI;
699
700 // Add new registers or mask bits.
701 for (const auto &MO : CurrMI->all_defs()) {
702 Register Reg = MO.getReg();
703 if (!Reg.isVirtual())
704 continue;
705 auto &LiveMask = LiveRegs[Reg];
706 auto PrevMask = LiveMask;
707 LiveMask |= getDefRegMask(MO, MRI: *MRI);
708 CurPressure.inc(Reg, PrevMask, NewMask: LiveMask, MRI: *MRI);
709 }
710
711 MaxPressure = max(P1: MaxPressure, P2: CurPressure);
712}
713
714bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator) {
715 if (UseInternalIterator && NextMI == MBBEnd)
716 return false;
717
718 advanceBeforeNext(MI, UseInternalIterator);
719 advanceToNext(MI, UseInternalIterator);
720 if (!UseInternalIterator) {
721 // We must remove any dead def lanes from the current RP
722 advanceBeforeNext(MI, UseInternalIterator: true);
723 }
724 return true;
725}
726
727bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator End) {
728 while (NextMI != End)
729 if (!advance()) return false;
730 return true;
731}
732
733bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin,
734 MachineBasicBlock::const_iterator End,
735 const LiveRegSet *LiveRegsCopy) {
736 reset(MI: *Begin, LiveRegsCopy);
737 return advance(End);
738}
739
740Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
741 const GCNRPTracker::LiveRegSet &TrackedLR,
742 const TargetRegisterInfo *TRI, StringRef Pfx) {
743 return Printable([&LISLR, &TrackedLR, TRI, Pfx](raw_ostream &OS) {
744 for (auto const &P : TrackedLR) {
745 auto I = LISLR.find(Val: P.first);
746 if (I == LISLR.end()) {
747 OS << Pfx << printReg(Reg: P.first, TRI) << ":L" << PrintLaneMask(LaneMask: P.second)
748 << " isn't found in LIS reported set\n";
749 } else if (I->second != P.second) {
750 OS << Pfx << printReg(Reg: P.first, TRI)
751 << " masks doesn't match: LIS reported " << PrintLaneMask(LaneMask: I->second)
752 << ", tracked " << PrintLaneMask(LaneMask: P.second) << '\n';
753 }
754 }
755 for (auto const &P : LISLR) {
756 auto I = TrackedLR.find(Val: P.first);
757 if (I == TrackedLR.end()) {
758 OS << Pfx << printReg(Reg: P.first, TRI) << ":L" << PrintLaneMask(LaneMask: P.second)
759 << " isn't found in tracked set\n";
760 }
761 }
762 });
763}
764
765GCNRegPressure
766GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
767 const SIRegisterInfo *TRI) const {
768 assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
769
770 SlotIndex SlotIdx;
771 SlotIdx = LIS.getInstructionIndex(Instr: *MI).getRegSlot();
772
773 // Account for register pressure similar to RegPressureTracker::recede().
774 RegisterOperands RegOpers;
775 RegOpers.collect(MI: *MI, TRI: *TRI, MRI: *MRI, TrackLaneMasks: true, /*IgnoreDead=*/false);
776 RegOpers.adjustLaneLiveness(LIS, MRI: *MRI, Pos: SlotIdx);
777 GCNRegPressure TempPressure = CurPressure;
778
779 for (const VRegMaskOrUnit &Use : RegOpers.Uses) {
780 if (!Use.VRegOrUnit.isVirtualReg())
781 continue;
782 Register Reg = Use.VRegOrUnit.asVirtualReg();
783 LaneBitmask LastUseMask = getLastUsedLanes(Reg, Pos: SlotIdx);
784 if (LastUseMask.none())
785 continue;
786 // The LastUseMask is queried from the liveness information of instruction
787 // which may be further down the schedule. Some lanes may actually not be
788 // last uses for the current position.
789 // FIXME: allow the caller to pass in the list of vreg uses that remain
790 // to be bottom-scheduled to avoid searching uses at each query.
791 SlotIndex CurrIdx;
792 const MachineBasicBlock *MBB = MI->getParent();
793 MachineBasicBlock::const_iterator IdxPos = skipDebugInstructionsForward(
794 It: LastTrackedMI ? LastTrackedMI : MBB->begin(), End: MBB->end());
795 if (IdxPos == MBB->end()) {
796 CurrIdx = LIS.getMBBEndIdx(mbb: MBB);
797 } else {
798 CurrIdx = LIS.getInstructionIndex(Instr: *IdxPos).getRegSlot();
799 }
800
801 LastUseMask =
802 findUseBetween(Reg, LastUseMask, PriorUseIdx: CurrIdx, NextUseIdx: SlotIdx, MRI: *MRI, TRI, LIS: &LIS);
803 if (LastUseMask.none())
804 continue;
805
806 auto It = LiveRegs.find(Val: Reg);
807 LaneBitmask LiveMask = It != LiveRegs.end() ? It->second : LaneBitmask(0);
808 LaneBitmask NewMask = LiveMask & ~LastUseMask;
809 TempPressure.inc(Reg, PrevMask: LiveMask, NewMask, MRI: *MRI);
810 }
811
812 // Generate liveness for defs.
813 for (const VRegMaskOrUnit &Def : RegOpers.Defs) {
814 if (!Def.VRegOrUnit.isVirtualReg())
815 continue;
816 Register Reg = Def.VRegOrUnit.asVirtualReg();
817 auto It = LiveRegs.find(Val: Reg);
818 LaneBitmask LiveMask = It != LiveRegs.end() ? It->second : LaneBitmask(0);
819 LaneBitmask NewMask = LiveMask | Def.LaneMask;
820 TempPressure.inc(Reg, PrevMask: LiveMask, NewMask, MRI: *MRI);
821 }
822
823 return TempPressure;
824}
825
826bool GCNUpwardRPTracker::isValid() const {
827 const auto &SI = LIS.getInstructionIndex(Instr: *LastTrackedMI).getBaseIndex();
828 const auto LISLR = llvm::getLiveRegs(SI, LIS, MRI: *MRI);
829 const auto &TrackedLR = LiveRegs;
830
831 if (!isEqual(S1: LISLR, S2: TrackedLR)) {
832 dbgs() << "\nGCNUpwardRPTracker error: Tracked and"
833 " LIS reported livesets mismatch:\n"
834 << print(LiveRegs: LISLR, MRI: *MRI);
835 reportMismatch(LISLR, TrackedLR, TRI: MRI->getTargetRegisterInfo());
836 return false;
837 }
838
839 auto LISPressure = getRegPressure(MRI: *MRI, LiveRegs: LISLR);
840 if (LISPressure != CurPressure) {
841 dbgs() << "GCNUpwardRPTracker error: Pressure sets different\nTracked: "
842 << print(RP: CurPressure) << "LIS rpt: " << print(RP: LISPressure);
843 return false;
844 }
845 return true;
846}
847
848Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
849 const MachineRegisterInfo &MRI) {
850 return Printable([&LiveRegs, &MRI](raw_ostream &OS) {
851 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
852 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
853 Register Reg = Register::index2VirtReg(Index: I);
854 auto It = LiveRegs.find(Val: Reg);
855 if (It != LiveRegs.end() && It->second.any())
856 OS << ' ' << printReg(Reg, TRI) << ':' << PrintLaneMask(LaneMask: It->second);
857 }
858 OS << '\n';
859 });
860}
861
862void GCNRegPressure::dump() const { dbgs() << print(RP: *this); }
863
864static cl::opt<bool> UseDownwardTracker(
865 "amdgpu-print-rp-downward",
866 cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"),
867 cl::init(Val: false), cl::Hidden);
868
869char llvm::GCNRegPressurePrinter::ID = 0;
870char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
871
872INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
873
874// Return lanemask of Reg's subregs that are live-through at [Begin, End] and
875// are fully covered by Mask.
876static LaneBitmask
877getRegLiveThroughMask(const MachineRegisterInfo &MRI, const LiveIntervals &LIS,
878 Register Reg, SlotIndex Begin, SlotIndex End,
879 LaneBitmask Mask = LaneBitmask::getAll()) {
880
881 auto IsInOneSegment = [Begin, End](const LiveRange &LR) -> bool {
882 auto *Segment = LR.getSegmentContaining(Idx: Begin);
883 return Segment && Segment->contains(I: End);
884 };
885
886 LaneBitmask LiveThroughMask;
887 const LiveInterval &LI = LIS.getInterval(Reg);
888 if (LI.hasSubRanges()) {
889 for (auto &SR : LI.subranges()) {
890 if ((SR.LaneMask & Mask) == SR.LaneMask && IsInOneSegment(SR))
891 LiveThroughMask |= SR.LaneMask;
892 }
893 } else {
894 LaneBitmask RegMask = MRI.getMaxLaneMaskForVReg(Reg);
895 if ((RegMask & Mask) == RegMask && IsInOneSegment(LI))
896 LiveThroughMask = RegMask;
897 }
898
899 return LiveThroughMask;
900}
901
902bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
903 const MachineRegisterInfo &MRI = MF.getRegInfo();
904 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
905 const LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
906
907 auto &OS = dbgs();
908
909// Leading spaces are important for YAML syntax.
910#define PFX " "
911
912 OS << "---\nname: " << MF.getName() << "\nbody: |\n";
913
914 auto printRP = [](const GCNRegPressure &RP) {
915 return Printable([&RP](raw_ostream &OS) {
916 OS << format(PFX " %-5d", Vals: RP.getSGPRNum())
917 << format(Fmt: " %-5d", Vals: RP.getVGPRNum(UnifiedVGPRFile: false));
918 });
919 };
920
921 auto ReportLISMismatchIfAny = [&](const GCNRPTracker::LiveRegSet &TrackedLR,
922 const GCNRPTracker::LiveRegSet &LISLR) {
923 if (LISLR != TrackedLR) {
924 OS << PFX " mis LIS: " << llvm::print(LiveRegs: LISLR, MRI)
925 << reportMismatch(LISLR, TrackedLR, TRI, PFX " ");
926 }
927 };
928
929 // Register pressure before and at an instruction (in program order).
930 SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
931
932 for (auto &MBB : MF) {
933 RP.clear();
934 RP.reserve(N: MBB.size());
935
936 OS << PFX;
937 MBB.printName(os&: OS);
938 OS << ":\n";
939
940 SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(mbb: &MBB);
941 SlotIndex MBBLastSlot = LIS.getSlotIndexes()->getMBBLastIdx(MBB: &MBB);
942
943 GCNRPTracker::LiveRegSet LiveIn, LiveOut;
944 GCNRegPressure RPAtMBBEnd;
945
946 if (UseDownwardTracker) {
947 if (MBB.empty()) {
948 LiveIn = LiveOut = getLiveRegs(SI: MBBStartSlot, LIS, MRI);
949 RPAtMBBEnd = getRegPressure(MRI, LiveRegs&: LiveIn);
950 } else {
951 GCNDownwardRPTracker RPT(LIS);
952 RPT.reset(MI: MBB.front());
953
954 LiveIn = RPT.getLiveRegs();
955
956 while (!RPT.advanceBeforeNext()) {
957 GCNRegPressure RPBeforeMI = RPT.getPressure();
958 RPT.advanceToNext();
959 RP.emplace_back(Args&: RPBeforeMI, Args: RPT.getPressure());
960 }
961
962 LiveOut = RPT.getLiveRegs();
963 RPAtMBBEnd = RPT.getPressure();
964 }
965 } else {
966 GCNUpwardRPTracker RPT(LIS);
967 RPT.reset(MRI, SI: MBBLastSlot);
968
969 LiveOut = RPT.getLiveRegs();
970 RPAtMBBEnd = RPT.getPressure();
971
972 for (auto &MI : reverse(C&: MBB)) {
973 RPT.resetMaxPressure();
974 RPT.recede(MI);
975 if (!MI.isDebugInstr())
976 RP.emplace_back(Args: RPT.getPressure(), Args: RPT.getMaxPressure());
977 }
978
979 LiveIn = RPT.getLiveRegs();
980 }
981
982 OS << PFX " Live-in: " << llvm::print(LiveRegs: LiveIn, MRI);
983 if (!UseDownwardTracker)
984 ReportLISMismatchIfAny(LiveIn, getLiveRegs(SI: MBBStartSlot, LIS, MRI));
985
986 OS << PFX " SGPR VGPR\n";
987 int I = 0;
988 for (auto &MI : MBB) {
989 if (!MI.isDebugInstr()) {
990 auto &[RPBeforeInstr, RPAtInstr] =
991 RP[UseDownwardTracker ? I : (RP.size() - 1 - I)];
992 ++I;
993 OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
994 } else
995 OS << PFX " ";
996 MI.print(OS);
997 }
998 OS << printRP(RPAtMBBEnd) << '\n';
999
1000 OS << PFX " Live-out:" << llvm::print(LiveRegs: LiveOut, MRI);
1001 if (UseDownwardTracker)
1002 ReportLISMismatchIfAny(LiveOut, getLiveRegs(SI: MBBLastSlot, LIS, MRI));
1003
1004 GCNRPTracker::LiveRegSet LiveThrough;
1005 for (auto [Reg, Mask] : LiveIn) {
1006 LaneBitmask MaskIntersection = Mask & LiveOut.lookup(Val: Reg);
1007 if (MaskIntersection.any()) {
1008 LaneBitmask LTMask = getRegLiveThroughMask(
1009 MRI, LIS, Reg, Begin: MBBStartSlot, End: MBBLastSlot, Mask: MaskIntersection);
1010 if (LTMask.any())
1011 LiveThrough[Reg] = LTMask;
1012 }
1013 }
1014 OS << PFX " Live-thr:" << llvm::print(LiveRegs: LiveThrough, MRI);
1015 OS << printRP(getRegPressure(MRI, LiveRegs&: LiveThrough)) << '\n';
1016 }
1017 OS << "...\n";
1018 return false;
1019
1020#undef PFX
1021}
1022
1023#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1024LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
1025 GCNRegPressure::RegKind Kind,
1026 LiveIntervals &LIS,
1027 const MachineLoopInfo *MLI) {
1028
1029 const MachineRegisterInfo &MRI = MF.getRegInfo();
1030 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1031 auto &OS = dbgs();
1032 const char *RegName = GCNRegPressure::getName(Kind);
1033
1034 unsigned MaxNumRegs = 0;
1035 const MachineInstr *MaxPressureMI = nullptr;
1036 GCNUpwardRPTracker RPT(LIS);
1037 for (const MachineBasicBlock &MBB : MF) {
1038 RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
1039 for (const MachineInstr &MI : reverse(MBB)) {
1040 RPT.recede(MI);
1041 unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind);
1042 if (NumRegs > MaxNumRegs) {
1043 MaxNumRegs = NumRegs;
1044 MaxPressureMI = &MI;
1045 }
1046 }
1047 }
1048
1049 SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI);
1050
1051 // Max pressure can occur at either the early-clobber or register slot.
1052 // Choose the maximum liveset between both slots. This is ugly but this is
1053 // diagnostic code.
1054 SlotIndex ECSlot = MISlot.getRegSlot(true);
1055 SlotIndex RSlot = MISlot.getRegSlot(false);
1056 GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind);
1057 GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind);
1058 unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind);
1059 unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind);
1060 GCNRPTracker::LiveRegSet *LiveSet =
1061 ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet;
1062 SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot;
1063 assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs);
1064
1065 // Split live registers into single-def and multi-def sets.
1066 GCNRegPressure SDefPressure, MDefPressure;
1067 SmallVector<Register, 16> SDefRegs, MDefRegs;
1068 for (auto [Reg, LaneMask] : *LiveSet) {
1069 assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind);
1070 LiveInterval &LI = LIS.getInterval(Reg);
1071 if (LI.getNumValNums() == 1 ||
1072 (LI.hasSubRanges() &&
1073 llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) {
1074 return SR.getNumValNums() == 1;
1075 }))) {
1076 SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
1077 SDefRegs.push_back(Reg);
1078 } else {
1079 MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
1080 MDefRegs.push_back(Reg);
1081 }
1082 }
1083 unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind);
1084 unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind);
1085 assert(SDefNumRegs + MDefNumRegs == MaxNumRegs);
1086
1087 auto printLoc = [&](const MachineBasicBlock *MBB, SlotIndex SI) {
1088 return Printable([&, MBB, SI](raw_ostream &OS) {
1089 OS << SI << ':' << printMBBReference(*MBB);
1090 if (MLI)
1091 if (const MachineLoop *ML = MLI->getLoopFor(MBB))
1092 OS << " (LoopHdr " << printMBBReference(*ML->getHeader())
1093 << ", Depth " << ML->getLoopDepth() << ")";
1094 });
1095 };
1096
1097 auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) {
1098 GCNRegPressure RegPressure;
1099 RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI);
1100 OS << " " << printReg(Reg, TRI) << ':'
1101 << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask "
1102 << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' '
1103 << RegName << "s)\n";
1104
1105 // Use std::map to sort def/uses by SlotIndex.
1106 std::map<SlotIndex, const MachineInstr *> Instrs;
1107 for (const MachineInstr &MI : MRI.reg_nodbg_instructions(Reg)) {
1108 Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI;
1109 }
1110
1111 for (const auto &[SI, MI] : Instrs) {
1112 OS << " ";
1113 if (MI->definesRegister(Reg, TRI))
1114 OS << "def ";
1115 if (MI->readsRegister(Reg, TRI))
1116 OS << "use ";
1117 OS << printLoc(MI->getParent(), SI) << ": " << *MI;
1118 }
1119 };
1120
1121 OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName()
1122 << " ***\n";
1123 OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at "
1124 << printLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": "
1125 << *MaxPressureMI;
1126
1127 OS << "\nLive registers with single definition (" << SDefNumRegs << ' '
1128 << RegName << "s):\n";
1129
1130 // Sort SDefRegs by number of uses (smallest first)
1131 llvm::sort(SDefRegs, [&](Register A, Register B) {
1132 return std::distance(MRI.use_nodbg_begin(A), MRI.use_nodbg_end()) <
1133 std::distance(MRI.use_nodbg_begin(B), MRI.use_nodbg_end());
1134 });
1135
1136 for (const Register Reg : SDefRegs) {
1137 PrintRegInfo(Reg, LiveSet->lookup(Reg));
1138 }
1139
1140 OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' '
1141 << RegName << "s):\n";
1142 for (const Register Reg : MDefRegs) {
1143 PrintRegInfo(Reg, LiveSet->lookup(Reg));
1144 }
1145}
1146#endif
1147