//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIRegisterInfo
//
//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
15#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
16
#include "llvm/ADT/BitVector.h"

#define GET_REGINFO_HEADER
#include "AMDGPUGenRegisterInfo.inc"

#include "SIDefines.h"

#include <array>
#include <cstdint>
#include <optional>
#include <vector>
23
24namespace llvm {
25
// Forward declarations. This header only names these types through
// references, pointers, or function parameters, so their full definitions are
// not required here.
class GCNSubtarget;
class LiveIntervals;
class LiveRegUnits;
class MachineInstrBuilder;
class RegisterBank;
struct SGPRSpillBuilder;
32
/// Register allocation hint types. Helps eliminate unneeded COPY with True16
namespace AMDGPURI {

enum {
  Size16 = 1,
  Size32 = 2
};

} // end namespace AMDGPURI
39
40class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
41private:
42 const GCNSubtarget &ST;
43 bool SpillSGPRToVGPR;
44 bool isWave32;
45 BitVector RegPressureIgnoredUnits;
46
47 /// Sub reg indexes for getRegSplitParts.
48 /// First index represents subreg size from 1 to 32 Half DWORDS.
49 /// The inner vector is sorted by bit offset.
50 /// Provided a register can be fully split with given subregs,
51 /// all elements of the inner vector combined give a full lane mask.
52 static std::array<std::vector<int16_t>, 32> RegSplitParts;
53
54 // Table representing sub reg of given width and offset.
55 // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
56 // Second index is 32 different dword offsets.
57 static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
58
59 void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
60
61public:
62 SIRegisterInfo(const GCNSubtarget &ST);
63
64 struct SpilledReg {
65 Register VGPR;
66 int Lane = -1;
67
68 SpilledReg() = default;
69 SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}
70
71 bool hasLane() { return Lane != -1; }
72 bool hasReg() { return VGPR != 0; }
73 };
74
75 /// \returns the sub reg enum value for the given \p Channel
76 /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
77 static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
78
79 bool spillSGPRToVGPR() const {
80 return SpillSGPRToVGPR;
81 }
82
83 bool isCFISavedRegsSpillEnabled() const;
84
85 /// Return the largest available SGPR aligned to \p Align for the register
86 /// class \p RC.
87 MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
88 const unsigned Align,
89 const TargetRegisterClass *RC) const;
90
91 /// Return the end register initially reserved for the scratch buffer in case
92 /// spilling is needed.
93 MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
94
95 BitVector getReservedRegs(const MachineFunction &MF) const override;
96 bool isAsmClobberable(const MachineFunction &MF,
97 MCRegister PhysReg) const override;
98
99 const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
100 const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
101 const uint32_t *getCallPreservedMask(const MachineFunction &MF,
102 CallingConv::ID) const override;
103 const uint32_t *getNoPreservedMask() const override;
104
105 // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
106 // conventions are free to use certain VGPRs without saving and restoring any
107 // lanes (not even inactive ones).
108 static bool isChainScratchRegister(Register VGPR);
109
110 // Stack access is very expensive. CSRs are also the high registers, and we
111 // want to minimize the number of used registers.
112 unsigned getCSRCost() const override { return 100; }
113
114 // When building a block VGPR load, we only really transfer a subset of the
115 // registers in the block, based on a mask. Liveness analysis is not aware of
116 // the mask, so it might consider that any register in the block is available
117 // before the load and may therefore be scavenged. This is not ok for CSRs
118 // that are not clobbered, since the caller will expect them to be preserved.
119 // This method will add artificial implicit uses for those registers on the
120 // load instruction, so liveness analysis knows they're unavailable.
121 void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB,
122 Register BlockReg) const;
123
124 // Iterate over all VGPRs in the given BlockReg and emit CFI for each VGPR
125 // as-needed depending on the (statically known) mask, relative to the given
126 // base Offset.
127 void buildCFIForBlockCSRStore(MachineBasicBlock &MBB,
128 MachineBasicBlock::iterator MBBI,
129 Register BlockReg, int64_t Offset) const;
130
131 const TargetRegisterClass *
132 getLargestLegalSuperClass(const TargetRegisterClass *RC,
133 const MachineFunction &MF) const override;
134
135 Register getFrameRegister(const MachineFunction &MF) const override;
136
137 bool hasBasePointer(const MachineFunction &MF) const;
138 Register getBaseRegister() const;
139
140 bool shouldRealignStack(const MachineFunction &MF) const override;
141 bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
142
143 bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
144 bool requiresFrameIndexReplacementScavenging(
145 const MachineFunction &MF) const override;
146 bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
147
148 int64_t getScratchInstrOffset(const MachineInstr *MI) const;
149
150 int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
151 int Idx) const override;
152
153 bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
154
155 Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
156 int64_t Offset) const override;
157
158 void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
159 int64_t Offset) const override;
160
161 bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
162 int64_t Offset) const override;
163
164 const TargetRegisterClass *
165 getPointerRegClass(unsigned Kind = 0) const override;
166
167 /// Returns a legal register class to copy a register in the specified class
168 /// to or from. If it is possible to copy the register directly without using
169 /// a cross register class copy, return the specified RC. Returns NULL if it
170 /// is not possible to copy between two registers of the specified class.
171 const TargetRegisterClass *
172 getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
173
174 const TargetRegisterClass *
175 getRegClassForBlockOp(const MachineFunction &MF) const {
176 return &AMDGPU::VReg_1024RegClass;
177 }
178
179 void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
180 bool IsLoad, bool IsKill = true) const;
181
182 /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
183 /// free VGPR lane to spill.
184 bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
185 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
186 bool OnlyToVGPR = false, bool SpillToPhysVGPRLane = false,
187 bool NeedsCFI = false) const;
188
189 bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
190 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
191 bool OnlyToVGPR = false,
192 bool SpillToPhysVGPRLane = false) const;
193
194 bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
195 MachineBasicBlock &RestoreMBB, Register SGPR,
196 RegScavenger *RS) const;
197
198 bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
199 unsigned FIOperandNum,
200 RegScavenger *RS) const override;
201
202 bool eliminateSGPRToVGPRSpillFrameIndex(
203 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
204 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
205 bool SpillToPhysVGPRLane = false) const;
206
207 StringRef getRegAsmName(MCRegister Reg) const override;
208
209 // Pseudo regs are not allowed
210 unsigned getHWRegIndex(MCRegister Reg) const;
211
212 LLVM_READONLY
213 const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;
214
215 LLVM_READONLY const TargetRegisterClass *
216 getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const;
217
218 LLVM_READONLY
219 const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
220
221 LLVM_READONLY
222 const TargetRegisterClass *
223 getVectorSuperClassForBitWidth(unsigned BitWidth) const;
224
225 LLVM_READONLY
226 const TargetRegisterClass *
227 getDefaultVectorSuperClassForBitWidth(unsigned BitWidth) const;
228
229 LLVM_READONLY
230 static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
231
232 /// \returns true if this class contains only SGPR registers
233 static bool isSGPRClass(const TargetRegisterClass *RC) {
234 return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
235 }
236
237 /// \returns true if this class ID contains only SGPR registers
238 bool isSGPRClassID(unsigned RCID) const {
239 return isSGPRClass(RC: getRegClass(i: RCID));
240 }
241
242 bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
243 bool isSGPRPhysReg(Register Reg) const {
244 return isSGPRClass(RC: getPhysRegBaseClass(Reg));
245 }
246
247 bool isVGPRPhysReg(Register Reg) const {
248 return isVGPRClass(RC: getPhysRegBaseClass(Reg));
249 }
250
251 /// \returns true if this class contains only VGPR registers
252 static bool isVGPRClass(const TargetRegisterClass *RC) {
253 return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
254 }
255
256 /// \returns true if this class contains only AGPR registers
257 static bool isAGPRClass(const TargetRegisterClass *RC) {
258 return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
259 }
260
261 /// \returns true only if this class contains both VGPR and AGPR registers
262 bool isVectorSuperClass(const TargetRegisterClass *RC) const {
263 return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
264 }
265
266 /// \returns true only if this class contains both VGPR and SGPR registers
267 bool isVSSuperClass(const TargetRegisterClass *RC) const {
268 return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
269 }
270
271 /// \returns true if this class contains VGPR registers.
272 static bool hasVGPRs(const TargetRegisterClass *RC) {
273 return RC->TSFlags & SIRCFlags::HasVGPR;
274 }
275
276 /// \returns true if this class contains AGPR registers.
277 static bool hasAGPRs(const TargetRegisterClass *RC) {
278 return RC->TSFlags & SIRCFlags::HasAGPR;
279 }
280
281 /// \returns true if this class contains SGPR registers.
282 static bool hasSGPRs(const TargetRegisterClass *RC) {
283 return RC->TSFlags & SIRCFlags::HasSGPR;
284 }
285
286 /// \returns true if this class contains any vector registers.
287 static bool hasVectorRegisters(const TargetRegisterClass *RC) {
288 return hasVGPRs(RC) || hasAGPRs(RC);
289 }
290
291 /// \returns A VGPR reg class with the same width as \p SRC
292 const TargetRegisterClass *
293 getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;
294
295 /// \returns An AGPR reg class with the same width as \p SRC
296 const TargetRegisterClass *
297 getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
298
299 /// \returns An AGPR+VGPR super reg class with the same width as \p SRC
300 const TargetRegisterClass *
301 getEquivalentAVClass(const TargetRegisterClass *SRC) const;
302
303 /// \returns A SGPR reg class with the same width as \p SRC
304 const TargetRegisterClass *
305 getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
306
307 /// Returns a register class which is compatible with \p SuperRC, such that a
308 /// subregister exists with class \p SubRC with subregister index \p
309 /// SubIdx. If this is impossible (e.g., an unaligned subregister index within
310 /// a register tuple), return null.
311 const TargetRegisterClass *
312 getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
313 const TargetRegisterClass *SubRC,
314 unsigned SubIdx) const;
315
316 /// \returns True if operands defined with this operand type can accept
317 /// a literal constant (i.e. any 32-bit immediate).
318 bool opCanUseLiteralConstant(unsigned OpType) const;
319
320 /// \returns True if operands defined with this operand type can accept
321 /// an inline constant. i.e. An integer value in the range (-16, 64) or
322 /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
323 bool opCanUseInlineConstant(unsigned OpType) const;
324
325 MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
326 const TargetRegisterClass *RC,
327 const MachineFunction &MF,
328 bool ReserveHighestVGPR = false) const;
329
330 const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
331 Register Reg) const;
332 const TargetRegisterClass *
333 getRegClassForOperandReg(const MachineRegisterInfo &MRI,
334 const MachineOperand &MO) const;
335
336 bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
337 bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
338 bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
339 return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
340 }
341
342 // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
343 // (such as VCC) which hold a wave-wide vector of boolean values. Examining
344 // just the register class is not suffcient; it needs to be combined with a
345 // value type. The next predicate isUniformReg() does this correctly.
346 bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
347 return !isSGPRClass(RC);
348 }
349
350 bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
351 Register Reg) const override;
352
353 ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
354 unsigned EltSize) const;
355
356 unsigned getRegPressureLimit(const TargetRegisterClass *RC,
357 MachineFunction &MF) const override;
358
359 unsigned getRegPressureSetLimit(const MachineFunction &MF,
360 unsigned Idx) const override;
361
362 bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
363 SmallVectorImpl<MCPhysReg> &Hints,
364 const MachineFunction &MF, const VirtRegMap *VRM,
365 const LiveRegMatrix *Matrix) const override;
366
367 const int *getRegUnitPressureSets(MCRegUnit RegUnit) const override;
368
369 MCRegister getReturnAddressReg(const MachineFunction &MF) const;
370
371 const TargetRegisterClass *
372 getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;
373
374 const TargetRegisterClass *
375 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
376 return getRegClassForSizeOnBank(Size: Ty.getSizeInBits(), Bank);
377 }
378
379 const TargetRegisterClass *
380 getConstrainedRegClassForOperand(const MachineOperand &MO,
381 const MachineRegisterInfo &MRI) const override;
382
383 const TargetRegisterClass *getBoolRC() const {
384 return isWave32 ? &AMDGPU::SReg_32RegClass
385 : &AMDGPU::SReg_64RegClass;
386 }
387
388 const TargetRegisterClass *getWaveMaskRegClass() const {
389 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
390 : &AMDGPU::SReg_64_XEXECRegClass;
391 }
392
393 // Return the appropriate register class to use for 64-bit VGPRs for the
394 // subtarget.
395 const TargetRegisterClass *getVGPR64Class() const;
396
397 MCRegister getVCC() const;
398
399 MCRegister getExec() const;
400
401 // Find reaching register definition
402 MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
403 MachineInstr &Use,
404 MachineRegisterInfo &MRI,
405 LiveIntervals *LIS) const;
406
407 const uint32_t *getAllVGPRRegMask() const;
408 const uint32_t *getAllAGPRRegMask() const;
409 const uint32_t *getAllVectorRegMask() const;
410 const uint32_t *getAllAllocatableSRegMask() const;
411
412 // \returns number of 32 bit registers covered by a \p LM
413 static unsigned getNumCoveredRegs(LaneBitmask LM) {
414 // The assumption is that every lo16 subreg is an even bit and every hi16
415 // is an adjacent odd bit or vice versa.
416 uint64_t Mask = LM.getAsInteger();
417 uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
418 Mask = (Even >> 1) | Mask;
419 uint64_t Odd = Mask & 0x5555555555555555ULL;
420 return llvm::popcount(Value: Odd);
421 }
422
423 // \returns a DWORD offset of a \p SubReg
424 unsigned getChannelFromSubReg(unsigned SubReg) const {
425 return SubReg ? (getSubRegIdxOffset(Idx: SubReg) + 31) / 32 : 0;
426 }
427
428 // \returns a DWORD size of a \p SubReg
429 unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
430 return getNumCoveredRegs(LM: getSubRegIndexLaneMask(SubIdx: SubReg));
431 }
432
433 // For a given 16 bit \p Reg \returns a 32 bit register holding it.
434 // \returns \p Reg otherwise.
435 MCPhysReg get32BitRegister(MCPhysReg Reg) const;
436
437 // Returns true if a given register class is properly aligned for
438 // the subtarget.
439 bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
440
441 /// Return all SGPR128 which satisfy the waves per execution unit requirement
442 /// of the subtarget.
443 ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
444
445 /// Return all SGPR64 which satisfy the waves per execution unit requirement
446 /// of the subtarget.
447 ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;
448
449 /// Return all SGPR32 which satisfy the waves per execution unit requirement
450 /// of the subtarget.
451 ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
452
453 // Insert spill or restore instructions.
454 // When lowering spill pseudos, the RegScavenger should be set.
455 // For creating spill instructions during frame lowering, where no scavenger
456 // is available, LiveUnits can be used.
457 void buildSpillLoadStore(MachineBasicBlock &MBB,
458 MachineBasicBlock::iterator MI, const DebugLoc &DL,
459 unsigned LoadStoreOp, int Index, Register ValueReg,
460 bool ValueIsKill, MCRegister ScratchOffsetReg,
461 int64_t InstrOffset, MachineMemOperand *MMO,
462 RegScavenger *RS, LiveRegUnits *LiveUnits = nullptr,
463 bool NeedsCFI = false) const;
464
465 // Return alignment in register file of first register in a register tuple.
466 unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
467 return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
468 }
469
470 // Check if register class RC has required alignment.
471 bool isRegClassAligned(const TargetRegisterClass *RC,
472 unsigned AlignNumBits) const {
473 assert(AlignNumBits != 0);
474 unsigned RCAlign = getRegClassAlignmentNumBits(RC);
475 return RCAlign == AlignNumBits ||
476 (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
477 }
478
479 // Return alignment of a SubReg relative to start of a register in RC class.
480 // No check if the subreg is supported by the current RC is made.
481 unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
482 unsigned SubReg) const;
483
484 // \returns a number of registers of a given \p RC used in a function.
485 // Does not go inside function calls. If \p IncludeCalls is true, it will
486 // include registers that may be clobbered by calls.
487 unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
488 const TargetRegisterClass &RC,
489 bool IncludeCalls = true) const;
490
491 std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
492 return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
493 : std::optional<uint8_t>{};
494 }
495
496 SmallVector<StringLiteral>
497 getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;
498
499 float
500 getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override {
501 // Prioritize VGPR_32_Lo256 over other classes which may occupy registers
502 // beyond v256.
503 return AMDGPUGenRegisterInfo::getSpillWeightScaleFactor(RC) *
504 ((RC == &AMDGPU::VGPR_32_Lo256RegClass ||
505 RC == &AMDGPU::VReg_64_Lo256_Align2RegClass)
506 ? 2.0
507 : 1.0);
508 }
509};
510
511namespace AMDGPU {
512/// Get the size in bits of a register from the register class \p RC.
513unsigned getRegBitWidth(const TargetRegisterClass &RC);
514} // namespace AMDGPU
515
516} // End namespace llvm
517
518#endif
519