//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIRegisterInfo
//
//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
15#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
16
17#include "llvm/ADT/BitVector.h"
18
19#define GET_REGINFO_HEADER
20#include "AMDGPUGenRegisterInfo.inc"
21
22#include "SIDefines.h"
23
24namespace llvm {
25
// Forward declarations: the interfaces below only need these types by
// reference/pointer, so the full definitions are not required here.
class GCNSubtarget;
class LiveIntervals;
class LiveRegUnits;
class MachineInstrBuilder;
class RegisterBank;
struct SGPRSpillBuilder;
32
/// Register allocation hint types. Helps eliminate unneeded COPY with True16
namespace AMDGPURI {

// Hint that a virtual register prefers a 16-bit (lo/hi half) or a full 32-bit
// physical register, respectively.
enum { Size16 = 1, Size32 = 2 };

} // end namespace AMDGPURI
39
40class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
41private:
42 const GCNSubtarget &ST;
43 bool SpillSGPRToVGPR;
44 bool isWave32;
45 BitVector RegPressureIgnoredUnits;
46
47 /// Sub reg indexes for getRegSplitParts.
48 /// First index represents subreg size from 1 to 32 Half DWORDS.
49 /// The inner vector is sorted by bit offset.
50 /// Provided a register can be fully split with given subregs,
51 /// all elements of the inner vector combined give a full lane mask.
52 static std::array<std::vector<int16_t>, 32> RegSplitParts;
53
54 // Table representing sub reg of given width and offset.
55 // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
56 // Second index is 32 different dword offsets.
57 static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
58
59 void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
60
61public:
62 SIRegisterInfo(const GCNSubtarget &ST);
63
64 struct SpilledReg {
65 Register VGPR;
66 int Lane = -1;
67
68 SpilledReg() = default;
69 SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}
70
71 bool hasLane() { return Lane != -1; }
72 bool hasReg() { return VGPR != 0; }
73 };
74
75 /// \returns the sub reg enum value for the given \p Channel
76 /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
77 static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
78
79 bool spillSGPRToVGPR() const {
80 return SpillSGPRToVGPR;
81 }
82
83 /// Return the largest available SGPR aligned to \p Align for the register
84 /// class \p RC.
85 MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
86 const unsigned Align,
87 const TargetRegisterClass *RC) const;
88
89 /// Return the end register initially reserved for the scratch buffer in case
90 /// spilling is needed.
91 MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
92
93 BitVector getReservedRegs(const MachineFunction &MF) const override;
94 bool isAsmClobberable(const MachineFunction &MF,
95 MCRegister PhysReg) const override;
96
97 const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
98 const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
99 const uint32_t *getCallPreservedMask(const MachineFunction &MF,
100 CallingConv::ID) const override;
101 const uint32_t *getNoPreservedMask() const override;
102
103 // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
104 // conventions are free to use certain VGPRs without saving and restoring any
105 // lanes (not even inactive ones).
106 static bool isChainScratchRegister(Register VGPR);
107
108 // Stack access is very expensive. CSRs are also the high registers, and we
109 // want to minimize the number of used registers.
110 unsigned getCSRCost() const override { return 100; }
111
112 // When building a block VGPR load, we only really transfer a subset of the
113 // registers in the block, based on a mask. Liveness analysis is not aware of
114 // the mask, so it might consider that any register in the block is available
115 // before the load and may therefore be scavenged. This is not ok for CSRs
116 // that are not clobbered, since the caller will expect them to be preserved.
117 // This method will add artificial implicit uses for those registers on the
118 // load instruction, so liveness analysis knows they're unavailable.
119 void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB,
120 Register BlockReg) const;
121
122 const TargetRegisterClass *
123 getLargestLegalSuperClass(const TargetRegisterClass *RC,
124 const MachineFunction &MF) const override;
125
126 Register getFrameRegister(const MachineFunction &MF) const override;
127
128 bool hasBasePointer(const MachineFunction &MF) const;
129 Register getBaseRegister() const;
130
131 bool shouldRealignStack(const MachineFunction &MF) const override;
132 bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
133
134 bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
135 bool requiresFrameIndexReplacementScavenging(
136 const MachineFunction &MF) const override;
137 bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
138
139 int64_t getScratchInstrOffset(const MachineInstr *MI) const;
140
141 int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
142 int Idx) const override;
143
144 bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
145
146 Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
147 int64_t Offset) const override;
148
149 void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
150 int64_t Offset) const override;
151
152 bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
153 int64_t Offset) const override;
154
155 const TargetRegisterClass *
156 getPointerRegClass(unsigned Kind = 0) const override;
157
158 /// Returns a legal register class to copy a register in the specified class
159 /// to or from. If it is possible to copy the register directly without using
160 /// a cross register class copy, return the specified RC. Returns NULL if it
161 /// is not possible to copy between two registers of the specified class.
162 const TargetRegisterClass *
163 getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
164
165 const TargetRegisterClass *
166 getRegClassForBlockOp(const MachineFunction &MF) const {
167 return &AMDGPU::VReg_1024RegClass;
168 }
169
170 void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
171 bool IsLoad, bool IsKill = true) const;
172
173 /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
174 /// free VGPR lane to spill.
175 bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
176 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
177 bool OnlyToVGPR = false,
178 bool SpillToPhysVGPRLane = false) const;
179
180 bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
181 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
182 bool OnlyToVGPR = false,
183 bool SpillToPhysVGPRLane = false) const;
184
185 bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
186 MachineBasicBlock &RestoreMBB, Register SGPR,
187 RegScavenger *RS) const;
188
189 bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
190 unsigned FIOperandNum,
191 RegScavenger *RS) const override;
192
193 bool eliminateSGPRToVGPRSpillFrameIndex(
194 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
195 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
196 bool SpillToPhysVGPRLane = false) const;
197
198 StringRef getRegAsmName(MCRegister Reg) const override;
199
200 // Pseudo regs are not allowed
201 unsigned getHWRegIndex(MCRegister Reg) const;
202
203 LLVM_READONLY
204 const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;
205
206 LLVM_READONLY const TargetRegisterClass *
207 getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const;
208
209 LLVM_READONLY
210 const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
211
212 LLVM_READONLY
213 const TargetRegisterClass *
214 getVectorSuperClassForBitWidth(unsigned BitWidth) const;
215
216 LLVM_READONLY
217 const TargetRegisterClass *
218 getDefaultVectorSuperClassForBitWidth(unsigned BitWidth) const;
219
220 LLVM_READONLY
221 static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
222
223 /// \returns true if this class contains only SGPR registers
224 static bool isSGPRClass(const TargetRegisterClass *RC) {
225 return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
226 }
227
228 /// \returns true if this class ID contains only SGPR registers
229 bool isSGPRClassID(unsigned RCID) const {
230 return isSGPRClass(RC: getRegClass(i: RCID));
231 }
232
233 bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
234 bool isSGPRPhysReg(Register Reg) const {
235 return isSGPRClass(RC: getPhysRegBaseClass(Reg));
236 }
237
238 bool isVGPRPhysReg(Register Reg) const {
239 return isVGPRClass(RC: getPhysRegBaseClass(Reg));
240 }
241
242 /// \returns true if this class contains only VGPR registers
243 static bool isVGPRClass(const TargetRegisterClass *RC) {
244 return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
245 }
246
247 /// \returns true if this class contains only AGPR registers
248 static bool isAGPRClass(const TargetRegisterClass *RC) {
249 return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
250 }
251
252 /// \returns true only if this class contains both VGPR and AGPR registers
253 bool isVectorSuperClass(const TargetRegisterClass *RC) const {
254 return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
255 }
256
257 /// \returns true only if this class contains both VGPR and SGPR registers
258 bool isVSSuperClass(const TargetRegisterClass *RC) const {
259 return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
260 }
261
262 /// \returns true if this class contains VGPR registers.
263 static bool hasVGPRs(const TargetRegisterClass *RC) {
264 return RC->TSFlags & SIRCFlags::HasVGPR;
265 }
266
267 /// \returns true if this class contains AGPR registers.
268 static bool hasAGPRs(const TargetRegisterClass *RC) {
269 return RC->TSFlags & SIRCFlags::HasAGPR;
270 }
271
272 /// \returns true if this class contains SGPR registers.
273 static bool hasSGPRs(const TargetRegisterClass *RC) {
274 return RC->TSFlags & SIRCFlags::HasSGPR;
275 }
276
277 /// \returns true if this class contains any vector registers.
278 static bool hasVectorRegisters(const TargetRegisterClass *RC) {
279 return hasVGPRs(RC) || hasAGPRs(RC);
280 }
281
282 /// \returns A VGPR reg class with the same width as \p SRC
283 const TargetRegisterClass *
284 getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;
285
286 /// \returns An AGPR reg class with the same width as \p SRC
287 const TargetRegisterClass *
288 getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
289
290 /// \returns An AGPR+VGPR super reg class with the same width as \p SRC
291 const TargetRegisterClass *
292 getEquivalentAVClass(const TargetRegisterClass *SRC) const;
293
294 /// \returns A SGPR reg class with the same width as \p SRC
295 const TargetRegisterClass *
296 getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
297
298 /// Returns a register class which is compatible with \p SuperRC, such that a
299 /// subregister exists with class \p SubRC with subregister index \p
300 /// SubIdx. If this is impossible (e.g., an unaligned subregister index within
301 /// a register tuple), return null.
302 const TargetRegisterClass *
303 getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
304 const TargetRegisterClass *SubRC,
305 unsigned SubIdx) const;
306
307 /// \returns True if operands defined with this operand type can accept
308 /// a literal constant (i.e. any 32-bit immediate).
309 bool opCanUseLiteralConstant(unsigned OpType) const;
310
311 /// \returns True if operands defined with this operand type can accept
312 /// an inline constant. i.e. An integer value in the range (-16, 64) or
313 /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
314 bool opCanUseInlineConstant(unsigned OpType) const;
315
316 MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
317 const TargetRegisterClass *RC,
318 const MachineFunction &MF,
319 bool ReserveHighestVGPR = false) const;
320
321 const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
322 Register Reg) const;
323 const TargetRegisterClass *
324 getRegClassForOperandReg(const MachineRegisterInfo &MRI,
325 const MachineOperand &MO) const;
326
327 bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
328 bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
329 bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
330 return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
331 }
332
333 // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
334 // (such as VCC) which hold a wave-wide vector of boolean values. Examining
335 // just the register class is not suffcient; it needs to be combined with a
336 // value type. The next predicate isUniformReg() does this correctly.
337 bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
338 return !isSGPRClass(RC);
339 }
340
341 bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
342 Register Reg) const override;
343
344 ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
345 unsigned EltSize) const;
346
347 unsigned getRegPressureLimit(const TargetRegisterClass *RC,
348 MachineFunction &MF) const override;
349
350 unsigned getRegPressureSetLimit(const MachineFunction &MF,
351 unsigned Idx) const override;
352
353 bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
354 SmallVectorImpl<MCPhysReg> &Hints,
355 const MachineFunction &MF, const VirtRegMap *VRM,
356 const LiveRegMatrix *Matrix) const override;
357
358 const int *getRegUnitPressureSets(MCRegUnit RegUnit) const override;
359
360 MCRegister getReturnAddressReg(const MachineFunction &MF) const;
361
362 const TargetRegisterClass *
363 getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;
364
365 const TargetRegisterClass *
366 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
367 return getRegClassForSizeOnBank(Size: Ty.getSizeInBits(), Bank);
368 }
369
370 const TargetRegisterClass *
371 getConstrainedRegClassForOperand(const MachineOperand &MO,
372 const MachineRegisterInfo &MRI) const override;
373
374 const TargetRegisterClass *getBoolRC() const {
375 return isWave32 ? &AMDGPU::SReg_32RegClass
376 : &AMDGPU::SReg_64RegClass;
377 }
378
379 const TargetRegisterClass *getWaveMaskRegClass() const {
380 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
381 : &AMDGPU::SReg_64_XEXECRegClass;
382 }
383
384 // Return the appropriate register class to use for 64-bit VGPRs for the
385 // subtarget.
386 const TargetRegisterClass *getVGPR64Class() const;
387
388 MCRegister getVCC() const;
389
390 MCRegister getExec() const;
391
392 // Find reaching register definition
393 MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
394 MachineInstr &Use,
395 MachineRegisterInfo &MRI,
396 LiveIntervals *LIS) const;
397
398 const uint32_t *getAllVGPRRegMask() const;
399 const uint32_t *getAllAGPRRegMask() const;
400 const uint32_t *getAllVectorRegMask() const;
401 const uint32_t *getAllAllocatableSRegMask() const;
402
403 // \returns number of 32 bit registers covered by a \p LM
404 static unsigned getNumCoveredRegs(LaneBitmask LM) {
405 // The assumption is that every lo16 subreg is an even bit and every hi16
406 // is an adjacent odd bit or vice versa.
407 uint64_t Mask = LM.getAsInteger();
408 uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
409 Mask = (Even >> 1) | Mask;
410 uint64_t Odd = Mask & 0x5555555555555555ULL;
411 return llvm::popcount(Value: Odd);
412 }
413
414 // \returns a DWORD offset of a \p SubReg
415 unsigned getChannelFromSubReg(unsigned SubReg) const {
416 return SubReg ? (getSubRegIdxOffset(Idx: SubReg) + 31) / 32 : 0;
417 }
418
419 // \returns a DWORD size of a \p SubReg
420 unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
421 return getNumCoveredRegs(LM: getSubRegIndexLaneMask(SubIdx: SubReg));
422 }
423
424 // For a given 16 bit \p Reg \returns a 32 bit register holding it.
425 // \returns \p Reg otherwise.
426 MCPhysReg get32BitRegister(MCPhysReg Reg) const;
427
428 // Returns true if a given register class is properly aligned for
429 // the subtarget.
430 bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
431
432 /// Return all SGPR128 which satisfy the waves per execution unit requirement
433 /// of the subtarget.
434 ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
435
436 /// Return all SGPR64 which satisfy the waves per execution unit requirement
437 /// of the subtarget.
438 ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;
439
440 /// Return all SGPR32 which satisfy the waves per execution unit requirement
441 /// of the subtarget.
442 ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
443
444 // Insert spill or restore instructions.
445 // When lowering spill pseudos, the RegScavenger should be set.
446 // For creating spill instructions during frame lowering, where no scavenger
447 // is available, LiveUnits can be used.
448 void buildSpillLoadStore(MachineBasicBlock &MBB,
449 MachineBasicBlock::iterator MI, const DebugLoc &DL,
450 unsigned LoadStoreOp, int Index, Register ValueReg,
451 bool ValueIsKill, MCRegister ScratchOffsetReg,
452 int64_t InstrOffset, MachineMemOperand *MMO,
453 RegScavenger *RS,
454 LiveRegUnits *LiveUnits = nullptr) const;
455
456 // Return alignment in register file of first register in a register tuple.
457 unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
458 return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
459 }
460
461 // Check if register class RC has required alignment.
462 bool isRegClassAligned(const TargetRegisterClass *RC,
463 unsigned AlignNumBits) const {
464 assert(AlignNumBits != 0);
465 unsigned RCAlign = getRegClassAlignmentNumBits(RC);
466 return RCAlign == AlignNumBits ||
467 (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
468 }
469
470 // Return alignment of a SubReg relative to start of a register in RC class.
471 // No check if the subreg is supported by the current RC is made.
472 unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
473 unsigned SubReg) const;
474
475 // \returns a number of registers of a given \p RC used in a function.
476 // Does not go inside function calls. If \p IncludeCalls is true, it will
477 // include registers that may be clobbered by calls.
478 unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
479 const TargetRegisterClass &RC,
480 bool IncludeCalls = true) const;
481
482 std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
483 return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
484 : std::optional<uint8_t>{};
485 }
486
487 SmallVector<StringLiteral>
488 getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;
489
490 float
491 getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override {
492 // Prioritize VGPR_32_Lo256 over other classes which may occupy registers
493 // beyond v256.
494 return AMDGPUGenRegisterInfo::getSpillWeightScaleFactor(RC) *
495 ((RC == &AMDGPU::VGPR_32_Lo256RegClass ||
496 RC == &AMDGPU::VReg_64_Lo256_Align2RegClass)
497 ? 2.0
498 : 1.0);
499 }
500};
501
namespace AMDGPU {
/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const TargetRegisterClass &RC);
} // namespace AMDGPU
506
507} // End namespace llvm
508
509#endif
510