//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIRegisterInfo
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H

#include "llvm/ADT/BitVector.h"

#define GET_REGINFO_HEADER
#include "AMDGPUGenRegisterInfo.inc"

#include "SIDefines.h"

namespace llvm {

class GCNSubtarget;
class LiveIntervals;
class LiveRegUnits;
class MachineInstrBuilder;
class RegisterBank;
struct SGPRSpillBuilder;

/// Register allocation hint types. Helps eliminate unneeded COPY with True16
namespace AMDGPURI {

enum { Size16 = 1, Size32 = 2 };
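
// Usage sketch (assumed caller, not part of this header): a pass that knows a
// 16-bit value lives in a 32-bit VGPR can record a hint such as
//   MRI.setRegAllocationHint(VReg, AMDGPURI::Size16, PhysReg);
// so the allocator can prefer an assignment that avoids a later COPY.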

} // end namespace AMDGPURI

class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
private:
  const GCNSubtarget &ST;
  bool SpillSGPRToVGPR;
  bool isWave32;
  BitVector RegPressureIgnoredUnits;

  /// Sub reg indexes for getRegSplitParts.
  /// First index represents subreg size from 1 to 32 Half DWORDS.
  /// The inner vector is sorted by bit offset.
  /// Provided a register can be fully split with given subregs,
  /// all elements of the inner vector combined give a full lane mask.
  static std::array<std::vector<int16_t>, 32> RegSplitParts;

  // Table representing sub reg of given width and offset.
  // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
  // Second index is 32 different dword offsets.
  static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;

  void reserveRegisterTuples(BitVector &, MCRegister Reg) const;

public:
  SIRegisterInfo(const GCNSubtarget &ST);

  struct SpilledReg {
    Register VGPR;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}

    bool hasLane() { return Lane != -1; }
    bool hasReg() { return VGPR != 0; }
  };

  /// \returns the sub reg enum value for the given \p Channel
  /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
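  /// With \p NumRegs > 1 a composed index covering \p NumRegs DWORDs is
  /// returned; e.g. getSubRegFromChannel(1, 2) is expected to yield
  /// AMDGPU::sub1_sub2.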
  static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);

  bool spillSGPRToVGPR() const { return SpillSGPRToVGPR; }

  /// Return the largest available SGPR aligned to \p Align for the register
  /// class \p RC.
  MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
                                     const unsigned Align,
                                     const TargetRegisterClass *RC) const;

  /// Return the end register initially reserved for the scratch buffer in case
  /// spilling is needed.
  MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;

  BitVector getReservedRegs(const MachineFunction &MF) const override;
  bool isAsmClobberable(const MachineFunction &MF,
                        MCRegister PhysReg) const override;

  const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
  const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
  const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID) const override;
  const uint32_t *getNoPreservedMask() const override;

  // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
  // conventions are free to use certain VGPRs without saving and restoring any
  // lanes (not even inactive ones).
  static bool isChainScratchRegister(Register VGPR);

  // Stack access is very expensive. CSRs are also the high registers, and we
  // want to minimize the number of used registers.
  unsigned getCSRFirstUseCost() const override { return 100; }

  // When building a block VGPR load, we only really transfer a subset of the
  // registers in the block, based on a mask. Liveness analysis is not aware of
  // the mask, so it might consider that any register in the block is available
  // before the load and may therefore be scavenged. This is not ok for CSRs
  // that are not clobbered, since the caller will expect them to be preserved.
  // This method will add artificial implicit uses for those registers on the
  // load instruction, so liveness analysis knows they're unavailable.
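  // (Sketch of the expected effect, assuming preserved registers R0..Rn in
  // the block: operands equivalent to MIB.addReg(Ri, RegState::Implicit) are
  // appended for each such register.)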
  void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB,
                                      Register BlockReg) const;

  const TargetRegisterClass *
  getLargestLegalSuperClass(const TargetRegisterClass *RC,
                            const MachineFunction &MF) const override;

  Register getFrameRegister(const MachineFunction &MF) const override;

  bool hasBasePointer(const MachineFunction &MF) const;
  Register getBaseRegister() const;

  bool shouldRealignStack(const MachineFunction &MF) const override;
  bool requiresRegisterScavenging(const MachineFunction &Fn) const override;

  bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
  bool requiresFrameIndexReplacementScavenging(
      const MachineFunction &MF) const override;
  bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;

  int64_t getScratchInstrOffset(const MachineInstr *MI) const;

  int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
                                   int Idx) const override;

  bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;

  Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
                                        int64_t Offset) const override;

  void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                         int64_t Offset) const override;

  bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
                          int64_t Offset) const override;

  const TargetRegisterClass *
  getPointerRegClass(unsigned Kind = 0) const override;

  /// Returns a legal register class to copy a register in the specified class
  /// to or from. If it is possible to copy the register directly without using
  /// a cross register class copy, return the specified RC. Returns NULL if it
  /// is not possible to copy between two registers of the specified class.
  const TargetRegisterClass *
  getCrossCopyRegClass(const TargetRegisterClass *RC) const override;

  const TargetRegisterClass *
  getRegClassForBlockOp(const MachineFunction &MF) const {
    return &AMDGPU::VReg_1024RegClass;
  }

  void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
                               bool IsLoad, bool IsKill = true) const;

  /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
  /// free VGPR lane to spill.
  bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
                 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
                 bool OnlyToVGPR = false,
                 bool SpillToPhysVGPRLane = false) const;

  bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
                   SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
                   bool OnlyToVGPR = false,
                   bool SpillToPhysVGPRLane = false) const;

  bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
                          MachineBasicBlock &RestoreMBB, Register SGPR,
                          RegScavenger *RS) const;

  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                           unsigned FIOperandNum,
                           RegScavenger *RS) const override;

  bool eliminateSGPRToVGPRSpillFrameIndex(
      MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
      SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
      bool SpillToPhysVGPRLane = false) const;

  StringRef getRegAsmName(MCRegister Reg) const override;

  // Pseudo regs are not allowed
  unsigned getHWRegIndex(MCRegister Reg) const;

  LLVM_READONLY
  const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;

  LLVM_READONLY const TargetRegisterClass *
  getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const;

  LLVM_READONLY
  const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;

  LLVM_READONLY
  const TargetRegisterClass *
  getVectorSuperClassForBitWidth(unsigned BitWidth) const;

  LLVM_READONLY
  const TargetRegisterClass *
  getDefaultVectorSuperClassForBitWidth(unsigned BitWidth) const;

  LLVM_READONLY
  static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);

  /// \returns true if this class contains only SGPR registers
  static bool isSGPRClass(const TargetRegisterClass *RC) {
    return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
  }

  /// \returns true if this class ID contains only SGPR registers
  bool isSGPRClassID(unsigned RCID) const {
    return isSGPRClass(getRegClass(RCID));
  }

  bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
  bool isSGPRPhysReg(Register Reg) const {
    return isSGPRClass(getPhysRegBaseClass(Reg));
  }

  bool isVGPRPhysReg(Register Reg) const {
    return isVGPRClass(getPhysRegBaseClass(Reg));
  }

  /// \returns true if this class contains only VGPR registers
  static bool isVGPRClass(const TargetRegisterClass *RC) {
    return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
  }

  /// \returns true if this class contains only AGPR registers
  static bool isAGPRClass(const TargetRegisterClass *RC) {
    return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
  }

  /// \returns true only if this class contains both VGPR and AGPR registers
  bool isVectorSuperClass(const TargetRegisterClass *RC) const {
    return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
  }

  /// \returns true only if this class contains both VGPR and SGPR registers
  bool isVSSuperClass(const TargetRegisterClass *RC) const {
    return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
  }

  /// \returns true if this class contains VGPR registers.
  static bool hasVGPRs(const TargetRegisterClass *RC) {
    return RC->TSFlags & SIRCFlags::HasVGPR;
  }

  /// \returns true if this class contains AGPR registers.
  static bool hasAGPRs(const TargetRegisterClass *RC) {
    return RC->TSFlags & SIRCFlags::HasAGPR;
  }

  /// \returns true if this class contains SGPR registers.
  static bool hasSGPRs(const TargetRegisterClass *RC) {
    return RC->TSFlags & SIRCFlags::HasSGPR;
  }

  /// \returns true if this class contains any vector registers.
  static bool hasVectorRegisters(const TargetRegisterClass *RC) {
    return hasVGPRs(RC) || hasAGPRs(RC);
  }

  /// \returns A VGPR reg class with the same width as \p SRC
  const TargetRegisterClass *
  getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;

  /// \returns An AGPR reg class with the same width as \p SRC
  const TargetRegisterClass *
  getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;

  /// \returns An AGPR+VGPR super reg class with the same width as \p SRC
  const TargetRegisterClass *
  getEquivalentAVClass(const TargetRegisterClass *SRC) const;

  /// \returns An SGPR reg class with the same width as \p VRC
  const TargetRegisterClass *
  getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;

  /// Returns a register class which is compatible with \p SuperRC, such that a
  /// subregister exists with class \p SubRC with subregister index \p
  /// SubIdx. If this is impossible (e.g., an unaligned subregister index within
  /// a register tuple), return null.
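  /// (For example, a 64-bit subregister at an odd DWORD offset within a
  /// 128-bit tuple may be rejected on subtargets that require 64-bit tuples
  /// to start on an even-aligned register.)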
  const TargetRegisterClass *
  getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
                           const TargetRegisterClass *SubRC,
                           unsigned SubIdx) const;

  /// \returns True if operands defined with this operand type can accept
  /// a literal constant (i.e. any 32-bit immediate).
  bool opCanUseLiteralConstant(unsigned OpType) const;

  /// \returns True if operands defined with this operand type can accept
  /// an inline constant, i.e. an integer value in the range [-16, 64] or one
  /// of -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
  bool opCanUseInlineConstant(unsigned OpType) const;

  MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
                                const TargetRegisterClass *RC,
                                const MachineFunction &MF,
                                bool ReserveHighestVGPR = false) const;

  const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
                                               Register Reg) const;
  const TargetRegisterClass *
  getRegClassForOperandReg(const MachineRegisterInfo &MRI,
                           const MachineOperand &MO) const;

  bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
  bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
  bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
    return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
  }

  // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
  // (such as VCC) which hold a wave-wide vector of boolean values. Examining
  // just the register class is not sufficient; it needs to be combined with a
  // value type. The next predicate isUniformReg() does this correctly.
  bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
    return !isSGPRClass(RC);
  }

  bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
                    Register Reg) const override;

  ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
                                     unsigned EltSize) const;

  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                               MachineFunction &MF) const override;

  unsigned getRegPressureSetLimit(const MachineFunction &MF,
                                  unsigned Idx) const override;

  bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
                             SmallVectorImpl<MCPhysReg> &Hints,
                             const MachineFunction &MF, const VirtRegMap *VRM,
                             const LiveRegMatrix *Matrix) const override;

  const int *getRegUnitPressureSets(MCRegUnit RegUnit) const override;

  MCRegister getReturnAddressReg(const MachineFunction &MF) const;

  const TargetRegisterClass *
  getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;

  const TargetRegisterClass *
  getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
    return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank);
  }

  const TargetRegisterClass *
  getConstrainedRegClassForOperand(const MachineOperand &MO,
                                   const MachineRegisterInfo &MRI) const override;

  const TargetRegisterClass *getBoolRC() const {
    return isWave32 ? &AMDGPU::SReg_32RegClass
                    : &AMDGPU::SReg_64RegClass;
  }

  const TargetRegisterClass *getWaveMaskRegClass() const {
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  }

  // Return the appropriate register class to use for 64-bit VGPRs for the
  // subtarget.
  const TargetRegisterClass *getVGPR64Class() const;

  MCRegister getVCC() const;

  MCRegister getExec() const;

  // Find reaching register definition
  MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
                                MachineInstr &Use,
                                MachineRegisterInfo &MRI,
                                LiveIntervals *LIS) const;

  const uint32_t *getAllVGPRRegMask() const;
  const uint32_t *getAllAGPRRegMask() const;
  const uint32_t *getAllVectorRegMask() const;
  const uint32_t *getAllAllocatableSRegMask() const;

  // \returns number of 32-bit registers covered by a \p LM
  static unsigned getNumCoveredRegs(LaneBitmask LM) {
    // The assumption is that every lo16 subreg is an even bit and every hi16
    // is an adjacent odd bit or vice versa.
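    // Worked example: LM = 0b0110 (hi16 of reg 0, lo16 of reg 1) gives
    // Even = 0b0010, Mask = 0b0111 after merging, Odd = 0b0101, and
    // popcount(Odd) = 2 covered 32-bit registers.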
    uint64_t Mask = LM.getAsInteger();
    uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
    Mask = (Even >> 1) | Mask;
    uint64_t Odd = Mask & 0x5555555555555555ULL;
    return llvm::popcount(Odd);
  }

  // \returns a DWORD offset of a \p SubReg
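  // (e.g. AMDGPU::sub1 starts at bit offset 32, giving DWORD offset
  // (32 + 31) / 32 = 1).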
  unsigned getChannelFromSubReg(unsigned SubReg) const {
    return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0;
  }

  // \returns a DWORD size of a \p SubReg
  unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
    return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg));
  }

  // For a given 16-bit \p Reg, \returns a 32-bit register holding it;
  // otherwise \returns \p Reg itself.
  MCPhysReg get32BitRegister(MCPhysReg Reg) const;

  // Returns true if a given register class is properly aligned for
  // the subtarget.
  bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;

  /// Return all SGPR128 which satisfy the waves per execution unit requirement
  /// of the subtarget.
  ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;

  /// Return all SGPR64 which satisfy the waves per execution unit requirement
  /// of the subtarget.
  ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;

  /// Return all SGPR32 which satisfy the waves per execution unit requirement
  /// of the subtarget.
  ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;

  // Insert spill or restore instructions.
  // When lowering spill pseudos, the RegScavenger should be set.
  // For creating spill instructions during frame lowering, where no scavenger
  // is available, LiveUnits can be used.
  void buildSpillLoadStore(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, const DebugLoc &DL,
                           unsigned LoadStoreOp, int Index, Register ValueReg,
                           bool ValueIsKill, MCRegister ScratchOffsetReg,
                           int64_t InstrOffset, MachineMemOperand *MMO,
                           RegScavenger *RS,
                           LiveRegUnits *LiveUnits = nullptr) const;

  // Return alignment in register file of first register in a register tuple.
  unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
    return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
  }

  // Check if register class RC has required alignment.
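  // A more strictly aligned class also satisfies a weaker requirement, e.g. a
  // class aligned to 64 bits passes a 32-bit check since 64 % 32 == 0.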
  bool isRegClassAligned(const TargetRegisterClass *RC,
                         unsigned AlignNumBits) const {
    assert(AlignNumBits != 0);
    unsigned RCAlign = getRegClassAlignmentNumBits(RC);
    return RCAlign == AlignNumBits ||
           (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
  }

  // Return the alignment of a SubReg relative to the start of a register in
  // RC class. No check is made that \p RC actually supports the subreg.
  unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
                                     unsigned SubReg) const;

  // \returns the number of registers of a given \p RC used in a function.
  // Does not look inside called functions. If \p IncludeCalls is true,
  // registers that may be clobbered by calls are counted as well.
  unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
                              const TargetRegisterClass &RC,
                              bool IncludeCalls = true) const;

  std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
    return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
                             : std::optional<uint8_t>{};
  }

  SmallVector<StringLiteral>
  getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;

  float
  getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override {
    // Prioritize VGPR_32_Lo256 over other classes which may occupy registers
    // beyond v256.
    return AMDGPUGenRegisterInfo::getSpillWeightScaleFactor(RC) *
           ((RC == &AMDGPU::VGPR_32_Lo256RegClass ||
             RC == &AMDGPU::VReg_64_Lo256_Align2RegClass)
                ? 2.0
                : 1.0);
  }
};

namespace AMDGPU {
/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const TargetRegisterClass &RC);
} // namespace AMDGPU

} // End namespace llvm

#endif