1//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Interface definition for SIRegisterInfo
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
15#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
16
#include "llvm/ADT/BitVector.h"

#define GET_REGINFO_HEADER
#include "AMDGPUGenRegisterInfo.inc"

#include "SIDefines.h"

#include <array>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>
23
24namespace llvm {
25
26class GCNSubtarget;
27class LiveIntervals;
28class LiveRegUnits;
29class MachineInstrBuilder;
30class RegisterBank;
31struct SGPRSpillBuilder;
32
/// Register allocation hint types. Helps eliminate unneeded COPY with True16
namespace AMDGPURI {

// Hint kinds recorded on virtual registers: prefer a 16-bit (Size16) or a
// 32-bit (Size32) physical register when allocating.
enum { Size16 = 1, Size32 = 2 };

} // end namespace AMDGPURI
39
40class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
41private:
42 const GCNSubtarget &ST;
43 bool SpillSGPRToVGPR;
44 bool isWave32;
45 BitVector RegPressureIgnoredUnits;
46
47 /// Sub reg indexes for getRegSplitParts.
48 /// First index represents subreg size from 1 to 32 Half DWORDS.
49 /// The inner vector is sorted by bit offset.
50 /// Provided a register can be fully split with given subregs,
51 /// all elements of the inner vector combined give a full lane mask.
52 static std::array<std::vector<int16_t>, 32> RegSplitParts;
53
54 // Table representing sub reg of given width and offset.
55 // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
56 // Second index is 32 different dword offsets.
57 static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
58
59 void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
60
61public:
62 SIRegisterInfo(const GCNSubtarget &ST);
63
64 struct SpilledReg {
65 Register VGPR;
66 int Lane = -1;
67
68 SpilledReg() = default;
69 SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}
70
71 bool hasLane() { return Lane != -1; }
72 bool hasReg() { return VGPR != 0; }
73 };
74
75 /// \returns the sub reg enum value for the given \p Channel
76 /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
77 static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
78
79 bool spillSGPRToVGPR() const {
80 return SpillSGPRToVGPR;
81 }
82
83 /// Return the largest available SGPR aligned to \p Align for the register
84 /// class \p RC.
85 MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
86 const unsigned Align,
87 const TargetRegisterClass *RC) const;
88
89 /// Return the end register initially reserved for the scratch buffer in case
90 /// spilling is needed.
91 MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
92
93 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
94 /// of waves per execution unit required for the function \p MF.
95 std::pair<unsigned, unsigned>
96 getMaxNumVectorRegs(const MachineFunction &MF) const;
97
98 BitVector getReservedRegs(const MachineFunction &MF) const override;
99 bool isAsmClobberable(const MachineFunction &MF,
100 MCRegister PhysReg) const override;
101
102 const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
103 const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
104 const uint32_t *getCallPreservedMask(const MachineFunction &MF,
105 CallingConv::ID) const override;
106 const uint32_t *getNoPreservedMask() const override;
107
108 // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
109 // conventions are free to use certain VGPRs without saving and restoring any
110 // lanes (not even inactive ones).
111 static bool isChainScratchRegister(Register VGPR);
112
113 // Stack access is very expensive. CSRs are also the high registers, and we
114 // want to minimize the number of used registers.
115 unsigned getCSRFirstUseCost() const override {
116 return 100;
117 }
118
119 // When building a block VGPR load, we only really transfer a subset of the
120 // registers in the block, based on a mask. Liveness analysis is not aware of
121 // the mask, so it might consider that any register in the block is available
122 // before the load and may therefore be scavenged. This is not ok for CSRs
123 // that are not clobbered, since the caller will expect them to be preserved.
124 // This method will add artificial implicit uses for those registers on the
125 // load instruction, so liveness analysis knows they're unavailable.
126 void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB,
127 Register BlockReg) const;
128
129 const TargetRegisterClass *
130 getLargestLegalSuperClass(const TargetRegisterClass *RC,
131 const MachineFunction &MF) const override;
132
133 Register getFrameRegister(const MachineFunction &MF) const override;
134
135 bool hasBasePointer(const MachineFunction &MF) const;
136 Register getBaseRegister() const;
137
138 bool shouldRealignStack(const MachineFunction &MF) const override;
139 bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
140
141 bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
142 bool requiresFrameIndexReplacementScavenging(
143 const MachineFunction &MF) const override;
144 bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
145
146 int64_t getScratchInstrOffset(const MachineInstr *MI) const;
147
148 int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
149 int Idx) const override;
150
151 bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
152
153 Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
154 int64_t Offset) const override;
155
156 void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
157 int64_t Offset) const override;
158
159 bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
160 int64_t Offset) const override;
161
162 const TargetRegisterClass *getPointerRegClass(
163 const MachineFunction &MF, unsigned Kind = 0) const override;
164
165 /// Returns a legal register class to copy a register in the specified class
166 /// to or from. If it is possible to copy the register directly without using
167 /// a cross register class copy, return the specified RC. Returns NULL if it
168 /// is not possible to copy between two registers of the specified class.
169 const TargetRegisterClass *
170 getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
171
172 const TargetRegisterClass *
173 getRegClassForBlockOp(const MachineFunction &MF) const {
174 return &AMDGPU::VReg_1024RegClass;
175 }
176
177 void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
178 bool IsLoad, bool IsKill = true) const;
179
180 /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
181 /// free VGPR lane to spill.
182 bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
183 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
184 bool OnlyToVGPR = false,
185 bool SpillToPhysVGPRLane = false) const;
186
187 bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
188 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
189 bool OnlyToVGPR = false,
190 bool SpillToPhysVGPRLane = false) const;
191
192 bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
193 MachineBasicBlock &RestoreMBB, Register SGPR,
194 RegScavenger *RS) const;
195
196 bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
197 unsigned FIOperandNum,
198 RegScavenger *RS) const override;
199
200 bool eliminateSGPRToVGPRSpillFrameIndex(
201 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
202 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
203 bool SpillToPhysVGPRLane = false) const;
204
205 StringRef getRegAsmName(MCRegister Reg) const override;
206
207 // Pseudo regs are not allowed
208 unsigned getHWRegIndex(MCRegister Reg) const {
209 return getEncodingValue(Reg) & 0xff;
210 }
211
212 LLVM_READONLY
213 const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;
214
215 LLVM_READONLY
216 const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
217
218 LLVM_READONLY
219 const TargetRegisterClass *
220 getVectorSuperClassForBitWidth(unsigned BitWidth) const;
221
222 LLVM_READONLY
223 static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
224
225 /// \returns true if this class contains only SGPR registers
226 static bool isSGPRClass(const TargetRegisterClass *RC) {
227 return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
228 }
229
230 /// \returns true if this class ID contains only SGPR registers
231 bool isSGPRClassID(unsigned RCID) const {
232 return isSGPRClass(RC: getRegClass(RCID));
233 }
234
235 bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
236 bool isSGPRPhysReg(Register Reg) const {
237 return isSGPRClass(RC: getPhysRegBaseClass(Reg));
238 }
239
240 bool isVGPRPhysReg(Register Reg) const {
241 return isVGPRClass(RC: getPhysRegBaseClass(Reg));
242 }
243
244 /// \returns true if this class contains only VGPR registers
245 static bool isVGPRClass(const TargetRegisterClass *RC) {
246 return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
247 }
248
249 /// \returns true if this class contains only AGPR registers
250 static bool isAGPRClass(const TargetRegisterClass *RC) {
251 return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
252 }
253
254 /// \returns true only if this class contains both VGPR and AGPR registers
255 bool isVectorSuperClass(const TargetRegisterClass *RC) const {
256 return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
257 }
258
259 /// \returns true only if this class contains both VGPR and SGPR registers
260 bool isVSSuperClass(const TargetRegisterClass *RC) const {
261 return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
262 }
263
264 /// \returns true if this class contains VGPR registers.
265 static bool hasVGPRs(const TargetRegisterClass *RC) {
266 return RC->TSFlags & SIRCFlags::HasVGPR;
267 }
268
269 /// \returns true if this class contains AGPR registers.
270 static bool hasAGPRs(const TargetRegisterClass *RC) {
271 return RC->TSFlags & SIRCFlags::HasAGPR;
272 }
273
274 /// \returns true if this class contains SGPR registers.
275 static bool hasSGPRs(const TargetRegisterClass *RC) {
276 return RC->TSFlags & SIRCFlags::HasSGPR;
277 }
278
279 /// \returns true if this class contains any vector registers.
280 static bool hasVectorRegisters(const TargetRegisterClass *RC) {
281 return hasVGPRs(RC) || hasAGPRs(RC);
282 }
283
284 /// \returns A VGPR reg class with the same width as \p SRC
285 const TargetRegisterClass *
286 getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;
287
288 /// \returns An AGPR reg class with the same width as \p SRC
289 const TargetRegisterClass *
290 getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
291
292 /// \returns A SGPR reg class with the same width as \p SRC
293 const TargetRegisterClass *
294 getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
295
296 /// Returns a register class which is compatible with \p SuperRC, such that a
297 /// subregister exists with class \p SubRC with subregister index \p
298 /// SubIdx. If this is impossible (e.g., an unaligned subregister index within
299 /// a register tuple), return null.
300 const TargetRegisterClass *
301 getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
302 const TargetRegisterClass *SubRC,
303 unsigned SubIdx) const;
304
305 /// \returns True if operands defined with this operand type can accept
306 /// a literal constant (i.e. any 32-bit immediate).
307 bool opCanUseLiteralConstant(unsigned OpType) const;
308
309 /// \returns True if operands defined with this operand type can accept
310 /// an inline constant. i.e. An integer value in the range (-16, 64) or
311 /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
312 bool opCanUseInlineConstant(unsigned OpType) const;
313
314 MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
315 const TargetRegisterClass *RC,
316 const MachineFunction &MF,
317 bool ReserveHighestVGPR = false) const;
318
319 const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
320 Register Reg) const;
321 const TargetRegisterClass *
322 getRegClassForOperandReg(const MachineRegisterInfo &MRI,
323 const MachineOperand &MO) const;
324
325 bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
326 bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
327 bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
328 return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
329 }
330
331 // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
332 // (such as VCC) which hold a wave-wide vector of boolean values. Examining
333 // just the register class is not suffcient; it needs to be combined with a
334 // value type. The next predicate isUniformReg() does this correctly.
335 bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
336 return !isSGPRClass(RC);
337 }
338
339 bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
340 Register Reg) const override;
341
342 ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
343 unsigned EltSize) const;
344
345 bool shouldCoalesce(MachineInstr *MI,
346 const TargetRegisterClass *SrcRC,
347 unsigned SubReg,
348 const TargetRegisterClass *DstRC,
349 unsigned DstSubReg,
350 const TargetRegisterClass *NewRC,
351 LiveIntervals &LIS) const override;
352
353 unsigned getRegPressureLimit(const TargetRegisterClass *RC,
354 MachineFunction &MF) const override;
355
356 unsigned getRegPressureSetLimit(const MachineFunction &MF,
357 unsigned Idx) const override;
358
359 bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
360 SmallVectorImpl<MCPhysReg> &Hints,
361 const MachineFunction &MF, const VirtRegMap *VRM,
362 const LiveRegMatrix *Matrix) const override;
363
364 const int *getRegUnitPressureSets(unsigned RegUnit) const override;
365
366 MCRegister getReturnAddressReg(const MachineFunction &MF) const;
367
368 const TargetRegisterClass *
369 getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;
370
371 const TargetRegisterClass *
372 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
373 return getRegClassForSizeOnBank(Size: Ty.getSizeInBits(), Bank);
374 }
375
376 const TargetRegisterClass *
377 getConstrainedRegClassForOperand(const MachineOperand &MO,
378 const MachineRegisterInfo &MRI) const override;
379
380 const TargetRegisterClass *getBoolRC() const {
381 return isWave32 ? &AMDGPU::SReg_32RegClass
382 : &AMDGPU::SReg_64RegClass;
383 }
384
385 const TargetRegisterClass *getWaveMaskRegClass() const {
386 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
387 : &AMDGPU::SReg_64_XEXECRegClass;
388 }
389
390 // Return the appropriate register class to use for 64-bit VGPRs for the
391 // subtarget.
392 const TargetRegisterClass *getVGPR64Class() const;
393
394 MCRegister getVCC() const;
395
396 MCRegister getExec() const;
397
398 const TargetRegisterClass *getRegClass(unsigned RCID) const;
399
400 // Find reaching register definition
401 MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
402 MachineInstr &Use,
403 MachineRegisterInfo &MRI,
404 LiveIntervals *LIS) const;
405
406 const uint32_t *getAllVGPRRegMask() const;
407 const uint32_t *getAllAGPRRegMask() const;
408 const uint32_t *getAllVectorRegMask() const;
409 const uint32_t *getAllAllocatableSRegMask() const;
410
411 // \returns number of 32 bit registers covered by a \p LM
412 static unsigned getNumCoveredRegs(LaneBitmask LM) {
413 // The assumption is that every lo16 subreg is an even bit and every hi16
414 // is an adjacent odd bit or vice versa.
415 uint64_t Mask = LM.getAsInteger();
416 uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
417 Mask = (Even >> 1) | Mask;
418 uint64_t Odd = Mask & 0x5555555555555555ULL;
419 return llvm::popcount(Value: Odd);
420 }
421
422 // \returns a DWORD offset of a \p SubReg
423 unsigned getChannelFromSubReg(unsigned SubReg) const {
424 return SubReg ? (getSubRegIdxOffset(Idx: SubReg) + 31) / 32 : 0;
425 }
426
427 // \returns a DWORD size of a \p SubReg
428 unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
429 return getNumCoveredRegs(LM: getSubRegIndexLaneMask(SubIdx: SubReg));
430 }
431
432 // For a given 16 bit \p Reg \returns a 32 bit register holding it.
433 // \returns \p Reg otherwise.
434 MCPhysReg get32BitRegister(MCPhysReg Reg) const;
435
436 // Returns true if a given register class is properly aligned for
437 // the subtarget.
438 bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
439
440 // Given \p RC returns corresponding aligned register class if required
441 // by the subtarget.
442 const TargetRegisterClass *
443 getProperlyAlignedRC(const TargetRegisterClass *RC) const;
444
445 /// Return all SGPR128 which satisfy the waves per execution unit requirement
446 /// of the subtarget.
447 ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
448
449 /// Return all SGPR64 which satisfy the waves per execution unit requirement
450 /// of the subtarget.
451 ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;
452
453 /// Return all SGPR32 which satisfy the waves per execution unit requirement
454 /// of the subtarget.
455 ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
456
457 // Insert spill or restore instructions.
458 // When lowering spill pseudos, the RegScavenger should be set.
459 // For creating spill instructions during frame lowering, where no scavenger
460 // is available, LiveUnits can be used.
461 void buildSpillLoadStore(MachineBasicBlock &MBB,
462 MachineBasicBlock::iterator MI, const DebugLoc &DL,
463 unsigned LoadStoreOp, int Index, Register ValueReg,
464 bool ValueIsKill, MCRegister ScratchOffsetReg,
465 int64_t InstrOffset, MachineMemOperand *MMO,
466 RegScavenger *RS,
467 LiveRegUnits *LiveUnits = nullptr) const;
468
469 // Return alignment in register file of first register in a register tuple.
470 unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
471 return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
472 }
473
474 // Check if register class RC has required alignment.
475 bool isRegClassAligned(const TargetRegisterClass *RC,
476 unsigned AlignNumBits) const {
477 assert(AlignNumBits != 0);
478 unsigned RCAlign = getRegClassAlignmentNumBits(RC);
479 return RCAlign == AlignNumBits ||
480 (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
481 }
482
483 // Return alignment of a SubReg relative to start of a register in RC class.
484 // No check if the subreg is supported by the current RC is made.
485 unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
486 unsigned SubReg) const;
487
488 // \returns a number of registers of a given \p RC used in a function.
489 // Does not go inside function calls.
490 unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
491 const TargetRegisterClass &RC) const;
492
493 std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
494 return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
495 : std::optional<uint8_t>{};
496 }
497
498 SmallVector<StringLiteral>
499 getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;
500};
501
namespace AMDGPU {
/// Get the size in bits of a register from the register class \p RC.
/// Free function (not a SIRegisterInfo member) so it can be used without a
/// subtarget-specific register-info instance.
unsigned getRegBitWidth(const TargetRegisterClass &RC);
} // namespace AMDGPU
506
507} // End namespace llvm
508
509#endif
510