//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIRegisterInfo
//
//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
15#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
16
17#include "llvm/ADT/BitVector.h"
18
19#define GET_REGINFO_HEADER
20#include "AMDGPUGenRegisterInfo.inc"
21
22#include "SIDefines.h"
23
24namespace llvm {
25
// Forward declarations: the interfaces below only need these types by
// reference/pointer, so the full definitions are not required here.
class GCNSubtarget;
class LiveIntervals;
class LiveRegUnits;
class MachineInstrBuilder;
class RegisterBank;
struct SGPRSpillBuilder;
32
/// Register allocation hint types. Helps eliminate unneeded COPY with True16
namespace AMDGPURI {

// Hint that a virtual register prefers a 16-bit (lo/hi half) or a full 32-bit
// physical register, respectively.
enum { Size16 = 1, Size32 = 2 };

} // end namespace AMDGPURI
39
40class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
41private:
42 const GCNSubtarget &ST;
43 bool SpillSGPRToVGPR;
44 bool isWave32;
45 BitVector RegPressureIgnoredUnits;
46
47 /// Sub reg indexes for getRegSplitParts.
48 /// First index represents subreg size from 1 to 32 Half DWORDS.
49 /// The inner vector is sorted by bit offset.
50 /// Provided a register can be fully split with given subregs,
51 /// all elements of the inner vector combined give a full lane mask.
52 static std::array<std::vector<int16_t>, 32> RegSplitParts;
53
54 // Table representing sub reg of given width and offset.
55 // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
56 // Second index is 32 different dword offsets.
57 static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
58
59 void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
60
61public:
62 SIRegisterInfo(const GCNSubtarget &ST);
63
64 struct SpilledReg {
65 Register VGPR;
66 int Lane = -1;
67
68 SpilledReg() = default;
69 SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}
70
71 bool hasLane() { return Lane != -1; }
72 bool hasReg() { return VGPR != 0; }
73 };
74
75 /// \returns the sub reg enum value for the given \p Channel
76 /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
77 static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
78
79 bool spillSGPRToVGPR() const {
80 return SpillSGPRToVGPR;
81 }
82
83 /// Return the largest available SGPR aligned to \p Align for the register
84 /// class \p RC.
85 MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
86 const unsigned Align,
87 const TargetRegisterClass *RC) const;
88
89 /// Return the end register initially reserved for the scratch buffer in case
90 /// spilling is needed.
91 MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
92
93 BitVector getReservedRegs(const MachineFunction &MF) const override;
94 bool isAsmClobberable(const MachineFunction &MF,
95 MCRegister PhysReg) const override;
96
97 const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
98 const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
99 const uint32_t *getCallPreservedMask(const MachineFunction &MF,
100 CallingConv::ID) const override;
101 const uint32_t *getNoPreservedMask() const override;
102
103 // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
104 // conventions are free to use certain VGPRs without saving and restoring any
105 // lanes (not even inactive ones).
106 static bool isChainScratchRegister(Register VGPR);
107
108 // Stack access is very expensive. CSRs are also the high registers, and we
109 // want to minimize the number of used registers.
110 unsigned getCSRCost() const override { return 100; }
111
112 // When building a block VGPR load, we only really transfer a subset of the
113 // registers in the block, based on a mask. Liveness analysis is not aware of
114 // the mask, so it might consider that any register in the block is available
115 // before the load and may therefore be scavenged. This is not ok for CSRs
116 // that are not clobbered, since the caller will expect them to be preserved.
117 // This method will add artificial implicit uses for those registers on the
118 // load instruction, so liveness analysis knows they're unavailable.
119 void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB,
120 Register BlockReg) const;
121
122 const TargetRegisterClass *
123 getLargestLegalSuperClass(const TargetRegisterClass *RC,
124 const MachineFunction &MF) const override;
125
126 Register getFrameRegister(const MachineFunction &MF) const override;
127
128 bool hasBasePointer(const MachineFunction &MF) const;
129 Register getBaseRegister() const;
130
131 bool shouldRealignStack(const MachineFunction &MF) const override;
132 bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
133
134 bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
135 bool requiresFrameIndexReplacementScavenging(
136 const MachineFunction &MF) const override;
137 bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
138
139 int64_t getScratchInstrOffset(const MachineInstr *MI) const;
140
141 int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
142 int Idx) const override;
143
144 bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
145
146 Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
147 int64_t Offset) const override;
148
149 void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
150 int64_t Offset) const override;
151
152 bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
153 int64_t Offset) const override;
154
155 const TargetRegisterClass *
156 getPointerRegClass(unsigned Kind = 0) const override;
157
158 /// Returns a legal register class to copy a register in the specified class
159 /// to or from. If it is possible to copy the register directly without using
160 /// a cross register class copy, return the specified RC. Returns NULL if it
161 /// is not possible to copy between two registers of the specified class.
162 const TargetRegisterClass *
163 getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
164
165 const TargetRegisterClass *
166 getRegClassForBlockOp(const MachineFunction &MF) const {
167 return &AMDGPU::VReg_1024RegClass;
168 }
169
170 void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
171 bool IsLoad, bool IsKill = true) const;
172
173 /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
174 /// free VGPR lane to spill.
175 bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
176 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
177 bool OnlyToVGPR = false,
178 bool SpillToPhysVGPRLane = false) const;
179
180 bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
181 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
182 bool OnlyToVGPR = false,
183 bool SpillToPhysVGPRLane = false) const;
184
185 bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
186 MachineBasicBlock &RestoreMBB, Register SGPR,
187 RegScavenger *RS) const;
188
189 bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
190 unsigned FIOperandNum,
191 RegScavenger *RS) const override;
192
193 bool eliminateSGPRToVGPRSpillFrameIndex(
194 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
195 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
196 bool SpillToPhysVGPRLane = false) const;
197
198 StringRef getRegAsmName(MCRegister Reg) const override;
199
200 // Pseudo regs are not allowed
201 unsigned getHWRegIndex(MCRegister Reg) const;
202
203 LLVM_READONLY
204 const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;
205
206 LLVM_READONLY const TargetRegisterClass *
207 getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const;
208
209 LLVM_READONLY
210 const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
211
212 LLVM_READONLY
213 const TargetRegisterClass *
214 getVectorSuperClassForBitWidth(unsigned BitWidth) const;
215
216 LLVM_READONLY
217 const TargetRegisterClass *
218 getDefaultVectorSuperClassForBitWidth(unsigned BitWidth) const;
219
220 LLVM_READONLY
221 static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
222
223 /// \returns true if this class contains only SGPR registers
224 static bool isSGPRClass(const TargetRegisterClass *RC) {
225 return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
226 }
227
228 /// \returns true if this class ID contains only SGPR registers
229 bool isSGPRClassID(unsigned RCID) const {
230 return isSGPRClass(RC: getRegClass(i: RCID));
231 }
232
233 bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
234 bool isSGPRPhysReg(Register Reg) const {
235 return isSGPRClass(RC: getPhysRegBaseClass(Reg));
236 }
237
238 bool isVGPRPhysReg(Register Reg) const {
239 return isVGPRClass(RC: getPhysRegBaseClass(Reg));
240 }
241
242 /// \returns true if this class contains only VGPR registers
243 static bool isVGPRClass(const TargetRegisterClass *RC) {
244 return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
245 }
246
247 /// \returns true if this class contains only AGPR registers
248 static bool isAGPRClass(const TargetRegisterClass *RC) {
249 return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
250 }
251
252 /// \returns true only if this class contains both VGPR and AGPR registers
253 bool isVectorSuperClass(const TargetRegisterClass *RC) const {
254 return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
255 }
256
257 /// \returns true only if this class contains both VGPR and SGPR registers
258 bool isVSSuperClass(const TargetRegisterClass *RC) const {
259 return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
260 }
261
262 /// \returns true if this class contains VGPR registers.
263 static bool hasVGPRs(const TargetRegisterClass *RC) {
264 return RC->TSFlags & SIRCFlags::HasVGPR;
265 }
266
267 /// \returns true if this class contains AGPR registers.
268 static bool hasAGPRs(const TargetRegisterClass *RC) {
269 return RC->TSFlags & SIRCFlags::HasAGPR;
270 }
271
272 /// \returns true if this class contains SGPR registers.
273 static bool hasSGPRs(const TargetRegisterClass *RC) {
274 return RC->TSFlags & SIRCFlags::HasSGPR;
275 }
276
277 /// \returns true if this class contains any vector registers.
278 static bool hasVectorRegisters(const TargetRegisterClass *RC) {
279 return hasVGPRs(RC) || hasAGPRs(RC);
280 }
281
282 /// \returns A VGPR reg class with the same width as \p SRC
283 const TargetRegisterClass *
284 getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;
285
286 /// \returns An AGPR reg class with the same width as \p SRC
287 const TargetRegisterClass *
288 getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
289
290 /// \returns An AGPR+VGPR super reg class with the same width as \p SRC
291 const TargetRegisterClass *
292 getEquivalentAVClass(const TargetRegisterClass *SRC) const;
293
294 /// \returns A SGPR reg class with the same width as \p SRC
295 const TargetRegisterClass *
296 getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
297
298 /// Returns a register class which is compatible with \p SuperRC, such that a
299 /// subregister exists with class \p SubRC with subregister index \p
300 /// SubIdx. If this is impossible (e.g., an unaligned subregister index within
301 /// a register tuple), return null.
302 const TargetRegisterClass *
303 getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
304 const TargetRegisterClass *SubRC,
305 unsigned SubIdx) const;
306
307 /// \returns True if operands defined with this operand type can accept
308 /// a literal constant (i.e. any 32-bit immediate).
309 bool opCanUseLiteralConstant(unsigned OpType) const;
310
311 /// \returns True if operands defined with this operand type can accept
312 /// an inline constant. i.e. An integer value in the range (-16, 64) or
313 /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
314 bool opCanUseInlineConstant(unsigned OpType) const;
315
316 MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
317 const TargetRegisterClass *RC,
318 const MachineFunction &MF,
319 bool ReserveHighestVGPR = false) const;
320
321 const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
322 Register Reg) const;
323 const TargetRegisterClass *
324 getRegClassForOperandReg(const MachineRegisterInfo &MRI,
325 const MachineOperand &MO) const;
326
327 bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
328 bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
329 bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
330 return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
331 }
332
333 // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
334 // (such as VCC) which hold a wave-wide vector of boolean values. Examining
335 // just the register class is not suffcient; it needs to be combined with a
336 // value type. The next predicate isUniformReg() does this correctly.
337 bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
338 return !isSGPRClass(RC);
339 }
340
341 bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
342 Register Reg) const override;
343
344 ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
345 unsigned EltSize) const;
346
347 unsigned getRegPressureLimit(const TargetRegisterClass *RC,
348 MachineFunction &MF) const override;
349
350 unsigned getRegPressureSetLimit(const MachineFunction &MF,
351 unsigned Idx) const override;
352
353 bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
354 SmallVectorImpl<MCPhysReg> &Hints,
355 const MachineFunction &MF, const VirtRegMap *VRM,
356 const LiveRegMatrix *Matrix) const override;
357
358 const int *getRegUnitPressureSets(MCRegUnit RegUnit) const override;
359
360 MCRegister getReturnAddressReg(const MachineFunction &MF) const;
361
362 const TargetRegisterClass *
363 getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;
364
365 const TargetRegisterClass *
366 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
367 return getRegClassForSizeOnBank(Size: Ty.getSizeInBits(), Bank);
368 }
369
370 const TargetRegisterClass *
371 getConstrainedRegClassForOperand(const MachineOperand &MO,
372 const MachineRegisterInfo &MRI) const override;
373
374 const TargetRegisterClass *getBoolRC() const {
375 return isWave32 ? &AMDGPU::SReg_32RegClass
376 : &AMDGPU::SReg_64RegClass;
377 }
378
379 const TargetRegisterClass *getWaveMaskRegClass() const {
380 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
381 : &AMDGPU::SReg_64_XEXECRegClass;
382 }
383
384 // Return the appropriate register class to use for 64-bit VGPRs for the
385 // subtarget.
386 const TargetRegisterClass *getVGPR64Class() const;
387
388 MCRegister getVCC() const;
389
390 MCRegister getExec() const;
391
392 // Find reaching register definition
393 MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
394 MachineInstr &Use,
395 MachineRegisterInfo &MRI,
396 LiveIntervals *LIS) const;
397
398 const uint32_t *getAllVGPRRegMask() const;
399 const uint32_t *getAllAGPRRegMask() const;
400 const uint32_t *getAllVectorRegMask() const;
401 const uint32_t *getAllAllocatableSRegMask() const;
402
403 // \returns number of 32 bit registers covered by a \p LM
404 static unsigned getNumCoveredRegs(LaneBitmask LM) {
405 // The assumption is that every lo16 subreg is an even bit and every hi16
406 // is an adjacent odd bit or vice versa.
407 uint64_t Mask = LM.getAsInteger();
408 uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
409 Mask = (Even >> 1) | Mask;
410 uint64_t Odd = Mask & 0x5555555555555555ULL;
411 return llvm::popcount(Value: Odd);
412 }
413
414 // \returns a DWORD offset of a \p SubReg
415 unsigned getChannelFromSubReg(unsigned SubReg) const {
416 return SubReg ? (getSubRegIdxOffset(Idx: SubReg) + 31) / 32 : 0;
417 }
418
419 // \returns a DWORD size of a \p SubReg
420 unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
421 return getNumCoveredRegs(LM: getSubRegIndexLaneMask(SubIdx: SubReg));
422 }
423
424 // For a given 16 bit \p Reg \returns a 32 bit register holding it.
425 // \returns \p Reg otherwise.
426 MCPhysReg get32BitRegister(MCPhysReg Reg) const;
427
428 // Returns true if a given register class is properly aligned for
429 // the subtarget.
430 bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
431
432 /// Return all SGPR128 which satisfy the waves per execution unit requirement
433 /// of the subtarget.
434 ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
435
436 /// Return all SGPR64 which satisfy the waves per execution unit requirement
437 /// of the subtarget.
438 ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;
439
440 /// Return all SGPR32 which satisfy the waves per execution unit requirement
441 /// of the subtarget.
442 ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
443
444 // Insert spill or restore instructions.
445 // When lowering spill pseudos, the RegScavenger should be set.
446 // For creating spill instructions during frame lowering, where no scavenger
447 // is available, LiveUnits can be used.
448 void buildSpillLoadStore(MachineBasicBlock &MBB,
449 MachineBasicBlock::iterator MI, const DebugLoc &DL,
450 unsigned LoadStoreOp, int Index, Register ValueReg,
451 bool ValueIsKill, MCRegister ScratchOffsetReg,
452 int64_t InstrOffset, MachineMemOperand *MMO,
453 RegScavenger *RS,
454 LiveRegUnits *LiveUnits = nullptr) const;
455
456 // Return alignment in register file of first register in a register tuple.
457 unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
458 return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
459 }
460
461 // Check if register class RC has required alignment.
462 bool isRegClassAligned(const TargetRegisterClass *RC,
463 unsigned AlignNumBits) const {
464 assert(AlignNumBits != 0);
465 unsigned RCAlign = getRegClassAlignmentNumBits(RC);
466 return RCAlign == AlignNumBits ||
467 (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
468 }
469
470 // Return alignment of a SubReg relative to start of a register in RC class.
471 // No check if the subreg is supported by the current RC is made.
472 unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
473 unsigned SubReg) const;
474
475 // \returns a number of registers of a given \p RC used in a function.
476 // Does not go inside function calls. If \p IncludeCalls is true, it will
477 // include registers that may be clobbered by calls.
478 unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
479 const TargetRegisterClass &RC,
480 bool IncludeCalls = true) const;
481
482 std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
483 return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
484 : std::optional<uint8_t>{};
485 }
486
487 SmallVector<StringLiteral>
488 getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;
489
490 float
491 getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override {
492 // Prioritize VGPR_32_Lo256 over other classes which may occupy registers
493 // beyond v256.
494 return AMDGPUGenRegisterInfo::getSpillWeightScaleFactor(RC) *
495 ((RC == &AMDGPU::VGPR_32_Lo256RegClass ||
496 RC == &AMDGPU::VReg_64_Lo256_Align2RegClass)
497 ? 2.0
498 : 1.0);
499 }
500};
501
namespace AMDGPU {
/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const TargetRegisterClass &RC);
} // namespace AMDGPU
506
507} // End namespace llvm
508
509#endif
510