SIMachineFunctionInfo.h source code [llvm_projects/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h]

1	//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --- C++ --==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	//
11	//===----------------------------------------------------------------------===//
12
13	#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14	#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15
16	#include "AMDGPUArgumentUsageInfo.h"
17	#include "AMDGPUMachineFunction.h"
18	#include "AMDGPUTargetMachine.h"
19	#include "GCNSubtarget.h"
20	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21	#include "SIInstrInfo.h"
22	#include "SIModeRegisterDefaults.h"
23	#include "llvm/ADT/SetVector.h"
24	#include "llvm/ADT/SmallVector.h"
25	#include "llvm/CodeGen/MIRYamlMapping.h"
26	#include "llvm/CodeGen/PseudoSourceValue.h"
27	#include "llvm/Support/raw_ostream.h"
28	#include <optional>
29
30	namespace llvm {
31
32	class MachineFrameInfo;
33	class MachineFunction;
34	class SIMachineFunctionInfo;
35	class SIRegisterInfo;
36	class TargetRegisterClass;
37
38	class AMDGPUPseudoSourceValue : public PseudoSourceValue {
39	public:
40	enum AMDGPUPSVKind : unsigned {
41	PSVImage = PseudoSourceValue::TargetCustom,
42	GWSResource
43	};
44
45	protected:
46	AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
47	: PseudoSourceValue (Kind, TM) {}
48
49	public:
50	bool isConstant(const MachineFrameInfo ) const* override {
51	// This should probably be true for most images, but we will start by being
52	// conservative.
53	return false;
54	}
55
56	bool isAliased(const MachineFrameInfo ) const* override {
57	return true;
58	}
59
60	bool mayAlias(const MachineFrameInfo ) const* override {
61	return true;
62	}
63	};
64
65	class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
66	public:
67	explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
68	: AMDGPUPseudoSourceValue (GWSResource, TM) {}
69
70	static bool classof(const PseudoSourceValue *V) {
71	return V->kind() == GWSResource;
72	}
73
74	// These are inaccessible memory from IR.
75	bool isAliased(const MachineFrameInfo ) const* override {
76	return false;
77	}
78
79	// These are inaccessible memory from IR.
80	bool mayAlias(const MachineFrameInfo ) const* override {
81	return false;
82	}
83
84	void printCustom(raw_ostream &OS) const override {
85	OS << "GWSResource";
86	}
87	};
88
89	namespace yaml {
90
91	struct SIArgument {
92	bool IsRegister;
93	union {
94	StringValue RegisterName;
95	unsigned StackOffset;
96	};
97	std::optional<unsigned> Mask;
98
99	// Default constructor, which creates a stack argument.
100	SIArgument() : IsRegister(false), StackOffset(`0`) {}
101	SIArgument(const SIArgument &Other) {
102	IsRegister = Other.IsRegister;
103	if (IsRegister)
104	new (&RegisterName) StringValue (Other.RegisterName);
105	else
106	StackOffset = Other.StackOffset;
107	Mask = Other.Mask;
108	}
109	SIArgument &operator=(const SIArgument &Other) {
110	// Default-construct or destruct the old RegisterName in case of switching
111	// union members
112	if (IsRegister != Other.IsRegister) {
113	if (Other.IsRegister)
114	new (&RegisterName) StringValue ();
115	else
116	RegisterName.~StringValue();
117	}
118	IsRegister = Other.IsRegister;
119	if (IsRegister)
120	RegisterName = Other.RegisterName;
121	else
122	StackOffset = Other.StackOffset;
123	Mask = Other.Mask;
124	return *this;
125	}
126	~SIArgument() {
127	if (IsRegister)
128	RegisterName.~StringValue();
129	}
130
131	// Helper to create a register or stack argument.
132	static inline SIArgument createArgument(bool IsReg) {
133	if (IsReg)
134	return SIArgument (IsReg);
135	return SIArgument ();
136	}
137
138	private:
139	// Construct a register argument.
140	SIArgument(bool) : IsRegister(true), RegisterName () {}
141	};
142
143	template <> struct MappingTraits<SIArgument> {
144	static void mapping(IO &YamlIO, SIArgument &A) {
145	if (YamlIO.outputting()) {
146	if (A.IsRegister)
147	YamlIO.mapRequired(Key: "reg", Val&: A.RegisterName);
148	else
149	YamlIO.mapRequired(Key: "offset", Val&: A.StackOffset);
150	} else {
151	auto Keys = YamlIO.keys();
152	if (is_contained(Range&: Keys, Element: "reg")) {
153	A = SIArgument::createArgument(IsReg: true);
154	YamlIO.mapRequired(Key: "reg", Val&: A.RegisterName);
155	} else if (is_contained(Range&: Keys, Element: "offset"))
156	YamlIO.mapRequired(Key: "offset", Val&: A.StackOffset);
157	else
158	YamlIO.setError("missing required key 'reg' or 'offset'");
159	}
160	YamlIO.mapOptional(Key: "mask", Val&: A.Mask);
161	}
162	static const bool flow = true;
163	};
164
// Collection of optional SIArgument entries, one per named input. An entry
// left as std::nullopt simply has no value recorded for that input.
165	struct SIArgumentInfo {
166	std::optional<SIArgument> PrivateSegmentBuffer;
167	std::optional<SIArgument> DispatchPtr;
168	std::optional<SIArgument> QueuePtr;
169	std::optional<SIArgument> KernargSegmentPtr;
170	std::optional<SIArgument> DispatchID;
171	std::optional<SIArgument> FlatScratchInit;
172	std::optional<SIArgument> PrivateSegmentSize;
173	std::optional<SIArgument> FirstKernArgPreloadReg;
174
175	std::optional<SIArgument> WorkGroupIDX;
176	std::optional<SIArgument> WorkGroupIDY;
177	std::optional<SIArgument> WorkGroupIDZ;
178	std::optional<SIArgument> WorkGroupInfo;
179	std::optional<SIArgument> LDSKernelId;
180	std::optional<SIArgument> PrivateSegmentWaveByteOffset;
181
182	std::optional<SIArgument> ImplicitArgPtr;
183	std::optional<SIArgument> ImplicitBufferPtr;
184
185	std::optional<SIArgument> WorkItemIDX;
186	std::optional<SIArgument> WorkItemIDY;
187	std::optional<SIArgument> WorkItemIDZ;
188	};
189
190	template <> struct MappingTraits<SIArgumentInfo> {
191	static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
192	YamlIO.mapOptional(Key: "privateSegmentBuffer", Val&: AI.PrivateSegmentBuffer);
193	YamlIO.mapOptional(Key: "dispatchPtr", Val&: AI.DispatchPtr);
194	YamlIO.mapOptional(Key: "queuePtr", Val&: AI.QueuePtr);
195	YamlIO.mapOptional(Key: "kernargSegmentPtr", Val&: AI.KernargSegmentPtr);
196	YamlIO.mapOptional(Key: "dispatchID", Val&: AI.DispatchID);
197	YamlIO.mapOptional(Key: "flatScratchInit", Val&: AI.FlatScratchInit);
198	YamlIO.mapOptional(Key: "privateSegmentSize", Val&: AI.PrivateSegmentSize);
199	YamlIO.mapOptional(Key: "firstKernArgPreloadReg", Val&: AI.FirstKernArgPreloadReg);
200
201	YamlIO.mapOptional(Key: "workGroupIDX", Val&: AI.WorkGroupIDX);
202	YamlIO.mapOptional(Key: "workGroupIDY", Val&: AI.WorkGroupIDY);
203	YamlIO.mapOptional(Key: "workGroupIDZ", Val&: AI.WorkGroupIDZ);
204	YamlIO.mapOptional(Key: "workGroupInfo", Val&: AI.WorkGroupInfo);
205	YamlIO.mapOptional(Key: "LDSKernelId", Val&: AI.LDSKernelId);
206	YamlIO.mapOptional(Key: "privateSegmentWaveByteOffset",
207	Val&: AI.PrivateSegmentWaveByteOffset);
208
209	YamlIO.mapOptional(Key: "implicitArgPtr", Val&: AI.ImplicitArgPtr);
210	YamlIO.mapOptional(Key: "implicitBufferPtr", Val&: AI.ImplicitBufferPtr);
211
212	YamlIO.mapOptional(Key: "workItemIDX", Val&: AI.WorkItemIDX);
213	YamlIO.mapOptional(Key: "workItemIDY", Val&: AI.WorkItemIDY);
214	YamlIO.mapOptional(Key: "workItemIDZ", Val&: AI.WorkItemIDZ);
215	}
216	};
217
218	// Default to default mode for default calling convention.
219	struct SIMode {
220	bool IEEE = true;
221	bool DX10Clamp = true;
222	bool FP32InputDenormals = true;
223	bool FP32OutputDenormals = true;
224	bool FP64FP16InputDenormals = true;
225	bool FP64FP16OutputDenormals = true;
226
227	SIMode() = default;
228
229	SIMode(const SIModeRegisterDefaults &Mode) {
230	IEEE = Mode.IEEE;
231	DX10Clamp = Mode.DX10Clamp;
232	FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
233	FP32OutputDenormals =
234	Mode.FP32Denormals.Output != DenormalMode::PreserveSign;
235	FP64FP16InputDenormals =
236	Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign;
237	FP64FP16OutputDenormals =
238	Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign;
239	}
240
241	bool operator ==(const SIMode Other) const {
242	return IEEE == Other.IEEE &&
243	DX10Clamp == Other.DX10Clamp &&
244	FP32InputDenormals == Other.FP32InputDenormals &&
245	FP32OutputDenormals == Other.FP32OutputDenormals &&
246	FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
247	FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
248	}
249	};
250
251	template <> struct MappingTraits<SIMode> {
252	static void mapping(IO &YamlIO, SIMode &Mode) {
253	YamlIO.mapOptional(Key: "ieee", Val&: Mode.IEEE, Default: true);
254	YamlIO.mapOptional(Key: "dx10-clamp", Val&: Mode.DX10Clamp, Default: true);
255	YamlIO.mapOptional(Key: "fp32-input-denormals", Val&: Mode.FP32InputDenormals, Default: true);
256	YamlIO.mapOptional(Key: "fp32-output-denormals", Val&: Mode.FP32OutputDenormals, Default: true);
257	YamlIO.mapOptional(Key: "fp64-fp16-input-denormals", Val&: Mode.FP64FP16InputDenormals, Default: true);
258	YamlIO.mapOptional(Key: "fp64-fp16-output-denormals", Val&: Mode.FP64FP16OutputDenormals, Default: true);
259	}
260	};
261
262	struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
263	uint64_t ExplicitKernArgSize = `0`;
264	Align MaxKernArgAlign;
265	uint32_t LDSSize = `0`;
266	uint32_t GDSSize = `0`;
267	Align DynLDSAlign;
268	bool IsEntryFunction = false;
269	bool IsChainFunction = false;
270	bool MemoryBound = false;
271	bool WaveLimiter = false;
272	bool HasSpilledSGPRs = false;
273	bool HasSpilledVGPRs = false;
274	uint16_t NumWaveDispatchSGPRs = `0`;
275	uint16_t NumWaveDispatchVGPRs = `0`;
276	uint32_t HighBitsOf32BitAddress = `0`;
277
278	// TODO: 10 may be a better default since it's the maximum.
279	unsigned Occupancy = `0`;
280
281	SmallVector<StringValue, `2`> SpillPhysVGPRS;
282	SmallVector<StringValue> WWMReservedRegs;
283
284	StringValue ScratchRSrcReg = "$private_rsrc_reg";
285	StringValue FrameOffsetReg = "$fp_reg";
286	StringValue StackPtrOffsetReg = "$sp_reg";
287
288	unsigned BytesInStackArgArea = `0`;
289	bool ReturnsVoid = true;
290
291	std::optional<SIArgumentInfo> ArgInfo;
292
293	unsigned PSInputAddr = `0`;
294	unsigned PSInputEnable = `0`;
295	unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
296
297	SIMode Mode;
298	std::optional<FrameIndex> ScavengeFI;
299	StringValue VGPRForAGPRCopy;
300	StringValue SGPRForEXECCopy;
301	StringValue LongBranchReservedReg;
302
303	bool HasInitWholeWave = false;
304	bool IsWholeWaveFunction = false;
305
306	unsigned DynamicVGPRBlockSize = `0`;
307	unsigned ScratchReservedForDynamicVGPRs = `0`;
308
309	unsigned NumKernargPreloadSGPRs = `0`;
310
311	SIMachineFunctionInfo() = default;
312	SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
313	const TargetRegisterInfo &TRI,
314	const llvm::MachineFunction &MF);
315
316	void mappingImpl(yaml::IO &YamlIO) override;
317	~SIMachineFunctionInfo() override = default;
318	};
319
320	template <> struct MappingTraits<SIMachineFunctionInfo> {
321	static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
322	YamlIO.mapOptional(Key: "explicitKernArgSize", Val&: MFI.ExplicitKernArgSize,
323	UINT64_C(`0`));
324	YamlIO.mapOptional(Key: "maxKernArgAlign", Val&: MFI.MaxKernArgAlign);
325	YamlIO.mapOptional(Key: "ldsSize", Val&: MFI.LDSSize, Default: `0u`);
326	YamlIO.mapOptional(Key: "gdsSize", Val&: MFI.GDSSize, Default: `0u`);
327	YamlIO.mapOptional(Key: "dynLDSAlign", Val&: MFI.DynLDSAlign, Default: Align ());
328	YamlIO.mapOptional(Key: "isEntryFunction", Val&: MFI.IsEntryFunction, Default: false);
329	YamlIO.mapOptional(Key: "isChainFunction", Val&: MFI.IsChainFunction, Default: false);
330	YamlIO.mapOptional(Key: "memoryBound", Val&: MFI.MemoryBound, Default: false);
331	YamlIO.mapOptional(Key: "waveLimiter", Val&: MFI.WaveLimiter, Default: false);
332	YamlIO.mapOptional(Key: "hasSpilledSGPRs", Val&: MFI.HasSpilledSGPRs, Default: false);
333	YamlIO.mapOptional(Key: "hasSpilledVGPRs", Val&: MFI.HasSpilledVGPRs, Default: false);
334	YamlIO.mapOptional(Key: "numWaveDispatchSGPRs", Val&: MFI.NumWaveDispatchSGPRs, Default: false);
335	YamlIO.mapOptional(Key: "numWaveDispatchVGPRs", Val&: MFI.NumWaveDispatchVGPRs, Default: false);
336	YamlIO.mapOptional(Key: "scratchRSrcReg", Val&: MFI.ScratchRSrcReg,
337	Default: StringValue ("$private_rsrc_reg"));
338	YamlIO.mapOptional(Key: "frameOffsetReg", Val&: MFI.FrameOffsetReg,
339	Default: StringValue ("$fp_reg"));
340	YamlIO.mapOptional(Key: "stackPtrOffsetReg", Val&: MFI.StackPtrOffsetReg,
341	Default: StringValue ("$sp_reg"));
342	YamlIO.mapOptional(Key: "bytesInStackArgArea", Val&: MFI.BytesInStackArgArea, Default: `0u`);
343	YamlIO.mapOptional(Key: "returnsVoid", Val&: MFI.ReturnsVoid, Default: true);
344	YamlIO.mapOptional(Key: "argumentInfo", Val&: MFI.ArgInfo);
345	YamlIO.mapOptional(Key: "psInputAddr", Val&: MFI.PSInputAddr, Default: `0u`);
346	YamlIO.mapOptional(Key: "psInputEnable", Val&: MFI.PSInputEnable, Default: `0u`);
347	YamlIO.mapOptional(Key: "maxMemoryClusterDWords", Val&: MFI.MaxMemoryClusterDWords,
348	Default: DefaultMemoryClusterDWordsLimit);
349	YamlIO.mapOptional(Key: "mode", Val&: MFI.Mode, Default: SIMode ());
350	YamlIO.mapOptional(Key: "highBitsOf32BitAddress",
351	Val&: MFI.HighBitsOf32BitAddress, Default: `0u`);
352	YamlIO.mapOptional(Key: "occupancy", Val&: MFI.Occupancy, Default: `0`);
353	YamlIO.mapOptional(Key: "spillPhysVGPRs", Val&: MFI.SpillPhysVGPRS);
354	YamlIO.mapOptional(Key: "wwmReservedRegs", Val&: MFI.WWMReservedRegs);
355	YamlIO.mapOptional(Key: "scavengeFI", Val&: MFI.ScavengeFI);
356	YamlIO.mapOptional(Key: "vgprForAGPRCopy", Val&: MFI.VGPRForAGPRCopy,
357	Default: StringValue ()); // Don't print out when it's empty.
358	YamlIO.mapOptional(Key: "sgprForEXECCopy", Val&: MFI.SGPRForEXECCopy,
359	Default: StringValue ()); // Don't print out when it's empty.
360	YamlIO.mapOptional(Key: "longBranchReservedReg", Val&: MFI.LongBranchReservedReg,
361	Default: StringValue ());
362	YamlIO.mapOptional(Key: "hasInitWholeWave", Val&: MFI.HasInitWholeWave, Default: false);
363	YamlIO.mapOptional(Key: "dynamicVGPRBlockSize", Val&: MFI.DynamicVGPRBlockSize, Default: false);
364	YamlIO.mapOptional(Key: "scratchReservedForDynamicVGPRs",
365	Val&: MFI.ScratchReservedForDynamicVGPRs, Default: `0`);
366	YamlIO.mapOptional(Key: "numKernargPreloadSGPRs", Val&: MFI.NumKernargPreloadSGPRs, Default: `0`);
367	YamlIO.mapOptional(Key: "isWholeWaveFunction", Val&: MFI.IsWholeWaveFunction, Default: false);
368	}
369	};
370
371	} // end namespace yaml
372
373	// A CSR SGPR value can be preserved inside a callee using one of the following
374	// methods.
375	// 1. Copy to an unused scratch SGPR.
376	// 2. Spill to a VGPR lane.
377	// 3. Spill to memory via a scratch VGPR.
378	// class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method used
379	// for an SGPR at function prolog/epilog.
// Selects which save/restore method a PrologEpilogSGPRSaveRestoreInfo entry
// describes. Stored in a single byte.
380	enum class SGPRSaveKind : uint8_t {
381	COPY_TO_SCRATCH_SGPR, // The value is copied to a scratch SGPR.
382	SPILL_TO_VGPR_LANE, // The value is spilled to a VGPR lane.
383	SPILL_TO_MEM // The value is spilled to memory.
384	};
385
386	class PrologEpilogSGPRSaveRestoreInfo {
387	SGPRSaveKind Kind;
388	union {
389	int Index;
390	Register Reg;
391	};
392
393	public:
394	PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {}
395	PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
396	: Kind(K), Reg (R) {}
397	Register getReg() const { return Reg; }
398	int getIndex() const { return Index; }
399	SGPRSaveKind getKind() const { return Kind; }
400	};
401
402	struct VGPRBlock2IndexFunctor {
403	using argument_type = Register;
404	unsigned operator()(Register Reg) const {
405	assert(AMDGPU::VReg_1024RegClass.contains(Reg) && "Expecting a VGPR block");
406
407	const MCRegister FirstVGPRBlock = AMDGPU::VReg_1024RegClass.getRegister(i: `0`);
408	return Reg - FirstVGPRBlock;
409	}
410	};
411
412	/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
413	/// tells the hardware which interpolation parameters to load.
414	class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
415	private MachineRegisterInfo::Delegate {
416	friend class GCNTargetMachine;
417
418	// State of MODE register, assumed FP mode.
419	SIModeRegisterDefaults Mode;
420
421	// Registers that may be reserved for spilling purposes. These may be the same
422	// as the input registers.
423	Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
424
425	// This is the unswizzled offset from the current dispatch's scratch wave
426	// base to the beginning of the current function's frame.
427	Register FrameOffsetReg = AMDGPU::FP_REG;
428
429	// This is an ABI register used in the non-entry calling convention to
430	// communicate the unswizzled offset from the current dispatch's scratch wave
431	// base to the beginning of the new function's frame.
432	Register StackPtrOffsetReg = AMDGPU::SP_REG;
433
434	// Registers that may be reserved when RA doesn't allocate enough
435	// registers to plan for the case where an indirect branch ends up
436	// being needed during branch relaxation.
437	Register LongBranchReservedReg;
438
439	AMDGPUFunctionArgInfo ArgInfo;
440
441	// Graphics info.
442	unsigned PSInputAddr = `0`;
443	unsigned PSInputEnable = `0`;
444
445	/// Number of bytes of arguments this function has on the stack. If the callee
446	/// is expected to restore the argument stack this should be a multiple of 16,
447	/// all usable during a tail call.
448	///
449	/// The alternative would forbid tail call optimisation in some cases: if we
450	/// want to transfer control from a function with 8-bytes of stack-argument
451	/// space to a function with 16-bytes then misalignment of this value would
452	/// make a stack adjustment necessary, which could not be undone by the
453	/// callee.
454	unsigned BytesInStackArgArea = `0`;
455
456	bool ReturnsVoid = true;
457
458	// A pair of default/requested minimum/maximum flat work group sizes.
459	// Minimum - first, maximum - second.
460	std::pair<unsigned, unsigned> FlatWorkGroupSizes = {`0`, `0`};
461
462	// A pair of default/requested minimum/maximum number of waves per execution
463	// unit. Minimum - first, maximum - second.
464	std::pair<unsigned, unsigned> WavesPerEU = {`0`, `0`};
465
466	const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;
467
468	// Default/requested number of work groups for the function.
469	SmallVector<unsigned> MaxNumWorkGroups = {`0`, `0`, `0`};
470
471	// Requested cluster dimensions.
472	AMDGPU::ClusterDimsAttr ClusterDims;
473
474	private:
475	unsigned NumUserSGPRs = `0`;
476	unsigned NumSystemSGPRs = `0`;
477
478	unsigned NumWaveDispatchSGPRs = `0`;
479	unsigned NumWaveDispatchVGPRs = `0`;
480
481	bool HasSpilledSGPRs = false;
482	bool HasSpilledVGPRs = false;
483	bool HasNonSpillStackObjects = false;
484	bool IsStackRealigned = false;
485
486	unsigned NumSpilledSGPRs = `0`;
487	unsigned NumSpilledVGPRs = `0`;
488
489	unsigned DynamicVGPRBlockSize = `0`;
490
491	// The size in bytes of the scratch space reserved for the CWSR trap handler
492	// to spill some of the dynamic VGPRs.
493	unsigned ScratchReservedForDynamicVGPRs = `0`;
494
495	// Tracks information about user SGPRs that will be setup by hardware which
496	// will apply to all wavefronts of the grid.
497	GCNUserSGPRUsageInfo UserSGPRInfo;
498
499	// Feature bits required for inputs passed in system SGPRs.
500	bool WorkGroupIDX : `1`; // Always initialized.
501	bool WorkGroupIDY : `1`;
502	bool WorkGroupIDZ : `1`;
503	bool WorkGroupInfo : `1`;
504	bool LDSKernelId : `1`;
505	bool PrivateSegmentWaveByteOffset : `1`;
506
507	bool WorkItemIDX : `1`; // Always initialized.
508	bool WorkItemIDY : `1`;
509	bool WorkItemIDZ : `1`;
510
511	// Pointer to where the ABI inserts special kernel arguments separate from the
512	// user arguments. This is an offset from the KernargSegmentPtr.
513	bool ImplicitArgPtr : `1`;
514
515	/// Minimum number of AGPRs required to allocate in the function. Only
516	/// relevant for gfx90a-gfx950. For gfx908, this should be infinite.
517	unsigned MinNumAGPRs = ~`0u`;
518
519	// The hard-wired high half of the address of the global information table
520	// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
521	// current hardware only allows a 16 bit value.
522	unsigned GITPtrHigh;
523
524	unsigned HighBitsOf32BitAddress;
525
526	// Flags associated with the virtual registers.
527	IndexedMap<uint8_t, VirtReg2IndexFunctor> VRegFlags;
528
529	// Current recorded maximum possible occupancy.
530	unsigned Occupancy;
531
532	// Maximum number of dwords that can be clustered during instruction
533	// scheduler stage.
534	unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
535
536	MCPhysReg getNextUserSGPR() const;
537
538	MCPhysReg getNextSystemSGPR() const;
539
540	// MachineRegisterInfo callback functions to notify events.
541	void MRI_NoteNewVirtualRegister(Register Reg) override;
542	void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;
543
544	public:
545	static bool MFMAVGPRForm;
546
547	struct VGPRSpillToAGPR {
548	SmallVector<MCPhysReg, `32`> Lanes;
549	bool FullyAllocated = false;
550	bool IsDead = false;
551	};
552
553	private:
554	// To track virtual VGPR + lane index for each subregister of the SGPR spilled
555	// to frameindex key during SILowerSGPRSpills pass.
556	DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
557	SGPRSpillsToVirtualVGPRLanes;
558	// To track physical VGPR + lane index for CSR SGPR spills and special SGPRs
559	// like Frame Pointer identified during PrologEpilogInserter.
560	DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
561	SGPRSpillsToPhysicalVGPRLanes;
562	unsigned NumVirtualVGPRSpillLanes = `0`;
563	unsigned NumPhysicalVGPRSpillLanes = `0`;
564	SmallVector<Register, `2`> SpillVGPRs;
565	SmallVector<Register, `2`> SpillPhysVGPRs;
566	using WWMSpillsMap = MapVector<Register, int>;
567	// To track the registers used in instructions that can potentially modify the
568	// inactive lanes. The WWM instructions and the writelane instructions for
569	// spilling SGPRs to VGPRs fall under such category of operations. The VGPRs
570	// modified by them should be spilled/restored at function prolog/epilog to
571	// avoid any undesired outcome. Each entry in this map holds a pair of values,
572	// the VGPR and its stack slot index.
573	WWMSpillsMap WWMSpills;
574
575	// Before allocation, the VGPR registers are partitioned into two distinct
576	// sets, the first one for WWM-values and the second set for non-WWM values.
577	// The latter set should be reserved during WWM-regalloc.
578	BitVector NonWWMRegMask;
579
580	using ReservedRegSet = SmallSetVector<Register, `8`>;
581	// To track the VGPRs reserved for WWM instructions. They get stack slots
582	// later during PrologEpilogInserter and get added into the superset WWMSpills
583	// for actual spilling. A separate set makes the register reserved part and
584	// the serialization easier.
585	ReservedRegSet WWMReservedRegs;
586
587	bool IsWholeWaveFunction = false;
588
589	using PrologEpilogSGPRSpill =
590	std::pair<Register, PrologEpilogSGPRSaveRestoreInfo>;
591	// To track the SGPR spill method used for a CSR SGPR register during
592	// frame lowering. Even though the SGPR spills are handled during
593	// SILowerSGPRSpills pass, some special handling needed later during the
594	// PrologEpilogInserter.
595	SmallVector<PrologEpilogSGPRSpill, `3`> PrologEpilogSGPRSpills;
596
597	// To save/restore EXEC MASK around WWM spills and copies.
598	Register SGPRForEXECCopy;
599
600	DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
601
602	// AGPRs used for VGPR spills.
603	SmallVector<MCPhysReg, `32`> SpillAGPR;
604
605	// VGPRs used for AGPR spills.
606	SmallVector<MCPhysReg, `32`> SpillVGPR;
607
608	// Emergency stack slot. Sometimes, we create this before finalizing the stack
609	// frame, so save it here and add it to the RegScavenger later.
610	std::optional<int> ScavengeFI;
611
612	// Map each VGPR CSR to the mask needed to save and restore it using block
613	// load/store instructions. Only used if the subtarget feature for VGPR block
614	// load/store is enabled.
615	IndexedMap<uint32_t, VGPRBlock2IndexFunctor> MaskForVGPRBlockOps;
616
617	private:
618	Register VGPRForAGPRCopy;
619
620	bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
621	unsigned LaneIndex);
622	bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
623	unsigned LaneIndex,
624	bool IsPrologEpilog);
625
626	public:
627	Register getVGPRForAGPRCopy() const {
628	return VGPRForAGPRCopy;
629	}
630
631	void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
632	VGPRForAGPRCopy = NewVGPRForAGPRCopy;
633	}
634
635	bool isCalleeSavedReg(const MCPhysReg CSRegs, MCPhysReg Reg) const*;
636
637	void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask) {
638	MaskForVGPRBlockOps.grow(N: RegisterBlock);
639	MaskForVGPRBlockOps [RegisterBlock] = Mask;
640	}
641
642	uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const {
643	return MaskForVGPRBlockOps [RegisterBlock];
644	}
645
646	bool hasMaskForVGPRBlockOps(Register RegisterBlock) const {
647	return MaskForVGPRBlockOps.inBounds(N: RegisterBlock);
648	}
649
650	public:
651	SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default;
652	SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI);
653
654	MachineFunctionInfo *
655	clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
656	const DenseMap<MachineBasicBlock , MachineBasicBlock > &Src2DstMBB)
657	const override;
658
659	bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
660	const MachineFunction &MF,
661	PerFunctionMIParsingState &PFS,
662	SMDiagnostic &Error, SMRange &SourceRange);
663
664	void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(X: Reg); }
665	bool isWWMReg(Register Reg) const {
666	return Reg.isVirtual() ? checkFlag(Reg, Flag: AMDGPU::VirtRegFlag::WWM_REG)
667	: WWMReservedRegs.contains(key: Reg);
668	}
669
670	void updateNonWWMRegMask(BitVector &RegMask) { NonWWMRegMask = RegMask; }
671	BitVector getNonWWMRegMask() const { return NonWWMRegMask; }
672	void clearNonWWMRegAllocMask() { NonWWMRegMask.clear(); }
673
674	SIModeRegisterDefaults getMode() const { return Mode; }
675
676	ArrayRef<SIRegisterInfo::SpilledReg>
677	getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const {
678	auto I = SGPRSpillsToVirtualVGPRLanes.find(Val: FrameIndex);
679	return (I == SGPRSpillsToVirtualVGPRLanes.end())
680	? ArrayRef<SIRegisterInfo::SpilledReg>()
681	: ArrayRef(I ->second);
682	}
683
684	ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
685	ArrayRef<Register> getSGPRSpillPhysVGPRs() const { return SpillPhysVGPRs; }
686
687	const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
688	const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }
689
690	bool isWWMReservedRegister(Register Reg) const {
691	return WWMReservedRegs.contains(key: Reg);
692	}
693
694	bool isWholeWaveFunction() const { return IsWholeWaveFunction; }
695
696	ArrayRef<PrologEpilogSGPRSpill> getPrologEpilogSGPRSpills() const {
697	assert(is_sorted(PrologEpilogSGPRSpills, llvm::less_first()));
698	return PrologEpilogSGPRSpills;
699	}
700
701	GCNUserSGPRUsageInfo &getUserSGPRInfo() { return UserSGPRInfo; }
702
703	const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; }
704
705	void addToPrologEpilogSGPRSpills(Register Reg,
706	PrologEpilogSGPRSaveRestoreInfo SI) {
707	assert(!hasPrologEpilogSGPRSpillEntry(Reg));
708
709	// Insert a new entry in the right place to keep the vector in sorted order.
710	// This should be cheap since the vector is expected to be very short.
711	PrologEpilogSGPRSpills.insert(
712	I: upper_bound(
713	Range&: PrologEpilogSGPRSpills, Value&: Reg,
714	C: [](const auto &LHS, const auto &RHS) { return LHS < RHS.first; }),
715	Elt: std::make_pair(x&: Reg, y&: SI));
716	}
717
718	// Check if an entry created for \p Reg in PrologEpilogSGPRSpills. Return true
719	// on success and false otherwise.
720	bool hasPrologEpilogSGPRSpillEntry(Register Reg) const {
721	const auto I = find_if(Range: PrologEpilogSGPRSpills, P: [&Reg](const* auto &Spill) {
722	return Spill.first == Reg;
723	});
724	return I != PrologEpilogSGPRSpills.end();
725	}
726
727	// Get the scratch SGPR if allocated to save/restore \p Reg.
728	Register getScratchSGPRCopyDstReg(Register Reg) const {
729	const auto I = find_if(Range: PrologEpilogSGPRSpills, P: [&Reg](const* auto &Spill) {
730	return Spill.first == Reg;
731	});
732	if (I != PrologEpilogSGPRSpills.end() &&
733	I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
734	return I->second.getReg();
735
736	return AMDGPU::NoRegister;
737	}
738
739	// Get all scratch SGPRs allocated to copy/restore the SGPR spills.
740	void getAllScratchSGPRCopyDstRegs(SmallVectorImpl<Register> &Regs) const {
741	for (const auto &SI : PrologEpilogSGPRSpills) {
742	if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
743	Regs.push_back(Elt: SI.second.getReg());
744	}
745	}
746
747	// Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI.
748	bool checkIndexInPrologEpilogSGPRSpills(int FI) const {
749	return find_if(Range: PrologEpilogSGPRSpills,
750	P: [FI](const std::pair<Register,
751	PrologEpilogSGPRSaveRestoreInfo> &SI) {
752	return SI.second.getKind() ==
753	SGPRSaveKind::SPILL_TO_VGPR_LANE &&
754	SI.second.getIndex() == FI;
755	}) != PrologEpilogSGPRSpills.end();
756	}
757
758	const PrologEpilogSGPRSaveRestoreInfo &
759	getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
760	const auto I = find_if(Range: PrologEpilogSGPRSpills, P: [&Reg](const* auto &Spill) {
761	return Spill.first == Reg;
762	});
763	assert(I != PrologEpilogSGPRSpills.end());
764
765	return I->second;
766	}
767
768	ArrayRef<SIRegisterInfo::SpilledReg>
769	getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const {
770	auto I = SGPRSpillsToPhysicalVGPRLanes.find(Val: FrameIndex);
771	return (I == SGPRSpillsToPhysicalVGPRLanes.end())
772	? ArrayRef<SIRegisterInfo::SpilledReg>()
773	: ArrayRef(I ->second);
774	}
775
776	void setFlag(Register Reg, uint8_t Flag) {
777	assert(Reg.isVirtual());
778	if (VRegFlags.inBounds(N: Reg))
779	VRegFlags [Reg] \|= Flag;
780	}
781
782	bool checkFlag(Register Reg, uint8_t Flag) const {
783	if (Reg.isPhysical())
784	return false;
785
786	return VRegFlags.inBounds(N: Reg) && VRegFlags [Reg] & Flag;
787	}
788
789	bool hasVRegFlags() { return VRegFlags.size(); }
790
791	void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = `4`,
792	Align Alignment = Align (`4`));
793
794	void splitWWMSpillRegisters(
795	MachineFunction &MF,
796	SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
797	SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const;
798
799	ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
800	return SpillAGPR;
801	}
802
803	Register getSGPRForEXECCopy() const { return SGPRForEXECCopy; }
804
805	void setSGPRForEXECCopy(Register Reg) { SGPRForEXECCopy = Reg; }
806
807	ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
808	return SpillVGPR;
809	}
810
811	MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
812	auto I = VGPRToAGPRSpills.find(Val: FrameIndex);
813	return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
814	: I ->second.Lanes [Lane];
815	}
816
817	void setVGPRToAGPRSpillDead(int FrameIndex) {
818	auto I = VGPRToAGPRSpills.find(Val: FrameIndex);
819	if (I != VGPRToAGPRSpills.end())
820	I ->second.IsDead = true;
821	}
822
823	// To bring the allocated WWM registers in \p WWMVGPRs to the lowest available
824	// range.
825	void shiftWwmVGPRsToLowestRange(MachineFunction &MF,
826	SmallVectorImpl<Register> &WWMVGPRs,
827	BitVector &SavedVGPRs);
828
829	bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
830	bool SpillToPhysVGPRLane = false,
831	bool IsPrologEpilog = false);
832	bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
833
834	/// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
835	/// to the default stack.
836	bool removeDeadFrameIndices(MachineFrameInfo &MFI,
837	bool ResetSGPRSpillStackIDs);
838
839	int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
840	std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
841
842	unsigned getBytesInStackArgArea() const {
843	return BytesInStackArgArea;
844	}
845
846	void setBytesInStackArgArea(unsigned Bytes) {
847	BytesInStackArgArea = Bytes;
848	}
849
850	bool isDynamicVGPREnabled() const { return DynamicVGPRBlockSize != `0`; }
851	unsigned getDynamicVGPRBlockSize() const { return DynamicVGPRBlockSize; }
852
853	// This is only used if we need to save any dynamic VGPRs in scratch.
854	unsigned getScratchReservedForDynamicVGPRs() const {
855	return ScratchReservedForDynamicVGPRs;
856	}
857
858	void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes) {
859	ScratchReservedForDynamicVGPRs = SizeInBytes;
860	}
861
862	// Add user SGPRs.
863	Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
864	Register addDispatchPtr(const SIRegisterInfo &TRI);
865	Register addQueuePtr(const SIRegisterInfo &TRI);
866	Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
867	Register addDispatchID(const SIRegisterInfo &TRI);
868	Register addFlatScratchInit(const SIRegisterInfo &TRI);
869	Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
870	Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
871	Register addLDSKernelId();
872	SmallVectorImpl<MCRegister> *
873	addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
874	unsigned AllocSizeDWord, int KernArgIdx,
875	int PaddingSGPRs);
876
877	/// Increment user SGPRs used for padding the argument list only.
878	Register addReservedUserSGPR() {
879	Register Next = getNextUserSGPR();
880	++NumUserSGPRs;
881	return Next;
882	}
883
884	// Add system SGPRs.
885	Register addWorkGroupIDX() {
886	ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
887	NumSystemSGPRs += `1`;
888	return ArgInfo.WorkGroupIDX.getRegister();
889	}
890
891	Register addWorkGroupIDY() {
892	ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
893	NumSystemSGPRs += `1`;
894	return ArgInfo.WorkGroupIDY.getRegister();
895	}
896
897	Register addWorkGroupIDZ() {
898	ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
899	NumSystemSGPRs += `1`;
900	return ArgInfo.WorkGroupIDZ.getRegister();
901	}
902
903	Register addWorkGroupInfo() {
904	ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
905	NumSystemSGPRs += `1`;
906	return ArgInfo.WorkGroupInfo.getRegister();
907	}
908
909	bool hasLDSKernelId() const { return LDSKernelId; }
910
911	// Add special VGPR inputs
912	void setWorkItemIDX(ArgDescriptor Arg) {
913	ArgInfo.WorkItemIDX = Arg;
914	}
915
916	void setWorkItemIDY(ArgDescriptor Arg) {
917	ArgInfo.WorkItemIDY = Arg;
918	}
919
920	void setWorkItemIDZ(ArgDescriptor Arg) {
921	ArgInfo.WorkItemIDZ = Arg;
922	}
923
924	Register addPrivateSegmentWaveByteOffset() {
925	ArgInfo.PrivateSegmentWaveByteOffset
926	= ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
927	NumSystemSGPRs += `1`;
928	return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
929	}
930
931	void setPrivateSegmentWaveByteOffset(Register Reg) {
932	ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
933	}
934
935	bool hasWorkGroupIDX() const {
936	return WorkGroupIDX;
937	}
938
939	bool hasWorkGroupIDY() const {
940	return WorkGroupIDY;
941	}
942
943	bool hasWorkGroupIDZ() const {
944	return WorkGroupIDZ;
945	}
946
947	bool hasWorkGroupInfo() const {
948	return WorkGroupInfo;
949	}
950
951	bool hasPrivateSegmentWaveByteOffset() const {
952	return PrivateSegmentWaveByteOffset;
953	}
954
955	bool hasWorkItemIDX() const {
956	return WorkItemIDX;
957	}
958
959	bool hasWorkItemIDY() const {
960	return WorkItemIDY;
961	}
962
963	bool hasWorkItemIDZ() const {
964	return WorkItemIDZ;
965	}
966
967	bool hasImplicitArgPtr() const {
968	return ImplicitArgPtr;
969	}
970
971	AMDGPUFunctionArgInfo &getArgInfo() {
972	return ArgInfo;
973	}
974
975	const AMDGPUFunctionArgInfo &getArgInfo() const {
976	return ArgInfo;
977	}
978
979	std::tuple<const ArgDescriptor , const* TargetRegisterClass *, LLT>
980	getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
981	return ArgInfo.getPreloadedValue(Value);
982	}
983
984	MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
985	const auto *Arg = std::get<`0`>(t: ArgInfo.getPreloadedValue(Value));
986	return Arg ? Arg->getRegister() : MCRegister ();
987	}
988
989	unsigned getGITPtrHigh() const {
990	return GITPtrHigh;
991	}
992
993	Register getGITPtrLoReg(const MachineFunction &MF) const;
994
995	uint32_t get32BitAddressHighBits() const {
996	return HighBitsOf32BitAddress;
997	}
998
999	unsigned getNumUserSGPRs() const {
1000	return NumUserSGPRs;
1001	}
1002
1003	unsigned getNumPreloadedSGPRs() const {
1004	return NumUserSGPRs + NumSystemSGPRs;
1005	}
1006
1007	unsigned getNumKernargPreloadedSGPRs() const {
1008	return UserSGPRInfo.getNumKernargPreloadSGPRs();
1009	}
1010
1011	unsigned getNumWaveDispatchSGPRs() const { return NumWaveDispatchSGPRs; }
1012
1013	void setNumWaveDispatchSGPRs(unsigned Count) { NumWaveDispatchSGPRs = Count; }
1014
1015	unsigned getNumWaveDispatchVGPRs() const { return NumWaveDispatchVGPRs; }
1016
1017	void setNumWaveDispatchVGPRs(unsigned Count) { NumWaveDispatchVGPRs = Count; }
1018
1019	Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
1020	if (ArgInfo.PrivateSegmentWaveByteOffset)
1021	return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
1022	return MCRegister ();
1023	}
1024
1025	/// Returns the physical register reserved for use as the resource
1026	/// descriptor for scratch accesses.
1027	Register getScratchRSrcReg() const {
1028	return ScratchRSrcReg;
1029	}
1030
1031	void setScratchRSrcReg(Register Reg) {
1032	assert(Reg != `0` && "Should never be unset");
1033	ScratchRSrcReg = Reg;
1034	}
1035
1036	Register getFrameOffsetReg() const {
1037	return FrameOffsetReg;
1038	}
1039
1040	void setFrameOffsetReg(Register Reg) {
1041	assert(Reg != `0` && "Should never be unset");
1042	FrameOffsetReg = Reg;
1043	}
1044
1045	void setStackPtrOffsetReg(Register Reg) {
1046	assert(Reg != `0` && "Should never be unset");
1047	StackPtrOffsetReg = Reg;
1048	}
1049
1050	void setLongBranchReservedReg(Register Reg) { LongBranchReservedReg = Reg; }
1051
1052	// Note the unset value for this is AMDGPU::SP_REG rather than
1053	// NoRegister. This is mostly a workaround for MIR tests where state that
1054	// can't be directly computed from the function is not preserved in serialized
1055	// MIR.
1056	Register getStackPtrOffsetReg() const {
1057	return StackPtrOffsetReg;
1058	}
1059
1060	Register getLongBranchReservedReg() const { return LongBranchReservedReg; }
1061
1062	Register getQueuePtrUserSGPR() const {
1063	return ArgInfo.QueuePtr.getRegister();
1064	}
1065
1066	Register getImplicitBufferPtrUserSGPR() const {
1067	return ArgInfo.ImplicitBufferPtr.getRegister();
1068	}
1069
1070	bool hasSpilledSGPRs() const {
1071	return HasSpilledSGPRs;
1072	}
1073
1074	void setHasSpilledSGPRs(bool Spill = true) {
1075	HasSpilledSGPRs = Spill;
1076	}
1077
1078	bool hasSpilledVGPRs() const {
1079	return HasSpilledVGPRs;
1080	}
1081
1082	void setHasSpilledVGPRs(bool Spill = true) {
1083	HasSpilledVGPRs = Spill;
1084	}
1085
1086	bool hasNonSpillStackObjects() const {
1087	return HasNonSpillStackObjects;
1088	}
1089
1090	void setHasNonSpillStackObjects(bool StackObject = true) {
1091	HasNonSpillStackObjects = StackObject;
1092	}
1093
1094	bool isStackRealigned() const {
1095	return IsStackRealigned;
1096	}
1097
1098	void setIsStackRealigned(bool Realigned = true) {
1099	IsStackRealigned = Realigned;
1100	}
1101
1102	unsigned getNumSpilledSGPRs() const {
1103	return NumSpilledSGPRs;
1104	}
1105
1106	unsigned getNumSpilledVGPRs() const {
1107	return NumSpilledVGPRs;
1108	}
1109
1110	void addToSpilledSGPRs(unsigned num) {
1111	NumSpilledSGPRs += num;
1112	}
1113
1114	void addToSpilledVGPRs(unsigned num) {
1115	NumSpilledVGPRs += num;
1116	}
1117
1118	unsigned getPSInputAddr() const {
1119	return PSInputAddr;
1120	}
1121
1122	unsigned getPSInputEnable() const {
1123	return PSInputEnable;
1124	}
1125
1126	bool isPSInputAllocated(unsigned Index) const {
1127	return PSInputAddr & (`1` << Index);
1128	}
1129
1130	void markPSInputAllocated(unsigned Index) {
1131	PSInputAddr \|= `1` << Index;
1132	}
1133
1134	void markPSInputEnabled(unsigned Index) {
1135	PSInputEnable \|= `1` << Index;
1136	}
1137
1138	bool returnsVoid() const {
1139	return ReturnsVoid;
1140	}
1141
1142	void setIfReturnsVoid(bool Value) {
1143	ReturnsVoid = Value;
1144	}
1145
1146	/// \returns A pair of default/requested minimum/maximum flat work group sizes
1147	/// for this function.
1148	std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
1149	return FlatWorkGroupSizes;
1150	}
1151
1152	/// \returns Default/requested minimum flat work group size for this function.
1153	unsigned getMinFlatWorkGroupSize() const {
1154	return FlatWorkGroupSizes.first;
1155	}
1156
1157	/// \returns Default/requested maximum flat work group size for this function.
1158	unsigned getMaxFlatWorkGroupSize() const {
1159	return FlatWorkGroupSizes.second;
1160	}
1161
1162	/// \returns A pair of default/requested minimum/maximum number of waves per
1163	/// execution unit.
1164	std::pair<unsigned, unsigned> getWavesPerEU() const {
1165	return WavesPerEU;
1166	}
1167
1168	/// \returns Default/requested minimum number of waves per execution unit.
1169	unsigned getMinWavesPerEU() const {
1170	return WavesPerEU.first;
1171	}
1172
1173	/// \returns Default/requested maximum number of waves per execution unit.
1174	unsigned getMaxWavesPerEU() const {
1175	return WavesPerEU.second;
1176	}
1177
1178	const AMDGPUGWSResourcePseudoSourceValue *
1179	getGWSPSV(const AMDGPUTargetMachine &TM) {
1180	return &GWSResourcePSV;
1181	}
1182
1183	unsigned getOccupancy() const {
1184	return Occupancy;
1185	}
1186
1187	unsigned getMinAllowedOccupancy() const {
1188	if (!isMemoryBound() && !needsWaveLimiter())
1189	return Occupancy;
1190	return (Occupancy < `4`) ? Occupancy : `4`;
1191	}
1192
1193	void limitOccupancy(const MachineFunction &MF);
1194
1195	void limitOccupancy(unsigned Limit) {
1196	if (Occupancy > Limit)
1197	Occupancy = Limit;
1198	}
1199
1200	void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
1201	if (Occupancy < Limit)
1202	Occupancy = Limit;
1203	limitOccupancy(MF);
1204	}
1205
1206	unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; }
1207
1208	unsigned getMinNumAGPRs() const { return MinNumAGPRs; }
1209
1210	/// Return true if an MFMA that requires at least \p NumRegs should select to
1211	/// the AGPR form, instead of the VGPR form.
1212	bool selectAGPRFormMFMA(unsigned NumRegs) const {
1213	return !MFMAVGPRForm && getMinNumAGPRs() >= NumRegs;
1214	}
1215
1216	// \returns true if a function has a use of AGPRs via inline asm or
1217	// has a call which may use it.
1218	bool mayUseAGPRs(const Function &F) const;
1219
1220	/// \returns Default/requested number of work groups for this function.
1221	SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }
1222
1223	unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups [`0`]; }
1224	unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups [`1`]; }
1225	unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups [`2`]; }
1226
1227	AMDGPU::ClusterDimsAttr getClusterDims() const { return ClusterDims; }
1228	};
1229
1230	} // end namespace llvm
1231
1232	#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
1233

// Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h