//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class SIMachineFunctionInfo;
class SIRegisterInfo;
class TargetRegisterClass;

class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
  enum AMDGPUPSVKind : unsigned {
    PSVImage = PseudoSourceValue::TargetCustom,
    GWSResource
  };

protected:
  AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
      : PseudoSourceValue(Kind, TM) {}

public:
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

class AMDGPUGWSResourcePseudoSourceValue final
    : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
      : AMDGPUPseudoSourceValue(GWSResource, TM) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }

  // GWS resource memory is inaccessible from IR, so it never aliases frame
  // objects.
  bool isAliased(const MachineFrameInfo *) const override {
    return false;
  }

  // GWS resource memory is inaccessible from IR, so it never aliases other
  // memory operations.
  bool mayAlias(const MachineFrameInfo *) const override {
    return false;
  }

  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
};
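
// Because these pseudo source values participate in LLVM-style RTTI through
// classof(), alias queries can narrow a generic PseudoSourceValue with isa<>
// or dyn_cast<>. A minimal sketch (the helper below is hypothetical):
//
//   static bool mayAliasIRMemory(const PseudoSourceValue *PSV) {
//     if (isa<AMDGPUGWSResourcePseudoSourceValue>(PSV))
//       return false; // GWS resources never alias IR-visible memory.
//     return true;
//   }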

namespace yaml {

struct SIArgument {
  bool IsRegister;
  union {
    StringValue RegisterName;
    unsigned StackOffset;
  };
  std::optional<unsigned> Mask;

  // Default constructor, which creates a stack argument.
  SIArgument() : IsRegister(false), StackOffset(0) {}
  SIArgument(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister)
      new (&RegisterName) StringValue(Other.RegisterName);
    else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
  }
  SIArgument &operator=(const SIArgument &Other) {
    // Default-construct or destroy the old RegisterName when switching union
    // members.
    if (IsRegister != Other.IsRegister) {
      if (Other.IsRegister)
        new (&RegisterName) StringValue();
      else
        RegisterName.~StringValue();
    }
    IsRegister = Other.IsRegister;
    if (IsRegister)
      RegisterName = Other.RegisterName;
    else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
    return *this;
  }
  ~SIArgument() {
    if (IsRegister)
      RegisterName.~StringValue();
  }

  // Helper to create a register or stack argument.
  static inline SIArgument createArgument(bool IsReg) {
    if (IsReg)
      return SIArgument(IsReg);
    return SIArgument();
  }

private:
  // Construct a register argument.
  SIArgument(bool) : IsRegister(true), RegisterName() {}
};

template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  static const bool flow = true;
};
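
// Since the mapping uses flow style, each SIArgument serializes as a one-line
// YAML mapping with either a 'reg' or an 'offset' key. A minimal sketch of the
// accepted forms (register names and values are hypothetical):
//
//   kernargSegmentPtr:  {reg: '$sgpr4_sgpr5'}
//   workItemIDY:        {reg: '$vgpr31', mask: 1047552}
//   privateSegmentSize: {offset: 16}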

struct SIArgumentInfo {
  std::optional<SIArgument> PrivateSegmentBuffer;
  std::optional<SIArgument> DispatchPtr;
  std::optional<SIArgument> QueuePtr;
  std::optional<SIArgument> KernargSegmentPtr;
  std::optional<SIArgument> DispatchID;
  std::optional<SIArgument> FlatScratchInit;
  std::optional<SIArgument> PrivateSegmentSize;

  std::optional<SIArgument> WorkGroupIDX;
  std::optional<SIArgument> WorkGroupIDY;
  std::optional<SIArgument> WorkGroupIDZ;
  std::optional<SIArgument> WorkGroupInfo;
  std::optional<SIArgument> LDSKernelId;
  std::optional<SIArgument> PrivateSegmentWaveByteOffset;

  std::optional<SIArgument> ImplicitArgPtr;
  std::optional<SIArgument> ImplicitBufferPtr;

  std::optional<SIArgument> WorkItemIDX;
  std::optional<SIArgument> WorkItemIDY;
  std::optional<SIArgument> WorkItemIDZ;
};

template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};

// Default to default mode for default calling convention.
struct SIMode {
  bool IEEE = true;
  bool DX10Clamp = true;
  bool FP32InputDenormals = true;
  bool FP32OutputDenormals = true;
  bool FP64FP16InputDenormals = true;
  bool FP64FP16OutputDenormals = true;

  SIMode() = default;

  SIMode(const SIModeRegisterDefaults &Mode) {
    IEEE = Mode.IEEE;
    DX10Clamp = Mode.DX10Clamp;
    FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
    FP32OutputDenormals =
        Mode.FP32Denormals.Output != DenormalMode::PreserveSign;
    FP64FP16InputDenormals =
        Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign;
    FP64FP16OutputDenormals =
        Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign;
  }

  bool operator ==(const SIMode Other) const {
    return IEEE == Other.IEEE &&
           DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }
};

template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-input-denormals",
                       Mode.FP64FP16InputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-output-denormals",
                       Mode.FP64FP16OutputDenormals, true);
  }
};
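
// On output, YAML IO omits keys whose values still match the defaults above,
// so a fully default mode prints nothing. A minimal sketch of a non-default
// mode block (values hypothetical):
//
//   mode:
//     ieee: false
//     fp32-input-denormals: false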

struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  Align MaxKernArgAlign;
  uint32_t LDSSize = 0;
  uint32_t GDSSize = 0;
  Align DynLDSAlign;
  bool IsEntryFunction = false;
  bool IsChainFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;
  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  uint32_t HighBitsOf32BitAddress = 0;

  // TODO: 10 may be a better default since it's the maximum.
  unsigned Occupancy = 0;

  SmallVector<StringValue, 2> SpillPhysVGPRS;
  SmallVector<StringValue> WWMReservedRegs;

  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  unsigned BytesInStackArgArea = 0;
  bool ReturnsVoid = true;

  std::optional<SIArgumentInfo> ArgInfo;

  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;
  unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;

  SIMode Mode;
  std::optional<FrameIndex> ScavengeFI;
  StringValue VGPRForAGPRCopy;
  StringValue SGPRForEXECCopy;
  StringValue LongBranchReservedReg;

  bool HasInitWholeWave = false;

  unsigned DynamicVGPRBlockSize = 0;
  unsigned ScratchReservedForDynamicVGPRs = 0;

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI,
                        const llvm::MachineFunction &MF);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
    YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("isChainFunction", MFI.IsChainFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
    YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u);
    YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true);
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("psInputAddr", MFI.PSInputAddr, 0u);
    YamlIO.mapOptional("psInputEnable", MFI.PSInputEnable, 0u);
    YamlIO.mapOptional("maxMemoryClusterDWords", MFI.MaxMemoryClusterDWords,
                       DefaultMemoryClusterDWordsLimit);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    YamlIO.mapOptional("highBitsOf32BitAddress",
                       MFI.HighBitsOf32BitAddress, 0u);
    YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
    YamlIO.mapOptional("spillPhysVGPRs", MFI.SpillPhysVGPRS);
    YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs);
    YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
    YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("sgprForEXECCopy", MFI.SGPRForEXECCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("longBranchReservedReg", MFI.LongBranchReservedReg,
                       StringValue());
    YamlIO.mapOptional("hasInitWholeWave", MFI.HasInitWholeWave, false);
    YamlIO.mapOptional("dynamicVGPRBlockSize", MFI.DynamicVGPRBlockSize, 0u);
    YamlIO.mapOptional("scratchReservedForDynamicVGPRs",
                       MFI.ScratchReservedForDynamicVGPRs, 0u);
  }
};
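
// A minimal sketch of the resulting 'machineFunctionInfo' block in serialized
// MIR for an entry function (register choices and values are hypothetical):
//
//   machineFunctionInfo:
//     explicitKernArgSize: 8
//     maxKernArgAlign: 8
//     isEntryFunction: true
//     scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
//     stackPtrOffsetReg: '$sgpr32'
//     occupancy: 10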

} // end namespace yaml

// A CSR SGPR value can be preserved inside a callee using one of the following
// methods.
//   1. Copy to an unused scratch SGPR.
//   2. Spill to a VGPR lane.
//   3. Spill to memory via a scratch VGPR.
// Class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method
// used for an SGPR at function prolog/epilog.
enum class SGPRSaveKind : uint8_t {
  COPY_TO_SCRATCH_SGPR,
  SPILL_TO_VGPR_LANE,
  SPILL_TO_MEM
};

class PrologEpilogSGPRSaveRestoreInfo {
  SGPRSaveKind Kind;
  union {
    int Index;
    Register Reg;
  };

public:
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {}
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
      : Kind(K), Reg(R) {}
  Register getReg() const { return Reg; }
  int getIndex() const { return Index; }
  SGPRSaveKind getKind() const { return Kind; }
};
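
// A minimal sketch of recording the two save methods (the register and frame
// index values are hypothetical):
//
//   PrologEpilogSGPRSaveRestoreInfo CopyInfo(
//       SGPRSaveKind::COPY_TO_SCRATCH_SGPR, Register(AMDGPU::SGPR40));
//   PrologEpilogSGPRSaveRestoreInfo LaneInfo(
//       SGPRSaveKind::SPILL_TO_VGPR_LANE, /*FI=*/2);
//
// Only the accessor matching the active union member is meaningful: getReg()
// for copies, getIndex() for spills.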

struct VGPRBlock2IndexFunctor {
  using argument_type = Register;
  unsigned operator()(Register Reg) const {
    assert(AMDGPU::VReg_1024RegClass.contains(Reg) && "Expecting a VGPR block");

    const MCRegister FirstVGPRBlock = AMDGPU::VReg_1024RegClass.getRegister(0);
    return Reg - FirstVGPRBlock;
  }
};
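
// The functor gives IndexedMap a dense, zero-based key for 1024-bit VGPR
// blocks. A minimal sketch, assuming the blocks are numbered consecutively in
// the generated register enum:
//
//   VGPRBlock2IndexFunctor ToIndex;
//   // Dense index of the fourth block relative to the first one.
//   unsigned Idx = ToIndex(AMDGPU::VReg_1024RegClass.getRegister(3));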

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
                                    private MachineRegisterInfo::Delegate {
  friend class GCNTargetMachine;

  // State of MODE register, assumed FP mode.
  SIModeRegisterDefaults Mode;

  // Registers that may be reserved for spilling purposes. These may be the
  // same as the input registers.
  Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;

  // This is the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the current function's frame.
  Register FrameOffsetReg = AMDGPU::FP_REG;

  // This is an ABI register used in the non-entry calling convention to
  // communicate the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the new function's frame.
  Register StackPtrOffsetReg = AMDGPU::SP_REG;

  // Registers that may be reserved when RA doesn't allocate enough
  // registers to plan for the case where an indirect branch ends up
  // being needed during branch relaxation.
  Register LongBranchReservedReg;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the
  /// callee is expected to restore the argument stack this should be a
  /// multiple of 16, all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;

  // Default/requested number of work groups for the function.
  SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0};

private:
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  unsigned DynamicVGPRBlockSize = 0;

  // The size in bytes of the scratch space reserved for the CWSR trap handler
  // to spill some of the dynamic VGPRs.
  unsigned ScratchReservedForDynamicVGPRs = 0;

  // Tracks information about user SGPRs that will be set up by hardware and
  // that will apply to all wavefronts of the grid.
  GCNUserSGPRUsageInfo UserSGPRInfo;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool LDSKernelId : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from
  // the user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  bool MayNeedAGPRs : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Flags associated with the virtual registers.
  IndexedMap<uint8_t, VirtReg2IndexFunctor> VRegFlags;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  // Maximum number of dwords that can be clustered during the instruction
  // scheduling stage.
  unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

  // MachineRegisterInfo callback functions to notify events.
  void MRI_NoteNewVirtualRegister(Register Reg) override;
  void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;

public:
  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
    bool IsDead = false;
  };

private:
  // To track the virtual VGPR + lane index for each subregister of an SGPR
  // spilled to a frame index key during the SILowerSGPRSpills pass.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToVirtualVGPRLanes;
  // To track the physical VGPR + lane index for CSR SGPR spills and special
  // SGPRs like the Frame Pointer, identified during PrologEpilogInserter.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToPhysicalVGPRLanes;
  unsigned NumVirtualVGPRSpillLanes = 0;
  unsigned NumPhysicalVGPRSpillLanes = 0;
  SmallVector<Register, 2> SpillVGPRs;
  SmallVector<Register, 2> SpillPhysVGPRs;
  using WWMSpillsMap = MapVector<Register, int>;
  // To track the registers used in instructions that can potentially modify
  // the inactive lanes. WWM instructions and the writelane instructions used
  // for spilling SGPRs to VGPRs fall into this category. The VGPRs modified
  // by them should be spilled/restored at function prolog/epilog to avoid any
  // undesired outcome. Each entry in this map holds a pair of values, the
  // VGPR and its stack slot index.
  WWMSpillsMap WWMSpills;

  // Before allocation, the VGPR registers are partitioned into two distinct
  // sets, the first one for WWM-values and the second set for non-WWM values.
  // The latter set should be reserved during WWM-regalloc.
  BitVector NonWWMRegMask;

  using ReservedRegSet = SmallSetVector<Register, 8>;
  // To track the VGPRs reserved for WWM instructions. They get stack slots
  // later during PrologEpilogInserter and get added into the superset
  // WWMSpills for actual spilling. A separate set makes the register
  // reservation and the serialization easier.
  ReservedRegSet WWMReservedRegs;

  using PrologEpilogSGPRSpill =
      std::pair<Register, PrologEpilogSGPRSaveRestoreInfo>;
  // To track the SGPR spill method used for a CSR SGPR register during
  // frame lowering. Even though the SGPR spills are handled during the
  // SILowerSGPRSpills pass, some special handling is still needed later,
  // during PrologEpilogInserter.
  SmallVector<PrologEpilogSGPRSpill, 3> PrologEpilogSGPRSpills;

  // To save/restore the EXEC mask around WWM spills and copies.
  Register SGPRForEXECCopy;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;

  // Emergency stack slot. Sometimes, we create this before finalizing the
  // stack frame, so save it here and add it to the RegScavenger later.
  std::optional<int> ScavengeFI;

  // Map each VGPR CSR to the mask needed to save and restore it using block
  // load/store instructions. Only used if the subtarget feature for VGPR block
  // load/store is enabled.
  IndexedMap<uint32_t, VGPRBlock2IndexFunctor> MaskForVGPRBlockOps;

private:
  Register VGPRForAGPRCopy;

  bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                        unsigned LaneIndex);
  bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                         unsigned LaneIndex,
                                         bool IsPrologEpilog);

public:
  Register getVGPRForAGPRCopy() const {
    return VGPRForAGPRCopy;
  }

  void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
    VGPRForAGPRCopy = NewVGPRForAGPRCopy;
  }

  bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const;

  void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask) {
    MaskForVGPRBlockOps.grow(RegisterBlock);
    MaskForVGPRBlockOps[RegisterBlock] = Mask;
  }

  uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const {
    return MaskForVGPRBlockOps[RegisterBlock];
  }

  bool hasMaskForVGPRBlockOps(Register RegisterBlock) const {
    return MaskForVGPRBlockOps.inBounds(RegisterBlock);
  }

public:
  SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default;
  SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI);

  MachineFunctionInfo *
  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
      const override;

  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
                                const MachineFunction &MF,
                                PerFunctionMIParsingState &PFS,
                                SMDiagnostic &Error, SMRange &SourceRange);

  void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
  bool isWWMReg(Register Reg) const {
    return Reg.isVirtual() ? checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)
                           : WWMReservedRegs.contains(Reg);
  }

  void updateNonWWMRegMask(BitVector &RegMask) { NonWWMRegMask = RegMask; }
  BitVector getNonWWMRegMask() const { return NonWWMRegMask; }
  void clearNonWWMRegAllocMask() { NonWWMRegMask.clear(); }

  SIModeRegisterDefaults getMode() const { return Mode; }

  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const {
    auto I = SGPRSpillsToVirtualVGPRLanes.find(FrameIndex);
    return (I == SGPRSpillsToVirtualVGPRLanes.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : ArrayRef(I->second);
  }

  ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
  ArrayRef<Register> getSGPRSpillPhysVGPRs() const { return SpillPhysVGPRs; }

  const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
  const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }

  bool isWWMReservedRegister(Register Reg) const {
    return WWMReservedRegs.contains(Reg);
  }

  ArrayRef<PrologEpilogSGPRSpill> getPrologEpilogSGPRSpills() const {
    assert(is_sorted(PrologEpilogSGPRSpills, llvm::less_first()));
    return PrologEpilogSGPRSpills;
  }

  GCNUserSGPRUsageInfo &getUserSGPRInfo() { return UserSGPRInfo; }

  const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; }

  void addToPrologEpilogSGPRSpills(Register Reg,
                                   PrologEpilogSGPRSaveRestoreInfo SI) {
    assert(!hasPrologEpilogSGPRSpillEntry(Reg));

    // Insert a new entry in the right place to keep the vector in sorted
    // order. This should be cheap since the vector is expected to be very
    // short.
    PrologEpilogSGPRSpills.insert(
        upper_bound(
            PrologEpilogSGPRSpills, Reg,
            [](const auto &LHS, const auto &RHS) { return LHS < RHS.first; }),
        std::make_pair(Reg, SI));
  }
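
  // A minimal sketch of recording and later querying a prolog/epilog SGPR
  // save (the registers are hypothetical):
  //
  //   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  //   FuncInfo->addToPrologEpilogSGPRSpills(
  //       AMDGPU::SGPR30_SGPR31,
  //       PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::COPY_TO_SCRATCH_SGPR,
  //                                       Register(AMDGPU::SGPR40_SGPR41)));
  //   // The vector stays sorted by register, so getPrologEpilogSGPRSpills()
  //   // yields a deterministic iteration order for later passes.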

  // Check if an entry was created for \p Reg in PrologEpilogSGPRSpills. Return
  // true if one exists and false otherwise.
  bool hasPrologEpilogSGPRSpillEntry(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    return I != PrologEpilogSGPRSpills.end();
  }

  // Get the scratch SGPR if allocated to save/restore \p Reg.
  Register getScratchSGPRCopyDstReg(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    if (I != PrologEpilogSGPRSpills.end() &&
        I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
      return I->second.getReg();

    return AMDGPU::NoRegister;
  }

  // Get all scratch SGPRs allocated to copy/restore the SGPR spills.
  void getAllScratchSGPRCopyDstRegs(SmallVectorImpl<Register> &Regs) const {
    for (const auto &SI : PrologEpilogSGPRSpills) {
      if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
        Regs.push_back(SI.second.getReg());
    }
  }

  // Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI.
  bool checkIndexInPrologEpilogSGPRSpills(int FI) const {
    return find_if(PrologEpilogSGPRSpills,
                   [FI](const std::pair<Register,
                                        PrologEpilogSGPRSaveRestoreInfo> &SI) {
                     return SI.second.getKind() ==
                                SGPRSaveKind::SPILL_TO_VGPR_LANE &&
                            SI.second.getIndex() == FI;
                   }) != PrologEpilogSGPRSpills.end();
  }

  const PrologEpilogSGPRSaveRestoreInfo &
  getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    assert(I != PrologEpilogSGPRSpills.end());

    return I->second;
  }

  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const {
    auto I = SGPRSpillsToPhysicalVGPRLanes.find(FrameIndex);
    return (I == SGPRSpillsToPhysicalVGPRLanes.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : ArrayRef(I->second);
  }

  void setFlag(Register Reg, uint8_t Flag) {
    assert(Reg.isVirtual());
    if (VRegFlags.inBounds(Reg))
      VRegFlags[Reg] |= Flag;
  }

  bool checkFlag(Register Reg, uint8_t Flag) const {
    if (Reg.isPhysical())
      return false;

    return VRegFlags.inBounds(Reg) && VRegFlags[Reg] & Flag;
  }
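
  // A minimal sketch of tagging a virtual register as a WWM value and testing
  // it later; assumes this function info is registered as the MRI delegate so
  // VRegFlags grows with each new virtual register:
  //
  //   Register VReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  //   FuncInfo->setFlag(VReg, AMDGPU::VirtRegFlag::WWM_REG);
  //   bool IsWWM = FuncInfo->isWWMReg(VReg); // true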

  bool hasVRegFlags() { return VRegFlags.size(); }

  void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4,
                        Align Alignment = Align(4));

  void splitWWMSpillRegisters(
      MachineFunction &MF,
      SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
      SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const;

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillAGPR;
  }

  Register getSGPRForEXECCopy() const { return SGPRForEXECCopy; }

  void setSGPRForEXECCopy(Register Reg) { SGPRForEXECCopy = Reg; }

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillVGPR;
  }

  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }

  void setVGPRToAGPRSpillDead(int FrameIndex) {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    if (I != VGPRToAGPRSpills.end())
      I->second.IsDead = true;
  }

  // Shift the allocated WWM registers in \p WWMVGPRs to the lowest available
  // range.
  void shiftWwmVGPRsToLowestRange(MachineFunction &MF,
                                  SmallVectorImpl<Register> &WWMVGPRs,
                                  BitVector &SavedVGPRs);

  bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
                                   bool SpillToPhysVGPRLane = false,
                                   bool IsPrologEpilog = false);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);

  /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
  /// to the default stack.
  bool removeDeadFrameIndices(MachineFrameInfo &MFI,
                              bool ResetSGPRSpillStackIDs);

  int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
  std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  bool isDynamicVGPREnabled() const { return DynamicVGPRBlockSize != 0; }
  unsigned getDynamicVGPRBlockSize() const { return DynamicVGPRBlockSize; }

  // This is only used if we need to save any dynamic VGPRs in scratch.
  unsigned getScratchReservedForDynamicVGPRs() const {
    return ScratchReservedForDynamicVGPRs;
  }

  void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes) {
    ScratchReservedForDynamicVGPRs = SizeInBytes;
  }

  // Add user SGPRs.
  Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  Register addDispatchPtr(const SIRegisterInfo &TRI);
  Register addQueuePtr(const SIRegisterInfo &TRI);
  Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
  Register addDispatchID(const SIRegisterInfo &TRI);
  Register addFlatScratchInit(const SIRegisterInfo &TRI);
  Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
  Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
  Register addLDSKernelId();
  SmallVectorImpl<MCRegister> *
  addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
                      unsigned AllocSizeDWord, int KernArgIdx,
                      int PaddingSGPRs);

  /// Increment user SGPRs used for padding the argument list only.
  Register addReservedUserSGPR() {
    Register Next = getNextUserSGPR();
    ++NumUserSGPRs;
    return Next;
  }

  // Add system SGPRs.
  Register addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  Register addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  Register addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  Register addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  bool hasLDSKernelId() const { return LDSKernelId; }

  // Add special VGPR inputs.
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  Register addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
        = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(Register Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  MCRegister
  getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    const auto *Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
    return Arg ? Arg->getRegister() : MCRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  Register getGITPtrLoReg(const MachineFunction &MF) const;

  uint32_t get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getNumKernargPreloadedSGPRs() const {
    return UserSGPRInfo.getNumKernargPreloadSGPRs();
  }

  Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  Register getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  Register getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  void setLongBranchReservedReg(Register Reg) { LongBranchReservedReg = Reg; }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in
  // serialized MIR.
  Register getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  Register getLongBranchReservedReg() const { return LongBranchReservedReg; }

  Register getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  Register getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }
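
  // PSInputAddr and PSInputEnable are bit masks indexed by pixel-shader input
  // slot. A minimal sketch (the slot number is hypothetical):
  //
  //   FuncInfo->markPSInputAllocated(0);     // PSInputAddr   |= 1 << 0
  //   FuncInfo->markPSInputEnabled(0);       // PSInputEnable |= 1 << 0
  //   bool Slot0 = FuncInfo->isPSInputAllocated(0); // true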

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group
  /// sizes for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this
  /// function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this
  /// function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  const AMDGPUGWSResourcePseudoSourceValue *
  getGWSPSV(const AMDGPUTargetMachine &TM) {
    return &GWSResourcePSV;
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }

  unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; }

  bool mayNeedAGPRs() const {
    return MayNeedAGPRs;
  }

  // \returns true if the function uses AGPRs via inline asm or has a call
  // which may use them.
  bool mayUseAGPRs(const Function &F) const;

  /// \returns Default/requested number of work groups for this function.
  SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }

  unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; }
  unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; }
  unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H