//===-- AMDGPUMachineFunction.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

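// Look up the "dynamic LDS" marker global that the module LDS lowering pass
// creates for a kernel using dynamically sized LDS. The name construction
// below mirrors that pass; the IR shape is illustrative only (the exact type
// and linkage are assumptions): for a kernel @foo the global would look like
//   @llvm.amdgcn.foo.dynlds = external addrspace(3) global [0 x i32], align 4
// Returns null when no such global exists in the module.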
static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  SmallString<64> KernelDynLDSName("llvm.amdgcn.");
  KernelDynLDSName += F.getName();
  KernelDynLDSName += ".dynlds";
  return M->getNamedGlobal(KernelDynLDSName);
}

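// Check whether any kernel argument is a pointer into LDS, which implies the
// kernel may access dynamically allocated LDS through it. A minimal example
// of such a signature (illustrative only):
//   define amdgpu_kernel void @k(ptr addrspace(3) %lds) { ... }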
static bool hasLDSKernelArgument(const Function &F) {
  for (const Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    if (auto PtrTy = dyn_cast<PointerType>(ArgTy)) {
      if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
        return true;
    }
  }
  return false;
}

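// The constructor below reads several string function attributes. Example
// encodings as they would appear in IR (attribute names are real, the values
// are illustrative):
//   attributes #0 = { "amdgpu-memory-bound"="true"
//                     "amdgpu-wave-limiter"="true"
//                     "amdgpu-gds-size"="256"
//                     "amdgpu-lds-size"="1024" }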
AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())) {

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
  MemoryBound = MemBoundAttr.getValueAsBool();

  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
  WaveLimiter = WaveLimitAttr.getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  // Second value, if present, is the maximum value that can be assigned.
  // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
  // during codegen.
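  // For example (illustrative; the value is parsed as comma-separated
  // integers, and a missing second element falls back to the default):
  //   "amdgpu-lds-size"="1024"       -> {1024, UINT32_MAX}
  //   "amdgpu-lds-size"="1024,65536" -> {1024, 65536}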
  std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-lds-size", {0, UINT32_MAX}, true);

  // The two separate variables are only profitable when the LDS module
  // lowering pass is disabled. If graphics does not use dynamic LDS, this is
  // never profitable. Leaving cleanup for a later change.
  LDSSize = LDSSizeRange.first;
  StaticLDSSize = LDSSize;

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";

  const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
  if (DynLdsGlobal || hasLDSKernelArgument(F))
    UsesDynamicLDS = true;
}

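// Assign GV a fixed offset in the kernel's LDS (or GDS) frame, growing the
// frame as needed. A worked example of the bump allocation below (sizes and
// alignments are hypothetical): with StaticLDSSize == 6, a global of size 4
// and align 8 is placed at offset alignTo(6, 8) == 8, after which
// StaticLDSSize == 12.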
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
    std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
    if (MaybeAbs) {
      // Absolute address LDS variables that exist prior to the LDS lowering
      // pass raise a fatal error in that pass. These failure modes are only
      // reachable if that lowering pass is disabled or broken. If/when adding
      // support for absolute addresses on user specified variables, the
      // alignment check moves to the lowering pass and the frame calculation
      // needs to take the user variables into consideration.

      uint32_t ObjectStart = *MaybeAbs;

      if (ObjectStart != alignTo(ObjectStart, Alignment)) {
        report_fatal_error("Absolute address LDS variable inconsistent with "
                           "variable alignment");
      }

      if (isModuleEntryFunction()) {
        // If this is a module entry function, we can also sanity check against
        // the static frame. Strictly it would be better to check against the
        // attribute, i.e. that the variable is within the always-allocated
        // section, and not within some other non-absolute-address object
        // allocated here, but the extra error detection is minimal and we
        // would have to pass the Function around or cache the attribute value.
        uint32_t ObjectEnd =
            ObjectStart + DL.getTypeAllocSize(GV.getValueType());
        if (ObjectEnd > StaticLDSSize) {
          report_fatal_error(
              "Absolute address LDS variable outside of static frame");
        }
      }

      Entry.first->second = ObjectStart;
      return ObjectStart;
    }

    // TODO: We should sort these to minimize wasted space due to alignment
    // padding. Currently the padding is decided by the first encountered use
    // during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to trailing, e.g. for aligning dynamic shared memory.
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  Entry.first->second = Offset;
  return Offset;
}

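// Read the kernel-id metadata attached by the LDS lowering pass. Illustrative
// IR shape (assumed): a node with a single 32-bit integer operand, e.g.
//   define amdgpu_kernel void @k() !llvm.amdgcn.lds.kernel.id !0 { ... }
//   !0 = !{i32 42}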
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
  // TODO: Would be more consistent with the abs symbols to use a range
  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
  if (MD && MD->getNumOperands() == 1) {
    if (ConstantInt *KnownSize =
            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
      uint64_t ZExt = KnownSize->getZExtValue();
      if (ZExt <= UINT32_MAX) {
        return ZExt;
      }
    }
  }
  return {};
}

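// An LDS variable with a fixed address carries !absolute_symbol metadata
// describing a half-open ConstantRange [Lo, Hi); only a single-element range
// denotes a known address. Illustrative IR (assumed shape): a variable fixed
// at LDS offset 8 would look like
//   @v = addrspace(3) global i32 undef, !absolute_symbol !0
//   !0 = !{i32 8, i32 9}   ; the range [8, 9) contains only the address 8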
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return {};

  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
  if (!AbsSymRange)
    return {};

  if (const APInt *V = AbsSymRange->getSingleElement()) {
    std::optional<uint64_t> ZExt = V->tryZExtValue();
    if (ZExt && (*ZExt <= UINT32_MAX)) {
      return *ZExt;
    }
  }

  return {};
}

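// Raise the dynamic LDS alignment and pad the total LDS size accordingly.
// Worked example (hypothetical numbers): with StaticLDSSize == 20 and a
// dynamic variable of align 16, LDSSize becomes alignTo(20, 16) == 32, so the
// dynamic region starts at a 16-byte boundary.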
void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
                                           const GlobalVariable &GV) {
  const Module *M = F.getParent();
  const DataLayout &DL = M->getDataLayout();
  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  if (Alignment <= DynLDSAlign)
    return;

  LDSSize = alignTo(StaticLDSSize, Alignment);
  DynLDSAlign = Alignment;

  // If there is a dynamic LDS variable associated with this function F, every
  // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
  // map to the same address. This holds because no LDS is allocated after the
  // lowering pass if there are dynamic LDS variables present.
  const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
  if (Dyn) {
    unsigned Offset = LDSSize; // return this?
    std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
    if (!Expect || (Offset != *Expect)) {
      report_fatal_error("Inconsistent metadata on dynamic LDS variable");
    }
  }
}

void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
  UsesDynamicLDS = DynLDS;
}

bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }