//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUMemoryUtils.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
21
22static const GlobalVariable *
23getKernelDynLDSGlobalFromFunction(const Function &F) {
24 const Module *M = F.getParent();
25 SmallString<64> KernelDynLDSName("llvm.amdgcn.");
26 KernelDynLDSName += F.getName();
27 KernelDynLDSName += ".dynlds";
28 return M->getNamedGlobal(Name: KernelDynLDSName);
29}
30
31static bool hasLDSKernelArgument(const Function &F) {
32 for (const Argument &Arg : F.args()) {
33 Type *ArgTy = Arg.getType();
34 if (auto *PtrTy = dyn_cast<PointerType>(Val: ArgTy)) {
35 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
36 return true;
37 }
38 }
39 return false;
40}
41
42AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
43 const AMDGPUSubtarget &ST)
44 : IsEntryFunction(AMDGPU::isEntryFunctionCC(CC: F.getCallingConv())),
45 IsModuleEntryFunction(
46 AMDGPU::isModuleEntryFunctionCC(CC: F.getCallingConv())),
47 IsChainFunction(AMDGPU::isChainCC(CC: F.getCallingConv())) {
48
49 // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
50 // except reserved size is not correctly aligned.
51
52 Attribute MemBoundAttr = F.getFnAttribute(Kind: "amdgpu-memory-bound");
53 MemoryBound = MemBoundAttr.getValueAsBool();
54
55 Attribute WaveLimitAttr = F.getFnAttribute(Kind: "amdgpu-wave-limiter");
56 WaveLimiter = WaveLimitAttr.getValueAsBool();
57
58 // FIXME: How is this attribute supposed to interact with statically known
59 // global sizes?
60 StringRef S = F.getFnAttribute(Kind: "amdgpu-gds-size").getValueAsString();
61 if (!S.empty())
62 S.consumeInteger(Radix: 0, Result&: GDSSize);
63
64 // Assume the attribute allocates before any known GDS globals.
65 StaticGDSSize = GDSSize;
66
67 // Second value, if present, is the maximum value that can be assigned.
68 // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
69 // during codegen.
70 std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
71 F, Name: "amdgpu-lds-size", Default: {0, UINT32_MAX}, OnlyFirstRequired: true);
72
73 // The two separate variables are only profitable when the LDS module lowering
74 // pass is disabled. If graphics does not use dynamic LDS, this is never
75 // profitable. Leaving cleanup for a later change.
76 LDSSize = LDSSizeRange.first;
77 StaticLDSSize = LDSSize;
78
79 CallingConv::ID CC = F.getCallingConv();
80 if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
81 ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxAlign&: MaxKernArgAlign);
82
83 const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
84 if (DynLdsGlobal || hasLDSKernelArgument(F))
85 UsesDynamicLDS = true;
86}
87
88unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
89 const GlobalVariable &GV,
90 Align Trailing) {
91 auto Entry = LocalMemoryObjects.insert(KV: std::pair(&GV, 0));
92 if (!Entry.second)
93 return Entry.first->second;
94
95 Align Alignment =
96 DL.getValueOrABITypeAlignment(Alignment: GV.getAlign(), Ty: GV.getValueType());
97
98 unsigned Offset;
99 if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
100 if (AMDGPU::isNamedBarrier(GV)) {
101 std::optional<unsigned> BarAddr = getLDSAbsoluteAddress(GV);
102 if (!BarAddr)
103 llvm_unreachable("named barrier should have an assigned address");
104 Entry.first->second = BarAddr.value();
105 unsigned BarCnt = GV.getGlobalSize(DL) / 16;
106 recordNumNamedBarriers(GVAddr: BarAddr.value(), BarCnt);
107 return BarAddr.value();
108 }
109
110 std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
111 if (MaybeAbs) {
112 // Absolute address LDS variables that exist prior to the LDS lowering
113 // pass raise a fatal error in that pass. These failure modes are only
114 // reachable if that lowering pass is disabled or broken. If/when adding
115 // support for absolute addresses on user specified variables, the
116 // alignment check moves to the lowering pass and the frame calculation
117 // needs to take the user variables into consideration.
118
119 uint32_t ObjectStart = *MaybeAbs;
120
121 if (ObjectStart != alignTo(Size: ObjectStart, A: Alignment)) {
122 report_fatal_error(reason: "Absolute address LDS variable inconsistent with "
123 "variable alignment");
124 }
125
126 if (isModuleEntryFunction()) {
127 // If this is a module entry function, we can also sanity check against
128 // the static frame. Strictly it would be better to check against the
129 // attribute, i.e. that the variable is within the always-allocated
130 // section, and not within some other non-absolute-address object
131 // allocated here, but the extra error detection is minimal and we would
132 // have to pass the Function around or cache the attribute value.
133 uint32_t ObjectEnd = ObjectStart + GV.getGlobalSize(DL);
134 if (ObjectEnd > StaticLDSSize) {
135 report_fatal_error(
136 reason: "Absolute address LDS variable outside of static frame");
137 }
138 }
139
140 Entry.first->second = ObjectStart;
141 return ObjectStart;
142 }
143
144 /// TODO: We should sort these to minimize wasted space due to alignment
145 /// padding. Currently the padding is decided by the first encountered use
146 /// during lowering.
147 Offset = StaticLDSSize = alignTo(Size: StaticLDSSize, A: Alignment);
148
149 StaticLDSSize += GV.getGlobalSize(DL);
150
151 // Align LDS size to trailing, e.g. for aligning dynamic shared memory
152 LDSSize = alignTo(Size: StaticLDSSize, A: Trailing);
153 } else {
154 assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
155 "expected region address space");
156
157 Offset = StaticGDSSize = alignTo(Size: StaticGDSSize, A: Alignment);
158 StaticGDSSize += GV.getGlobalSize(DL);
159
160 // FIXME: Apply alignment of dynamic GDS
161 GDSSize = StaticGDSSize;
162 }
163
164 Entry.first->second = Offset;
165 return Offset;
166}
167
168std::optional<uint32_t>
169AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
170 // TODO: Would be more consistent with the abs symbols to use a range
171 MDNode *MD = F.getMetadata(Kind: "llvm.amdgcn.lds.kernel.id");
172 if (MD && MD->getNumOperands() == 1) {
173 if (ConstantInt *KnownSize =
174 mdconst::extract<ConstantInt>(MD: MD->getOperand(I: 0))) {
175 uint64_t ZExt = KnownSize->getZExtValue();
176 if (ZExt <= UINT32_MAX) {
177 return ZExt;
178 }
179 }
180 }
181 return {};
182}
183
184std::optional<uint32_t>
185AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
186 if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
187 return {};
188
189 std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
190 if (!AbsSymRange)
191 return {};
192
193 if (const APInt *V = AbsSymRange->getSingleElement()) {
194 std::optional<uint64_t> ZExt = V->tryZExtValue();
195 if (ZExt && (*ZExt <= UINT32_MAX)) {
196 return *ZExt;
197 }
198 }
199
200 return {};
201}
202
203void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
204 const GlobalVariable &GV) {
205 const Module *M = F.getParent();
206 const DataLayout &DL = M->getDataLayout();
207 assert(GV.getGlobalSize(DL) == 0);
208
209 Align Alignment =
210 DL.getValueOrABITypeAlignment(Alignment: GV.getAlign(), Ty: GV.getValueType());
211 if (Alignment <= DynLDSAlign)
212 return;
213
214 LDSSize = alignTo(Size: StaticLDSSize, A: Alignment);
215 DynLDSAlign = Alignment;
216
217 // If there is a dynamic LDS variable associated with this function F, every
218 // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
219 // map to the same address. This holds because no LDS is allocated after the
220 // lowering pass if there are dynamic LDS variables present.
221 const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
222 if (Dyn) {
223 unsigned Offset = LDSSize; // return this?
224 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(GV: *Dyn);
225 if (!Expect || (Offset != *Expect)) {
226 report_fatal_error(reason: "Inconsistent metadata on dynamic LDS variable");
227 }
228 }
229}
230
231void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
232 UsesDynamicLDS = DynLDS;
233}
234
235bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }
236