| 1 | //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "AMDGPUMachineFunction.h" |
| 10 | #include "AMDGPU.h" |
| 11 | #include "AMDGPUMemoryUtils.h" |
| 12 | #include "AMDGPUSubtarget.h" |
| 13 | #include "Utils/AMDGPUBaseInfo.h" |
| 14 | #include "llvm/CodeGen/MachineModuleInfo.h" |
| 15 | #include "llvm/IR/ConstantRange.h" |
| 16 | #include "llvm/IR/Constants.h" |
| 17 | #include "llvm/IR/Metadata.h" |
| 18 | #include "llvm/Target/TargetMachine.h" |
| 19 | |
| 20 | using namespace llvm; |
| 21 | |
| 22 | static const GlobalVariable * |
| 23 | getKernelDynLDSGlobalFromFunction(const Function &F) { |
| 24 | const Module *M = F.getParent(); |
| 25 | SmallString<64> KernelDynLDSName("llvm.amdgcn." ); |
| 26 | KernelDynLDSName += F.getName(); |
| 27 | KernelDynLDSName += ".dynlds" ; |
| 28 | return M->getNamedGlobal(Name: KernelDynLDSName); |
| 29 | } |
| 30 | |
| 31 | static bool hasLDSKernelArgument(const Function &F) { |
| 32 | for (const Argument &Arg : F.args()) { |
| 33 | Type *ArgTy = Arg.getType(); |
| 34 | if (auto *PtrTy = dyn_cast<PointerType>(Val: ArgTy)) { |
| 35 | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) |
| 36 | return true; |
| 37 | } |
| 38 | } |
| 39 | return false; |
| 40 | } |
| 41 | |
| 42 | AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, |
| 43 | const AMDGPUSubtarget &ST) |
| 44 | : IsEntryFunction(AMDGPU::isEntryFunctionCC(CC: F.getCallingConv())), |
| 45 | IsModuleEntryFunction( |
| 46 | AMDGPU::isModuleEntryFunctionCC(CC: F.getCallingConv())), |
| 47 | IsChainFunction(AMDGPU::isChainCC(CC: F.getCallingConv())) { |
| 48 | |
| 49 | // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, |
| 50 | // except reserved size is not correctly aligned. |
| 51 | |
| 52 | Attribute MemBoundAttr = F.getFnAttribute(Kind: "amdgpu-memory-bound" ); |
| 53 | MemoryBound = MemBoundAttr.getValueAsBool(); |
| 54 | |
| 55 | Attribute WaveLimitAttr = F.getFnAttribute(Kind: "amdgpu-wave-limiter" ); |
| 56 | WaveLimiter = WaveLimitAttr.getValueAsBool(); |
| 57 | |
| 58 | // FIXME: How is this attribute supposed to interact with statically known |
| 59 | // global sizes? |
| 60 | StringRef S = F.getFnAttribute(Kind: "amdgpu-gds-size" ).getValueAsString(); |
| 61 | if (!S.empty()) |
| 62 | S.consumeInteger(Radix: 0, Result&: GDSSize); |
| 63 | |
| 64 | // Assume the attribute allocates before any known GDS globals. |
| 65 | StaticGDSSize = GDSSize; |
| 66 | |
| 67 | // Second value, if present, is the maximum value that can be assigned. |
| 68 | // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics |
| 69 | // during codegen. |
| 70 | std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute( |
| 71 | F, Name: "amdgpu-lds-size" , Default: {0, UINT32_MAX}, OnlyFirstRequired: true); |
| 72 | |
| 73 | // The two separate variables are only profitable when the LDS module lowering |
| 74 | // pass is disabled. If graphics does not use dynamic LDS, this is never |
| 75 | // profitable. Leaving cleanup for a later change. |
| 76 | LDSSize = LDSSizeRange.first; |
| 77 | StaticLDSSize = LDSSize; |
| 78 | |
| 79 | CallingConv::ID CC = F.getCallingConv(); |
| 80 | if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) |
| 81 | ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxAlign&: MaxKernArgAlign); |
| 82 | |
| 83 | // FIXME: Shouldn't be target specific |
| 84 | Attribute NSZAttr = F.getFnAttribute(Kind: "no-signed-zeros-fp-math" ); |
| 85 | NoSignedZerosFPMath = |
| 86 | NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true" ; |
| 87 | |
| 88 | const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F); |
| 89 | if (DynLdsGlobal || hasLDSKernelArgument(F)) |
| 90 | UsesDynamicLDS = true; |
| 91 | } |
| 92 | |
| 93 | unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, |
| 94 | const GlobalVariable &GV, |
| 95 | Align Trailing) { |
| 96 | auto Entry = LocalMemoryObjects.insert(KV: std::pair(&GV, 0)); |
| 97 | if (!Entry.second) |
| 98 | return Entry.first->second; |
| 99 | |
| 100 | Align Alignment = |
| 101 | DL.getValueOrABITypeAlignment(Alignment: GV.getAlign(), Ty: GV.getValueType()); |
| 102 | |
| 103 | unsigned Offset; |
| 104 | if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { |
| 105 | if (AMDGPU::isNamedBarrier(GV)) { |
| 106 | std::optional<unsigned> BarAddr = getLDSAbsoluteAddress(GV); |
| 107 | if (!BarAddr) |
| 108 | llvm_unreachable("named barrier should have an assigned address" ); |
| 109 | Entry.first->second = BarAddr.value(); |
| 110 | return BarAddr.value(); |
| 111 | } |
| 112 | |
| 113 | std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV); |
| 114 | if (MaybeAbs) { |
| 115 | // Absolute address LDS variables that exist prior to the LDS lowering |
| 116 | // pass raise a fatal error in that pass. These failure modes are only |
| 117 | // reachable if that lowering pass is disabled or broken. If/when adding |
| 118 | // support for absolute addresses on user specified variables, the |
| 119 | // alignment check moves to the lowering pass and the frame calculation |
| 120 | // needs to take the user variables into consideration. |
| 121 | |
| 122 | uint32_t ObjectStart = *MaybeAbs; |
| 123 | |
| 124 | if (ObjectStart != alignTo(Size: ObjectStart, A: Alignment)) { |
| 125 | report_fatal_error(reason: "Absolute address LDS variable inconsistent with " |
| 126 | "variable alignment" ); |
| 127 | } |
| 128 | |
| 129 | if (isModuleEntryFunction()) { |
| 130 | // If this is a module entry function, we can also sanity check against |
| 131 | // the static frame. Strictly it would be better to check against the |
| 132 | // attribute, i.e. that the variable is within the always-allocated |
| 133 | // section, and not within some other non-absolute-address object |
| 134 | // allocated here, but the extra error detection is minimal and we would |
| 135 | // have to pass the Function around or cache the attribute value. |
| 136 | uint32_t ObjectEnd = |
| 137 | ObjectStart + DL.getTypeAllocSize(Ty: GV.getValueType()); |
| 138 | if (ObjectEnd > StaticLDSSize) { |
| 139 | report_fatal_error( |
| 140 | reason: "Absolute address LDS variable outside of static frame" ); |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | Entry.first->second = ObjectStart; |
| 145 | return ObjectStart; |
| 146 | } |
| 147 | |
| 148 | /// TODO: We should sort these to minimize wasted space due to alignment |
| 149 | /// padding. Currently the padding is decided by the first encountered use |
| 150 | /// during lowering. |
| 151 | Offset = StaticLDSSize = alignTo(Size: StaticLDSSize, A: Alignment); |
| 152 | |
| 153 | StaticLDSSize += DL.getTypeAllocSize(Ty: GV.getValueType()); |
| 154 | |
| 155 | // Align LDS size to trailing, e.g. for aligning dynamic shared memory |
| 156 | LDSSize = alignTo(Size: StaticLDSSize, A: Trailing); |
| 157 | } else { |
| 158 | assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && |
| 159 | "expected region address space" ); |
| 160 | |
| 161 | Offset = StaticGDSSize = alignTo(Size: StaticGDSSize, A: Alignment); |
| 162 | StaticGDSSize += DL.getTypeAllocSize(Ty: GV.getValueType()); |
| 163 | |
| 164 | // FIXME: Apply alignment of dynamic GDS |
| 165 | GDSSize = StaticGDSSize; |
| 166 | } |
| 167 | |
| 168 | Entry.first->second = Offset; |
| 169 | return Offset; |
| 170 | } |
| 171 | |
| 172 | std::optional<uint32_t> |
| 173 | AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { |
| 174 | // TODO: Would be more consistent with the abs symbols to use a range |
| 175 | MDNode *MD = F.getMetadata(Kind: "llvm.amdgcn.lds.kernel.id" ); |
| 176 | if (MD && MD->getNumOperands() == 1) { |
| 177 | if (ConstantInt *KnownSize = |
| 178 | mdconst::extract<ConstantInt>(MD: MD->getOperand(I: 0))) { |
| 179 | uint64_t ZExt = KnownSize->getZExtValue(); |
| 180 | if (ZExt <= UINT32_MAX) { |
| 181 | return ZExt; |
| 182 | } |
| 183 | } |
| 184 | } |
| 185 | return {}; |
| 186 | } |
| 187 | |
| 188 | std::optional<uint32_t> |
| 189 | AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { |
| 190 | if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) |
| 191 | return {}; |
| 192 | |
| 193 | std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); |
| 194 | if (!AbsSymRange) |
| 195 | return {}; |
| 196 | |
| 197 | if (const APInt *V = AbsSymRange->getSingleElement()) { |
| 198 | std::optional<uint64_t> ZExt = V->tryZExtValue(); |
| 199 | if (ZExt && (*ZExt <= UINT32_MAX)) { |
| 200 | return *ZExt; |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | return {}; |
| 205 | } |
| 206 | |
| 207 | void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, |
| 208 | const GlobalVariable &GV) { |
| 209 | const Module *M = F.getParent(); |
| 210 | const DataLayout &DL = M->getDataLayout(); |
| 211 | assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); |
| 212 | |
| 213 | Align Alignment = |
| 214 | DL.getValueOrABITypeAlignment(Alignment: GV.getAlign(), Ty: GV.getValueType()); |
| 215 | if (Alignment <= DynLDSAlign) |
| 216 | return; |
| 217 | |
| 218 | LDSSize = alignTo(Size: StaticLDSSize, A: Alignment); |
| 219 | DynLDSAlign = Alignment; |
| 220 | |
| 221 | // If there is a dynamic LDS variable associated with this function F, every |
| 222 | // further dynamic LDS instance (allocated by calling setDynLDSAlign) must |
| 223 | // map to the same address. This holds because no LDS is allocated after the |
| 224 | // lowering pass if there are dynamic LDS variables present. |
| 225 | const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); |
| 226 | if (Dyn) { |
| 227 | unsigned Offset = LDSSize; // return this? |
| 228 | std::optional<uint32_t> Expect = getLDSAbsoluteAddress(GV: *Dyn); |
| 229 | if (!Expect || (Offset != *Expect)) { |
| 230 | report_fatal_error(reason: "Inconsistent metadata on dynamic LDS variable" ); |
| 231 | } |
| 232 | } |
| 233 | } |
| 234 | |
| 235 | void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) { |
| 236 | UsesDynamicLDS = DynLDS; |
| 237 | } |
| 238 | |
| 239 | bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; } |
| 240 | |