| 1 | //===-- AMDGPUMachineFunctionInfo.h -------------------------------*- C++ -*-=// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H |
| 10 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H |
| 11 | |
| 12 | #include "Utils/AMDGPUBaseInfo.h" |
| 13 | #include "llvm/ADT/DenseMap.h" |
| 14 | #include "llvm/CodeGen/MachineFunction.h" |
| 15 | #include "llvm/IR/DataLayout.h" |
| 16 | #include "llvm/IR/Function.h" |
| 17 | #include "llvm/IR/GlobalValue.h" |
| 18 | #include "llvm/IR/GlobalVariable.h" |
| 19 | |
| 20 | namespace llvm { |
| 21 | |
| 22 | class AMDGPUSubtarget; |
| 23 | |
| 24 | class AMDGPUMachineFunction : public MachineFunctionInfo { |
| 25 | /// A map to keep track of local memory objects and their offsets within the |
| 26 | /// local memory space. |
| 27 | SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects; |
| 28 | |
| 29 | protected: |
| 30 | uint64_t ExplicitKernArgSize = 0; // Cache for this. |
| 31 | Align MaxKernArgAlign; // Cache for this. |
| 32 | |
| 33 | /// Number of bytes in the LDS that are being used. |
| 34 | uint32_t LDSSize = 0; |
| 35 | uint32_t GDSSize = 0; |
| 36 | |
| 37 | /// Number of bytes in the LDS allocated statically. This field is only used |
| 38 | /// in the instruction selector and not part of the machine function info. |
| 39 | uint32_t StaticLDSSize = 0; |
| 40 | uint32_t StaticGDSSize = 0; |
| 41 | |
| 42 | /// Align for dynamic shared memory if any. Dynamic shared memory is |
| 43 | /// allocated directly after the static one, i.e., LDSSize. Need to pad |
| 44 | /// LDSSize to ensure that dynamic one is aligned accordingly. |
| 45 | /// The maximal alignment is updated during IR translation or lowering |
| 46 | /// stages. |
| 47 | Align DynLDSAlign; |
| 48 | |
| 49 | // Flag to check dynamic LDS usage by kernel. |
| 50 | bool UsesDynamicLDS = false; |
| 51 | |
| 52 | // Kernels + shaders. i.e. functions called by the hardware and not called |
| 53 | // by other functions. |
| 54 | bool IsEntryFunction = false; |
| 55 | |
| 56 | // Entry points called by other functions instead of directly by the hardware. |
| 57 | bool IsModuleEntryFunction = false; |
| 58 | |
| 59 | // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC. |
| 60 | bool IsChainFunction = false; |
| 61 | |
| 62 | bool NoSignedZerosFPMath = false; |
| 63 | |
| 64 | // Function may be memory bound. |
| 65 | bool MemoryBound = false; |
| 66 | |
| 67 | // Kernel may need limited waves per EU for better performance. |
| 68 | bool WaveLimiter = false; |
| 69 | |
| 70 | bool HasInitWholeWave = false; |
| 71 | |
| 72 | public: |
| 73 | AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST); |
| 74 | |
| 75 | uint64_t getExplicitKernArgSize() const { |
| 76 | return ExplicitKernArgSize; |
| 77 | } |
| 78 | |
| 79 | Align getMaxKernArgAlign() const { return MaxKernArgAlign; } |
| 80 | |
| 81 | uint32_t getLDSSize() const { |
| 82 | return LDSSize; |
| 83 | } |
| 84 | |
| 85 | uint32_t getGDSSize() const { |
| 86 | return GDSSize; |
| 87 | } |
| 88 | |
| 89 | bool isEntryFunction() const { |
| 90 | return IsEntryFunction; |
| 91 | } |
| 92 | |
| 93 | bool isModuleEntryFunction() const { return IsModuleEntryFunction; } |
| 94 | |
| 95 | bool isChainFunction() const { return IsChainFunction; } |
| 96 | |
| 97 | // The stack is empty upon entry to this function. |
| 98 | bool isBottomOfStack() const { |
| 99 | return isEntryFunction() || isChainFunction(); |
| 100 | } |
| 101 | |
| 102 | bool hasNoSignedZerosFPMath() const { |
| 103 | return NoSignedZerosFPMath; |
| 104 | } |
| 105 | |
| 106 | bool isMemoryBound() const { |
| 107 | return MemoryBound; |
| 108 | } |
| 109 | |
| 110 | bool needsWaveLimiter() const { |
| 111 | return WaveLimiter; |
| 112 | } |
| 113 | |
| 114 | bool hasInitWholeWave() const { return HasInitWholeWave; } |
| 115 | void setInitWholeWave() { HasInitWholeWave = true; } |
| 116 | |
| 117 | unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) { |
| 118 | return allocateLDSGlobal(DL, GV, Trailing: DynLDSAlign); |
| 119 | } |
| 120 | |
| 121 | unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV, |
| 122 | Align Trailing); |
| 123 | |
| 124 | static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F); |
| 125 | static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV); |
| 126 | |
| 127 | Align getDynLDSAlign() const { return DynLDSAlign; } |
| 128 | |
| 129 | void setDynLDSAlign(const Function &F, const GlobalVariable &GV); |
| 130 | |
| 131 | void setUsesDynamicLDS(bool DynLDS); |
| 132 | |
| 133 | bool isDynamicLDSUsed() const; |
| 134 | }; |
| 135 | |
| 136 | } |
| 137 | #endif |
| 138 | |