//===-- AMDGPUMachineFunction.h ---------------------------------*- C++ -*-===//
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H |
10 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H |
11 | |
12 | #include "Utils/AMDGPUBaseInfo.h" |
13 | #include "llvm/ADT/DenseMap.h" |
14 | #include "llvm/CodeGen/MachineFunction.h" |
15 | #include "llvm/IR/DataLayout.h" |
16 | #include "llvm/IR/Function.h" |
17 | #include "llvm/IR/GlobalValue.h" |
18 | #include "llvm/IR/GlobalVariable.h" |
19 | |
20 | namespace llvm { |
21 | |
22 | class AMDGPUSubtarget; |
23 | |
24 | class AMDGPUMachineFunction : public MachineFunctionInfo { |
25 | /// A map to keep track of local memory objects and their offsets within the |
26 | /// local memory space. |
27 | SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects; |
28 | |
29 | protected: |
30 | uint64_t ExplicitKernArgSize = 0; // Cache for this. |
31 | Align MaxKernArgAlign; // Cache for this. |
32 | |
33 | /// Number of bytes in the LDS that are being used. |
34 | uint32_t LDSSize = 0; |
35 | uint32_t GDSSize = 0; |
36 | |
37 | /// Number of bytes in the LDS allocated statically. This field is only used |
38 | /// in the instruction selector and not part of the machine function info. |
39 | uint32_t StaticLDSSize = 0; |
40 | uint32_t StaticGDSSize = 0; |
41 | |
42 | /// Align for dynamic shared memory if any. Dynamic shared memory is |
43 | /// allocated directly after the static one, i.e., LDSSize. Need to pad |
44 | /// LDSSize to ensure that dynamic one is aligned accordingly. |
45 | /// The maximal alignment is updated during IR translation or lowering |
46 | /// stages. |
47 | Align DynLDSAlign; |
48 | |
49 | // Flag to check dynamic LDS usage by kernel. |
50 | bool UsesDynamicLDS = false; |
51 | |
52 | // Kernels + shaders. i.e. functions called by the hardware and not called |
53 | // by other functions. |
54 | bool IsEntryFunction = false; |
55 | |
56 | // Entry points called by other functions instead of directly by the hardware. |
57 | bool IsModuleEntryFunction = false; |
58 | |
59 | // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC. |
60 | bool IsChainFunction = false; |
61 | |
62 | bool NoSignedZerosFPMath = false; |
63 | |
64 | // Function may be memory bound. |
65 | bool MemoryBound = false; |
66 | |
67 | // Kernel may need limited waves per EU for better performance. |
68 | bool WaveLimiter = false; |
69 | |
70 | bool HasInitWholeWave = false; |
71 | |
72 | public: |
73 | AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST); |
74 | |
75 | uint64_t getExplicitKernArgSize() const { |
76 | return ExplicitKernArgSize; |
77 | } |
78 | |
79 | Align getMaxKernArgAlign() const { return MaxKernArgAlign; } |
80 | |
81 | uint32_t getLDSSize() const { |
82 | return LDSSize; |
83 | } |
84 | |
85 | uint32_t getGDSSize() const { |
86 | return GDSSize; |
87 | } |
88 | |
89 | bool isEntryFunction() const { |
90 | return IsEntryFunction; |
91 | } |
92 | |
93 | bool isModuleEntryFunction() const { return IsModuleEntryFunction; } |
94 | |
95 | bool isChainFunction() const { return IsChainFunction; } |
96 | |
97 | // The stack is empty upon entry to this function. |
98 | bool isBottomOfStack() const { |
99 | return isEntryFunction() || isChainFunction(); |
100 | } |
101 | |
102 | bool hasNoSignedZerosFPMath() const { |
103 | return NoSignedZerosFPMath; |
104 | } |
105 | |
106 | bool isMemoryBound() const { |
107 | return MemoryBound; |
108 | } |
109 | |
110 | bool needsWaveLimiter() const { |
111 | return WaveLimiter; |
112 | } |
113 | |
114 | bool hasInitWholeWave() const { return HasInitWholeWave; } |
115 | void setInitWholeWave() { HasInitWholeWave = true; } |
116 | |
117 | unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) { |
118 | return allocateLDSGlobal(DL, GV, Trailing: DynLDSAlign); |
119 | } |
120 | |
121 | unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV, |
122 | Align Trailing); |
123 | |
124 | static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F); |
125 | static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV); |
126 | |
127 | Align getDynLDSAlign() const { return DynLDSAlign; } |
128 | |
129 | void setDynLDSAlign(const Function &F, const GlobalVariable &GV); |
130 | |
131 | void setUsesDynamicLDS(bool DynLDS); |
132 | |
133 | bool isDynamicLDSUsed() const; |
134 | }; |
135 | |
136 | } |
137 | #endif |
138 | |