1 | //===-- AMDGPUMachineFunctionInfo.h -------------------------------*- C++ -*-=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H |
10 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H |
11 | |
12 | #include "Utils/AMDGPUBaseInfo.h" |
13 | #include "llvm/ADT/DenseMap.h" |
14 | #include "llvm/CodeGen/MachineFunction.h" |
15 | #include "llvm/IR/DataLayout.h" |
16 | #include "llvm/IR/Function.h" |
17 | #include "llvm/IR/GlobalValue.h" |
18 | #include "llvm/IR/GlobalVariable.h" |
19 | |
20 | namespace llvm { |
21 | |
22 | class AMDGPUSubtarget; |
23 | |
24 | class AMDGPUMachineFunction : public MachineFunctionInfo { |
25 | /// A map to keep track of local memory objects and their offsets within the |
26 | /// local memory space. |
27 | SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects; |
28 | |
29 | protected: |
30 | uint64_t ExplicitKernArgSize = 0; // Cache for this. |
31 | Align MaxKernArgAlign; // Cache for this. |
32 | |
33 | /// Number of bytes in the LDS that are being used. |
34 | uint32_t LDSSize = 0; |
35 | uint32_t GDSSize = 0; |
36 | |
37 | /// Number of bytes in the LDS allocated statically. This field is only used |
38 | /// in the instruction selector and not part of the machine function info. |
39 | uint32_t StaticLDSSize = 0; |
40 | uint32_t StaticGDSSize = 0; |
41 | |
42 | /// Align for dynamic shared memory if any. Dynamic shared memory is |
43 | /// allocated directly after the static one, i.e., LDSSize. Need to pad |
44 | /// LDSSize to ensure that dynamic one is aligned accordingly. |
45 | /// The maximal alignment is updated during IR translation or lowering |
46 | /// stages. |
47 | Align DynLDSAlign; |
48 | |
49 | // Flag to check dynamic LDS usage by kernel. |
50 | bool UsesDynamicLDS = false; |
51 | |
52 | // Kernels + shaders. i.e. functions called by the hardware and not called |
53 | // by other functions. |
54 | bool IsEntryFunction = false; |
55 | |
56 | // Entry points called by other functions instead of directly by the hardware. |
57 | bool IsModuleEntryFunction = false; |
58 | |
59 | // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC. |
60 | bool IsChainFunction = false; |
61 | |
62 | bool NoSignedZerosFPMath = false; |
63 | |
64 | // Function may be memory bound. |
65 | bool MemoryBound = false; |
66 | |
67 | // Kernel may need limited waves per EU for better performance. |
68 | bool WaveLimiter = false; |
69 | |
70 | public: |
71 | AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST); |
72 | |
73 | uint64_t getExplicitKernArgSize() const { |
74 | return ExplicitKernArgSize; |
75 | } |
76 | |
77 | Align getMaxKernArgAlign() const { return MaxKernArgAlign; } |
78 | |
79 | uint32_t getLDSSize() const { |
80 | return LDSSize; |
81 | } |
82 | |
83 | uint32_t getGDSSize() const { |
84 | return GDSSize; |
85 | } |
86 | |
87 | bool isEntryFunction() const { |
88 | return IsEntryFunction; |
89 | } |
90 | |
91 | bool isModuleEntryFunction() const { return IsModuleEntryFunction; } |
92 | |
93 | bool isChainFunction() const { return IsChainFunction; } |
94 | |
95 | // The stack is empty upon entry to this function. |
96 | bool isBottomOfStack() const { |
97 | return isEntryFunction() || isChainFunction(); |
98 | } |
99 | |
100 | bool hasNoSignedZerosFPMath() const { |
101 | return NoSignedZerosFPMath; |
102 | } |
103 | |
104 | bool isMemoryBound() const { |
105 | return MemoryBound; |
106 | } |
107 | |
108 | bool needsWaveLimiter() const { |
109 | return WaveLimiter; |
110 | } |
111 | |
112 | unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) { |
113 | return allocateLDSGlobal(DL, GV, Trailing: DynLDSAlign); |
114 | } |
115 | |
116 | unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV, |
117 | Align Trailing); |
118 | |
119 | static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F); |
120 | static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV); |
121 | |
122 | Align getDynLDSAlign() const { return DynLDSAlign; } |
123 | |
124 | void setDynLDSAlign(const Function &F, const GlobalVariable &GV); |
125 | |
126 | void setUsesDynamicLDS(bool DynLDS); |
127 | |
128 | bool isDynamicLDSUsed() const; |
129 | }; |
130 | |
131 | } |
132 | #endif |
133 | |