| 1 | //===- llvm/Analysis/MemoryProfileInfo.h - memory profile info ---*- C++ -*-==// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains utilities to analyze memory profile information. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef LLVM_ANALYSIS_MEMORYPROFILEINFO_H |
| 14 | #define LLVM_ANALYSIS_MEMORYPROFILEINFO_H |
| 15 | |
| 16 | #include "llvm/IR/Metadata.h" |
| 17 | #include "llvm/IR/ModuleSummaryIndex.h" |
| 18 | #include "llvm/Support/Compiler.h" |
| 19 | #include <map> |
| 20 | |
| 21 | namespace llvm { |
| 22 | |
// Forward declarations for types used only by pointer/reference below
// (CallBase in the attribute helpers, OptimizationRemarkEmitter in
// CallStackTrie); avoids pulling in the heavyweight IR headers here.
class CallBase;
class OptimizationRemarkEmitter;
| 24 | |
| 25 | namespace memprof { |
| 26 | |
/// Whether the alloc memprof metadata will include context size info for all
/// MIBs.
LLVM_ABI bool metadataIncludesAllContextSizeInfo();

/// Whether the alloc memprof metadata may include context size info for some
/// MIBs (but possibly not all).
LLVM_ABI bool metadataMayIncludeContextSizeInfo();

/// Whether we need to record the context size info in the alloc trie used to
/// build metadata.
LLVM_ABI bool recordContextSizeInfoForAnalysis();

/// Build callstack metadata from the provided list of call stack ids. Returns
/// the resulting metadata node.
LLVM_ABI MDNode *buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
                                        LLVMContext &Ctx);

/// Returns the stack node from an MIB metadata node.
LLVM_ABI MDNode *getMIBStackNode(const MDNode *MIB);

/// Returns the allocation type from an MIB metadata node.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB);

/// Returns the string to use in attributes with the given type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type);

/// True if the AllocTypes bitmask contains just a single type.
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes);

/// Removes any existing "ambiguous" memprof attribute. Called before we apply a
/// specific allocation type such as "cold", "notcold", or "hot".
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB);

/// Adds an "ambiguous" memprof attribute to call with a matched allocation
/// profile but that we haven't yet been able to disambiguate.
LLVM_ABI void addAmbiguousAttribute(CallBase *CB);

// During matching we also keep the AllocationType along with the
// ContextTotalSize in the Trie for the most accurate reporting when we decide
// to hint unambiguously where there is a dominant type. We don't put the
// AllocationType in the ContextTotalSize struct as it isn't needed there
// during the LTO step, because due to context trimming a summarized
// context with its allocation type can correspond to multiple context/size
// pairs. Here the redundancy is a short-lived convenience.
using ContextSizeTypePair = std::pair<ContextTotalSize, AllocationType>;
| 72 | |
| 73 | /// Class to build a trie of call stack contexts for a particular profiled |
| 74 | /// allocation call, along with their associated allocation types. |
| 75 | /// The allocation will be at the root of the trie, which is then used to |
| 76 | /// compute the minimum lists of context ids needed to associate a call context |
| 77 | /// with a single allocation type. |
| 78 | class CallStackTrie { |
| 79 | private: |
| 80 | struct CallStackTrieNode { |
| 81 | // Allocation types for call context sharing the context prefix at this |
| 82 | // node. |
| 83 | uint8_t AllocTypes; |
| 84 | // If the user has requested reporting of hinted sizes, keep track of the |
| 85 | // associated full stack id and profiled sizes. Can have more than one |
| 86 | // after trimming (e.g. when building from metadata). This is only placed on |
| 87 | // the last (root-most) trie node for each allocation context. Also |
| 88 | // track the original allocation type of the context. |
| 89 | std::vector<ContextSizeTypePair> ContextInfo; |
| 90 | // Map of caller stack id to the corresponding child Trie node. |
| 91 | std::map<uint64_t, CallStackTrieNode *> Callers; |
| 92 | CallStackTrieNode(AllocationType Type) |
| 93 | : AllocTypes(static_cast<uint8_t>(Type)) {} |
| 94 | void addAllocType(AllocationType AllocType) { |
| 95 | AllocTypes |= static_cast<uint8_t>(AllocType); |
| 96 | } |
| 97 | void removeAllocType(AllocationType AllocType) { |
| 98 | AllocTypes &= ~static_cast<uint8_t>(AllocType); |
| 99 | } |
| 100 | bool hasAllocType(AllocationType AllocType) const { |
| 101 | return AllocTypes & static_cast<uint8_t>(AllocType); |
| 102 | } |
| 103 | }; |
| 104 | |
| 105 | // The node for the allocation at the root. |
| 106 | CallStackTrieNode *Alloc = nullptr; |
| 107 | // The allocation's leaf stack id. |
| 108 | uint64_t AllocStackId = 0; |
| 109 | |
| 110 | // If the client provides a remarks emitter object, we will emit remarks on |
| 111 | // allocations for which we apply non-context sensitive allocation hints. |
| 112 | OptimizationRemarkEmitter *ORE; |
| 113 | |
| 114 | // The maximum size of a cold allocation context, from the profile summary. |
| 115 | uint64_t MaxColdSize; |
| 116 | |
| 117 | // Tracks whether we have built the Trie from existing MD_memprof metadata. We |
| 118 | // apply different heuristics for determining whether to discard non-cold |
| 119 | // contexts when rebuilding as we have lost information available during the |
| 120 | // original profile match. |
| 121 | bool BuiltFromExistingMetadata = false; |
| 122 | |
| 123 | void deleteTrieNode(CallStackTrieNode *Node) { |
| 124 | if (!Node) |
| 125 | return; |
| 126 | for (auto C : Node->Callers) |
| 127 | deleteTrieNode(Node: C.second); |
| 128 | delete Node; |
| 129 | } |
| 130 | |
| 131 | // Recursively build up a complete list of context information from the |
| 132 | // trie nodes reached form the given Node, including each context's |
| 133 | // ContextTotalSize and AllocationType, for hint size reporting. |
| 134 | void collectContextInfo(CallStackTrieNode *Node, |
| 135 | std::vector<ContextSizeTypePair> &ContextInfo); |
| 136 | |
| 137 | // Recursively convert hot allocation types to notcold, since we don't |
| 138 | // actually do any cloning for hot contexts, to facilitate more aggressive |
| 139 | // pruning of contexts. |
| 140 | void convertHotToNotCold(CallStackTrieNode *Node); |
| 141 | |
| 142 | // Recursive helper to trim contexts and create metadata nodes. |
| 143 | bool buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, |
| 144 | std::vector<uint64_t> &MIBCallStack, |
| 145 | std::vector<Metadata *> &MIBNodes, |
| 146 | bool CalleeHasAmbiguousCallerContext, uint64_t &TotalBytes, |
| 147 | uint64_t &ColdBytes); |
| 148 | |
| 149 | public: |
| 150 | (OptimizationRemarkEmitter *ORE = nullptr, |
| 151 | uint64_t MaxColdSize = 0) |
| 152 | : ORE(ORE), MaxColdSize(MaxColdSize) {} |
| 153 | ~CallStackTrie() { deleteTrieNode(Node: Alloc); } |
| 154 | |
| 155 | bool empty() const { return Alloc == nullptr; } |
| 156 | |
| 157 | /// Add a call stack context with the given allocation type to the Trie. |
| 158 | /// The context is represented by the list of stack ids (computed during |
| 159 | /// matching via a debug location hash), expected to be in order from the |
| 160 | /// allocation call down to the bottom of the call stack (i.e. callee to |
| 161 | /// caller order). |
| 162 | LLVM_ABI void |
| 163 | addCallStack(AllocationType AllocType, ArrayRef<uint64_t> StackIds, |
| 164 | std::vector<ContextTotalSize> ContextSizeInfo = {}); |
| 165 | |
| 166 | /// Add the call stack context along with its allocation type from the MIB |
| 167 | /// metadata to the Trie. |
| 168 | LLVM_ABI void addCallStack(MDNode *MIB); |
| 169 | |
| 170 | /// Build and attach the minimal necessary MIB metadata. If the alloc has a |
| 171 | /// single allocation type, add a function attribute instead. The reason for |
| 172 | /// adding an attribute in this case is that it matches how the behavior for |
| 173 | /// allocation calls will be communicated to lib call simplification after |
| 174 | /// cloning or another optimization to distinguish the allocation types, |
| 175 | /// which is lower overhead and more direct than maintaining this metadata. |
| 176 | /// Returns true if memprof metadata attached, false if not (attribute added). |
| 177 | LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI); |
| 178 | |
| 179 | /// Add an attribute for the given allocation type to the call instruction. |
| 180 | /// If hinted by reporting is enabled, a message is emitted with the given |
| 181 | /// descriptor used to identify the category of single allocation type. |
| 182 | LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, |
| 183 | StringRef Descriptor); |
| 184 | }; |
| 185 | |
| 186 | /// Helper class to iterate through stack ids in both metadata (memprof MIB and |
| 187 | /// callsite) and the corresponding ThinLTO summary data structures |
| 188 | /// (CallsiteInfo and MIBInfo). This simplifies implementation of client code |
| 189 | /// which doesn't need to worry about whether we are operating with IR (Regular |
| 190 | /// LTO), or summary (ThinLTO). |
template <class NodeT, class IteratorT> class CallStack {
public:
  // Wraps a (possibly null) node; a null node represents an empty call stack.
  CallStack(const NodeT *N = nullptr) : N(N) {}

  // Implement minimum required methods for range-based for loop.
  // The default implementation assumes we are operating on ThinLTO data
  // structures, which have a vector of StackIdIndices. There are specialized
  // versions provided to iterate through metadata.
  struct CallStackIterator {
    const NodeT *N = nullptr;
    IteratorT Iter;
    CallStackIterator(const NodeT *N, bool End);
    uint64_t operator*();
    // Iterators over the same node compare by underlying position only.
    bool operator==(const CallStackIterator &rhs) { return Iter == rhs.Iter; }
    bool operator!=(const CallStackIterator &rhs) { return !(*this == rhs); }
    void operator++() { ++Iter; }
  };

  // True when constructed with a null node (no stack ids to iterate).
  bool empty() const { return N == nullptr; }

  CallStackIterator begin() const;
  CallStackIterator end() const { return CallStackIterator(N, /*End*/ true); }
  // Returns an iterator positioned just past the prefix shared with Other;
  // asserts (in debug builds) that the prefixes actually match.
  CallStackIterator beginAfterSharedPrefix(const CallStack &Other);
  // Returns the last (caller-most) stack id; requires a non-null node.
  uint64_t back() const;

private:
  const NodeT *N = nullptr;
};
| 219 | |
| 220 | template <class NodeT, class IteratorT> |
| 221 | CallStack<NodeT, IteratorT>::CallStackIterator::CallStackIterator( |
| 222 | const NodeT *N, bool End) |
| 223 | : N(N) { |
| 224 | if (!N) { |
| 225 | Iter = nullptr; |
| 226 | return; |
| 227 | } |
| 228 | Iter = End ? N->StackIdIndices.end() : N->StackIdIndices.begin(); |
| 229 | } |
| 230 | |
// Dereference the iterator to get the current stack id index; it is a bug
// in the caller to dereference the end iterator.
template <class NodeT, class IteratorT>
uint64_t CallStack<NodeT, IteratorT>::CallStackIterator::operator*() {
  assert(Iter != N->StackIdIndices.end());
  return *Iter;
}
| 236 | |
// Return the last stack id index in the node's vector; requires a non-null
// node (i.e. a non-empty call stack).
template <class NodeT, class IteratorT>
uint64_t CallStack<NodeT, IteratorT>::back() const {
  assert(N);
  return N->StackIdIndices.back();
}
| 242 | |
| 243 | template <class NodeT, class IteratorT> |
| 244 | typename CallStack<NodeT, IteratorT>::CallStackIterator |
| 245 | CallStack<NodeT, IteratorT>::begin() const { |
| 246 | return CallStackIterator(N, /*End*/ false); |
| 247 | } |
| 248 | |
| 249 | template <class NodeT, class IteratorT> |
| 250 | typename CallStack<NodeT, IteratorT>::CallStackIterator |
| 251 | CallStack<NodeT, IteratorT>::beginAfterSharedPrefix(const CallStack &Other) { |
| 252 | CallStackIterator Cur = begin(); |
| 253 | for (CallStackIterator OtherCur = Other.begin(); |
| 254 | Cur != end() && OtherCur != Other.end(); ++Cur, ++OtherCur) |
| 255 | assert(*Cur == *OtherCur); |
| 256 | return Cur; |
| 257 | } |
| 258 | |
/// Specializations for iterating through IR metadata stack contexts. These
/// override the default StackIdIndices-based implementations above to walk
/// the operands of an MDNode instead; their definitions live in the .cpp.
template <>
LLVM_ABI
CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::CallStackIterator(
    const MDNode *N, bool End);
template <>
LLVM_ABI uint64_t
CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*();
template <>
LLVM_ABI uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const;
| 269 | |
| 270 | } // end namespace memprof |
| 271 | } // end namespace llvm |
| 272 | |
| 273 | #endif |
| 274 | |