//===-- MemoryProfileInfo.cpp - memory profile info -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains utilities to analyze memory profile information.
//
//===----------------------------------------------------------------------===//
12
13#include "llvm/Analysis/MemoryProfileInfo.h"
14#include "llvm/Analysis/OptimizationRemarkEmitter.h"
15#include "llvm/IR/Constants.h"
16#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/Format.h"
19
20using namespace llvm;
21using namespace llvm::memprof;
22
23#define DEBUG_TYPE "memory-profile-info"
24
25namespace llvm {
26
27cl::opt<bool> MemProfReportHintedSizes(
28 "memprof-report-hinted-sizes", cl::init(Val: false), cl::Hidden,
29 cl::desc("Report total allocation sizes of hinted allocations"));
30
31// This is useful if we have enabled reporting of hinted sizes, and want to get
32// information from the indexing step for all contexts (especially for testing),
33// or have specified a value less than 100% for -memprof-cloning-cold-threshold.
34LLVM_ABI cl::opt<bool> MemProfKeepAllNotColdContexts(
35 "memprof-keep-all-not-cold-contexts", cl::init(Val: false), cl::Hidden,
36 cl::desc("Keep all non-cold contexts (increases cloning overheads)"));
37
38cl::opt<unsigned> MinClonedColdBytePercent(
39 "memprof-cloning-cold-threshold", cl::init(Val: 100), cl::Hidden,
40 cl::desc("Min percent of cold bytes to hint alloc cold during cloning"));
41
42// Discard non-cold contexts if they overlap with much larger cold contexts,
43// specifically, if all contexts reaching a given callsite are at least this
44// percent cold byte allocations. This reduces the amount of cloning required
45// to expose the cold contexts when they greatly dominate non-cold contexts.
46cl::opt<unsigned> MinCallsiteColdBytePercent(
47 "memprof-callsite-cold-threshold", cl::init(Val: 100), cl::Hidden,
48 cl::desc("Min percent of cold bytes at a callsite to discard non-cold "
49 "contexts"));
50
51// Enable saving context size information for largest cold contexts, which can
52// be used to flag contexts for more aggressive cloning and reporting.
53cl::opt<unsigned> MinPercentMaxColdSize(
54 "memprof-min-percent-max-cold-size", cl::init(Val: 100), cl::Hidden,
55 cl::desc("Min percent of max cold bytes for critical cold context"));
56
57// Use this to keep the context size information in the memprof metadata for use
58// in remarks.
59cl::opt<bool> MemProfKeepContextSizeInfo(
60 "memprof-keep-context-size-info", cl::init(Val: false), cl::Hidden,
61 cl::desc("Keep context size information in memprof metadata"));
62
63LLVM_ABI cl::opt<bool> MemProfUseAmbiguousAttributes(
64 "memprof-ambiguous-attributes", cl::init(Val: true), cl::Hidden,
65 cl::desc("Apply ambiguous memprof attribute to ambiguous allocations"));
66
67} // end namespace llvm
68
69bool llvm::memprof::metadataIncludesAllContextSizeInfo() {
70 return MemProfReportHintedSizes || MemProfKeepContextSizeInfo ||
71 MinClonedColdBytePercent < 100;
72}
73
74bool llvm::memprof::metadataMayIncludeContextSizeInfo() {
75 return metadataIncludesAllContextSizeInfo() || MinPercentMaxColdSize < 100;
76}
77
78bool llvm::memprof::recordContextSizeInfoForAnalysis() {
79 return metadataMayIncludeContextSizeInfo() ||
80 MinCallsiteColdBytePercent < 100;
81}
82
83MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
84 LLVMContext &Ctx) {
85 SmallVector<Metadata *, 8> StackVals;
86 StackVals.reserve(N: CallStack.size());
87 for (auto Id : CallStack) {
88 auto *StackValMD =
89 ValueAsMetadata::get(V: ConstantInt::get(Ty: Type::getInt64Ty(C&: Ctx), V: Id));
90 StackVals.push_back(Elt: StackValMD);
91 }
92 return MDNode::get(Context&: Ctx, MDs: StackVals);
93}
94
95MDNode *llvm::memprof::getMIBStackNode(const MDNode *MIB) {
96 assert(MIB->getNumOperands() >= 2);
97 // The stack metadata is the first operand of each memprof MIB metadata.
98 return cast<MDNode>(Val: MIB->getOperand(I: 0));
99}
100
101AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) {
102 assert(MIB->getNumOperands() >= 2);
103 // The allocation type is currently the second operand of each memprof
104 // MIB metadata. This will need to change as we add additional allocation
105 // types that can be applied based on the allocation profile data.
106 auto *MDS = dyn_cast<MDString>(Val: MIB->getOperand(I: 1));
107 assert(MDS);
108 if (MDS->getString() == "cold") {
109 return AllocationType::Cold;
110 } else if (MDS->getString() == "hot") {
111 return AllocationType::Hot;
112 }
113 return AllocationType::NotCold;
114}
115
116std::string llvm::memprof::getAllocTypeAttributeString(AllocationType Type) {
117 switch (Type) {
118 case AllocationType::NotCold:
119 return "notcold";
120 break;
121 case AllocationType::Cold:
122 return "cold";
123 break;
124 case AllocationType::Hot:
125 return "hot";
126 break;
127 default:
128 assert(false && "Unexpected alloc type");
129 }
130 llvm_unreachable("invalid alloc type");
131}
132
133bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) {
134 const unsigned NumAllocTypes = llvm::popcount(Value: AllocTypes);
135 assert(NumAllocTypes != 0);
136 return NumAllocTypes == 1;
137}
138
139void llvm::memprof::removeAnyExistingAmbiguousAttribute(CallBase *CB) {
140 if (!CB->hasFnAttr(Kind: "memprof"))
141 return;
142 assert(CB->getFnAttr("memprof").getValueAsString() == "ambiguous");
143 CB->removeFnAttr(Kind: "memprof");
144}
145
146void llvm::memprof::addAmbiguousAttribute(CallBase *CB) {
147 if (!MemProfUseAmbiguousAttributes)
148 return;
149 // We may have an existing ambiguous attribute if we are reanalyzing
150 // after inlining.
151 if (CB->hasFnAttr(Kind: "memprof")) {
152 assert(CB->getFnAttr("memprof").getValueAsString() == "ambiguous");
153 } else {
154 auto A = llvm::Attribute::get(Context&: CB->getContext(), Kind: "memprof", Val: "ambiguous");
155 CB->addFnAttr(Attr: A);
156 }
157}
158
159void CallStackTrie::addCallStack(
160 AllocationType AllocType, ArrayRef<uint64_t> StackIds,
161 std::vector<ContextTotalSize> ContextSizeInfo) {
162 bool First = true;
163 CallStackTrieNode *Curr = nullptr;
164 for (auto StackId : StackIds) {
165 // If this is the first stack frame, add or update alloc node.
166 if (First) {
167 First = false;
168 if (Alloc) {
169 assert(AllocStackId == StackId);
170 Alloc->addAllocType(AllocType);
171 } else {
172 AllocStackId = StackId;
173 Alloc = new CallStackTrieNode(AllocType);
174 }
175 Curr = Alloc;
176 continue;
177 }
178 // Update existing caller node if it exists.
179 auto [Next, Inserted] = Curr->Callers.try_emplace(k: StackId);
180 if (!Inserted) {
181 Curr = Next->second;
182 Curr->addAllocType(AllocType);
183 continue;
184 }
185 // Otherwise add a new caller node.
186 auto *New = new CallStackTrieNode(AllocType);
187 Next->second = New;
188 Curr = New;
189 }
190 assert(Curr);
191 // Append all of the ContextSizeInfo, along with their original AllocType.
192 llvm::append_range(C&: Curr->ContextInfo,
193 R: llvm::map_range(C&: ContextSizeInfo,
194 F: [AllocType](const ContextTotalSize &CTS) {
195 return ContextSizeTypePair(CTS,
196 AllocType);
197 }));
198}
199
200void CallStackTrie::addCallStack(MDNode *MIB) {
201 // Note that we are building this from existing MD_memprof metadata.
202 BuiltFromExistingMetadata = true;
203 MDNode *StackMD = getMIBStackNode(MIB);
204 assert(StackMD);
205 std::vector<uint64_t> CallStack;
206 CallStack.reserve(n: StackMD->getNumOperands());
207 for (const auto &MIBStackIter : StackMD->operands()) {
208 auto *StackId = mdconst::dyn_extract<ConstantInt>(MD: MIBStackIter);
209 assert(StackId);
210 CallStack.push_back(x: StackId->getZExtValue());
211 }
212 std::vector<ContextTotalSize> ContextSizeInfo;
213 // Collect the context size information if it exists.
214 if (MIB->getNumOperands() > 2) {
215 for (unsigned I = 2; I < MIB->getNumOperands(); I++) {
216 MDNode *ContextSizePair = dyn_cast<MDNode>(Val: MIB->getOperand(I));
217 assert(ContextSizePair->getNumOperands() == 2);
218 uint64_t FullStackId =
219 mdconst::dyn_extract<ConstantInt>(MD: ContextSizePair->getOperand(I: 0))
220 ->getZExtValue();
221 uint64_t TotalSize =
222 mdconst::dyn_extract<ConstantInt>(MD: ContextSizePair->getOperand(I: 1))
223 ->getZExtValue();
224 ContextSizeInfo.push_back(x: {.FullStackId: FullStackId, .TotalSize: TotalSize});
225 }
226 }
227 addCallStack(AllocType: getMIBAllocType(MIB), StackIds: CallStack, ContextSizeInfo: std::move(ContextSizeInfo));
228}
229
// Create one MIB metadata node for the given call stack prefix and allocation
// type. Optionally appends per-context (full stack id, total size) pairs as
// extra operands, and accumulates the total and cold byte counts of the
// contexts covered by this MIB into the caller-provided TotalBytes/ColdBytes.
static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
                             AllocationType AllocType,
                             ArrayRef<ContextSizeTypePair> ContextInfo,
                             const uint64_t MaxColdSize,
                             bool BuiltFromExistingMetadata,
                             uint64_t &TotalBytes, uint64_t &ColdBytes) {
  // Operand 0 is the call stack; operand 1 is the allocation type string.
  SmallVector<Metadata *> MIBPayload(
      {buildCallstackMetadata(CallStack: MIBCallStack, Ctx)});
  MIBPayload.push_back(
      Elt: MDString::get(Context&: Ctx, Str: getAllocTypeAttributeString(Type: AllocType)));

  if (ContextInfo.empty()) {
    // The profile matcher should have provided context size info if there was a
    // MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully
    // handle a user-provided percent larger than 100. However, we may not have
    // this information if we built the Trie from existing MD_memprof metadata.
    assert(BuiltFromExistingMetadata || MinCallsiteColdBytePercent >= 100);
    return MDNode::get(Context&: Ctx, MDs: MIBPayload);
  }

  // Note: AT is the context's original allocation type; the byte accounting
  // below keys off the MIB-level AllocType being applied.
  for (const auto &[CSI, AT] : ContextInfo) {
    const auto &[FullStackId, TotalSize] = CSI;
    TotalBytes += TotalSize;
    bool LargeColdContext = false;
    if (AllocType == AllocationType::Cold) {
      ColdBytes += TotalSize;
      // If we have the max cold context size from summary information and have
      // requested identification of contexts above a percentage of the max, see
      // if this context qualifies. We should assume this is large if we rebuilt
      // the trie from existing metadata (i.e. to update after inlining), in
      // which case we don't have a MaxSize from the profile - we assume any
      // context size info in existence on the metadata should be propagated.
      if (BuiltFromExistingMetadata ||
          (MaxColdSize > 0 && MinPercentMaxColdSize < 100 &&
           TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize))
        LargeColdContext = true;
    }
    // Only add the context size info as metadata if we need it in the thin
    // link (currently if reporting of hinted sizes is enabled, we have
    // specified a threshold for marking allocations cold after cloning, or we
    // have identified this as a large cold context of interest above).
    if (metadataIncludesAllContextSizeInfo() || LargeColdContext) {
      auto *FullStackIdMD = ValueAsMetadata::get(
          V: ConstantInt::get(Ty: Type::getInt64Ty(C&: Ctx), V: FullStackId));
      auto *TotalSizeMD = ValueAsMetadata::get(
          V: ConstantInt::get(Ty: Type::getInt64Ty(C&: Ctx), V: TotalSize));
      auto *ContextSizeMD = MDNode::get(Context&: Ctx, MDs: {FullStackIdMD, TotalSizeMD});
      MIBPayload.push_back(Elt: ContextSizeMD);
    }
  }
  // Context info implies at least one context with a non-zero size.
  assert(TotalBytes > 0);
  return MDNode::get(Context&: Ctx, MDs: MIBPayload);
}
283
284void CallStackTrie::collectContextInfo(
285 CallStackTrieNode *Node, std::vector<ContextSizeTypePair> &ContextInfo) {
286 llvm::append_range(C&: ContextInfo, R&: Node->ContextInfo);
287 for (auto &Caller : Node->Callers)
288 collectContextInfo(Node: Caller.second, ContextInfo);
289}
290
291void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
292 if (Node->hasAllocType(AllocType: AllocationType::Hot)) {
293 Node->removeAllocType(AllocType: AllocationType::Hot);
294 Node->addAllocType(AllocType: AllocationType::NotCold);
295 }
296 for (auto &Caller : Node->Callers)
297 convertHotToNotCold(Node: Caller.second);
298}
299
300// Helper to emit messages for non-cold contexts that are ignored for various
301// reasons when reporting of hinted bytes is enabled.
302static void emitIgnoredNonColdContextMessage(StringRef Tag,
303 uint64_t FullStackId,
304 StringRef Extra,
305 uint64_t TotalSize) {
306 errs() << "MemProf hinting: Total size for " << Tag
307 << " non-cold full allocation context hash " << FullStackId << Extra
308 << ": " << TotalSize << "\n";
309}
310
// Copy over some or all of NewMIBNodes to the SavedMIBNodes vector, depending
// on options that enable filtering out some NotCold contexts. TotalBytes and
// ColdBytes are the byte counts accumulated over the contexts in NewMIBNodes;
// CallerContextLength is the stack length of MIBs added for the immediate
// caller (deeper MIBs kept by deeper recursion steps have longer stacks).
static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
                                    std::vector<Metadata *> &SavedMIBNodes,
                                    unsigned CallerContextLength,
                                    uint64_t TotalBytes, uint64_t ColdBytes,
                                    bool BuiltFromExistingMetadata) {
  const bool MostlyCold =
      // If we have built the Trie from existing MD_memprof metadata, we may or
      // may not have context size information (in which case ColdBytes and
      // TotalBytes are 0, which is not also guarded against below). Even if we
      // do have some context size information from the metadata, we have
      // already gone through a round of discarding of small non-cold contexts
      // during matching, and it would be overly aggressive to do it again, and
      // we also want to maintain the same behavior with and without reporting
      // of hinted bytes enabled.
      !BuiltFromExistingMetadata && MinCallsiteColdBytePercent < 100 &&
      ColdBytes > 0 &&
      ColdBytes * 100 >= MinCallsiteColdBytePercent * TotalBytes;

  // In the simplest case, with pruning disabled, keep all the new MIB nodes.
  if (MemProfKeepAllNotColdContexts && !MostlyCold) {
    append_range(C&: SavedMIBNodes, R&: NewMIBNodes);
    return;
  }

  // Reports (via stderr) every context size record on a MIB that is being
  // dropped, with the reason in Tag and optional detail in Extra.
  auto EmitMessageForRemovedContexts = [](const MDNode *MIBMD, StringRef Tag,
                                          StringRef Extra) {
    assert(MIBMD->getNumOperands() > 2);
    for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
      // Operands beyond the first two are (full stack id, total size) pairs.
      MDNode *ContextSizePair = dyn_cast<MDNode>(Val: MIBMD->getOperand(I));
      assert(ContextSizePair->getNumOperands() == 2);
      uint64_t FullStackId =
          mdconst::dyn_extract<ConstantInt>(MD: ContextSizePair->getOperand(I: 0))
              ->getZExtValue();
      uint64_t TS =
          mdconst::dyn_extract<ConstantInt>(MD: ContextSizePair->getOperand(I: 1))
              ->getZExtValue();
      emitIgnoredNonColdContextMessage(Tag, FullStackId, Extra, TotalSize: TS);
    }
  };

  // If the cold bytes at the current callsite exceed the given threshold, we
  // discard all non-cold contexts so do not need any of the later pruning
  // handling. We can simply copy over all the cold contexts and return early.
  if (MostlyCold) {
    auto NewColdMIBNodes =
        make_filter_range(Range&: NewMIBNodes, Pred: [&](const Metadata *M) {
          auto MIBMD = cast<MDNode>(Val: M);
          // Only append cold contexts.
          if (getMIBAllocType(MIB: MIBMD) == AllocationType::Cold)
            return true;
          if (MemProfReportHintedSizes) {
            const float PercentCold = ColdBytes * 100.0 / TotalBytes;
            std::string PercentStr;
            llvm::raw_string_ostream OS(PercentStr);
            OS << format(Fmt: " for %5.2f%% cold bytes", Vals: PercentCold);
            EmitMessageForRemovedContexts(MIBMD, "discarded", OS.str());
          }
          return false;
        });
    for (auto *M : NewColdMIBNodes)
      SavedMIBNodes.push_back(x: M);
    return;
  }

  // Prune unneeded NotCold contexts, taking advantage of the fact
  // that we later will only clone Cold contexts, as NotCold is the allocation
  // default. We only need to keep as metadata the NotCold contexts that
  // overlap the longest with Cold allocations, so that we know how deeply we
  // need to clone. For example, assume we add the following contexts to the
  // trie:
  //      1 3 (notcold)
  //      1 2 4 (cold)
  //      1 2 5 (notcold)
  //      1 2 6 (notcold)
  // the trie looks like:
  //         1
  //        / \
  //       2   3
  //      /|\
  //     4 5 6
  //
  // It is sufficient to prune all but one not-cold contexts (either 1,2,5 or
  // 1,2,6, we arbitrarily keep the first one we encounter which will be
  // 1,2,5).
  //
  // To do this pruning, we first check if there were any not-cold
  // contexts kept for a deeper caller, which will have a context length larger
  // than the CallerContextLength being handled here (i.e. kept by a deeper
  // recursion step). If so, none of the not-cold MIB nodes added for the
  // immediate callers need to be kept. If not, we keep the first (created
  // for the immediate caller) not-cold MIB node.
  bool LongerNotColdContextKept = false;
  for (auto *MIB : NewMIBNodes) {
    auto MIBMD = cast<MDNode>(Val: MIB);
    if (getMIBAllocType(MIB: MIBMD) == AllocationType::Cold)
      continue;
    MDNode *StackMD = getMIBStackNode(MIB: MIBMD);
    assert(StackMD);
    if (StackMD->getNumOperands() > CallerContextLength) {
      LongerNotColdContextKept = true;
      break;
    }
  }
  // Don't need to emit any for the immediate caller if we already have
  // longer overlapping contexts.
  bool KeepFirstNewNotCold = !LongerNotColdContextKept;
  auto NewColdMIBNodes = make_filter_range(Range&: NewMIBNodes, Pred: [&](const Metadata *M) {
    auto MIBMD = cast<MDNode>(Val: M);
    // Only keep cold contexts and first (longest non-cold context).
    if (getMIBAllocType(MIB: MIBMD) != AllocationType::Cold) {
      MDNode *StackMD = getMIBStackNode(MIB: MIBMD);
      assert(StackMD);
      // Keep any already kept for longer contexts.
      if (StackMD->getNumOperands() > CallerContextLength)
        return true;
      // Otherwise keep the first one added by the immediate caller if there
      // were no longer contexts.
      if (KeepFirstNewNotCold) {
        KeepFirstNewNotCold = false;
        return true;
      }
      if (MemProfReportHintedSizes)
        EmitMessageForRemovedContexts(MIBMD, "pruned", "");
      return false;
    }
    return true;
  });
  for (auto *M : NewColdMIBNodes)
    SavedMIBNodes.push_back(x: M);
}
443
// Recursive helper to trim contexts and create metadata nodes.
// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
// caller makes it simpler to handle the many early returns in this method.
// Updates the total and cold profiled bytes in the subtrie rooted at this node.
// Returns true if MIB nodes covering all contexts in this subtrie were added;
// false means the caller must emit a conservative MIB for its own prefix.
bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
                                  std::vector<uint64_t> &MIBCallStack,
                                  std::vector<Metadata *> &MIBNodes,
                                  bool CalleeHasAmbiguousCallerContext,
                                  uint64_t &TotalBytes, uint64_t &ColdBytes) {
  // Trim context below the first node in a prefix with a single alloc type.
  // Add an MIB record for the current call stack prefix.
  if (hasSingleAllocType(AllocTypes: Node->AllocTypes)) {
    std::vector<ContextSizeTypePair> ContextInfo;
    collectContextInfo(Node, ContextInfo);
    MIBNodes.push_back(x: createMIBNode(
        Ctx, MIBCallStack, AllocType: (AllocationType)Node->AllocTypes, ContextInfo,
        MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes));
    return true;
  }

  // We don't have a single allocation for all the contexts sharing this prefix,
  // so recursively descend into callers in trie.
  if (!Node->Callers.empty()) {
    bool NodeHasAmbiguousCallerContext = Node->Callers.size() > 1;
    bool AddedMIBNodesForAllCallerContexts = true;
    // Accumulate all new MIB nodes by the recursive calls below into a vector
    // that will later be filtered before adding to the caller's MIBNodes
    // vector.
    std::vector<Metadata *> NewMIBNodes;
    // Determine the total and cold byte counts for all callers, then add to the
    // caller's counts further below.
    uint64_t CallerTotalBytes = 0;
    uint64_t CallerColdBytes = 0;
    for (auto &Caller : Node->Callers) {
      MIBCallStack.push_back(x: Caller.first);
      AddedMIBNodesForAllCallerContexts &= buildMIBNodes(
          Node: Caller.second, Ctx, MIBCallStack, MIBNodes&: NewMIBNodes,
          CalleeHasAmbiguousCallerContext: NodeHasAmbiguousCallerContext, TotalBytes&: CallerTotalBytes, ColdBytes&: CallerColdBytes);
      // Remove Caller.
      MIBCallStack.pop_back();
    }
    // Pass in the stack length of the MIB nodes added for the immediate caller,
    // which is the current stack length plus 1.
    saveFilteredNewMIBNodes(NewMIBNodes, SavedMIBNodes&: MIBNodes, CallerContextLength: MIBCallStack.size() + 1,
                            TotalBytes: CallerTotalBytes, ColdBytes: CallerColdBytes,
                            BuiltFromExistingMetadata);
    TotalBytes += CallerTotalBytes;
    ColdBytes += CallerColdBytes;

    if (AddedMIBNodesForAllCallerContexts)
      return true;
    // We expect that the callers should be forced to add MIBs to disambiguate
    // the context in this case (see below).
    assert(!NodeHasAmbiguousCallerContext);
  }

  // If we reached here, then this node does not have a single allocation type,
  // and we didn't add metadata for a longer call stack prefix including any of
  // Node's callers. That means we never hit a single allocation type along all
  // call stacks with this prefix. This can happen due to recursion collapsing
  // or the stack being deeper than tracked by the profiler runtime, leading to
  // contexts with different allocation types being merged. In that case, we
  // trim the context just below the deepest context split, which is this
  // node if the callee has an ambiguous caller context (multiple callers),
  // since the recursive calls above returned false. Conservatively give it
  // non-cold allocation type.
  if (!CalleeHasAmbiguousCallerContext)
    return false;
  std::vector<ContextSizeTypePair> ContextInfo;
  collectContextInfo(Node, ContextInfo);
  MIBNodes.push_back(x: createMIBNode(
      Ctx, MIBCallStack, AllocType: AllocationType::NotCold, ContextInfo, MaxColdSize,
      BuiltFromExistingMetadata, TotalBytes, ColdBytes));
  return true;
}
519
// Apply a single "memprof" allocation-type attribute (e.g. "cold"/"notcold")
// to the call, replacing any existing ambiguous marker, and emit optional
// remarks and/or stderr size reports for the affected contexts. Descriptor
// describes how the single type was chosen (e.g. "single",
// "indistinguishable") and appears in the stderr report.
void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
                                                StringRef Descriptor) {
  auto AllocTypeString = getAllocTypeAttributeString(Type: AT);
  auto A = llvm::Attribute::get(Context&: CI->getContext(), Kind: "memprof", Val: AllocTypeString);
  // After inlining we may be able to convert an existing ambiguous allocation
  // to an unambiguous one.
  removeAnyExistingAmbiguousAttribute(CB: CI);
  CI->addFnAttr(Attr: A);

  std::vector<ContextSizeTypePair> ContextInfo;
  collectContextInfo(Node: Alloc, ContextInfo);

  // If we don't have context size info, just emit a single remark for this
  // allocation.
  if (ContextInfo.empty()) {
    if (ORE)
      ORE->emit(OptDiag: OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CI)
                    << ore::NV("AllocationCall", CI) << " in function "
                    << ore::NV("Caller", CI->getFunction())
                    << " marked with memprof allocation attribute "
                    << ore::NV("Attribute", AllocTypeString));
    return;
  }

  // Emit remarks or stderr reporting if requested, one per context.
  for (const auto &[CSI, OrigAT] : ContextInfo) {
    const auto &[FullStackId, TotalSize] = CSI;
    // If the original alloc type is not the one being applied as the hint,
    // then don't report that it was hinted. Optionally report that we ignored
    // this context.
    if (AT != OrigAT) {
      if (MemProfReportHintedSizes)
        emitIgnoredNonColdContextMessage(Tag: "ignored", FullStackId, Extra: "", TotalSize);
      continue;
    }
    if (MemProfReportHintedSizes)
      errs() << "MemProf hinting: Total size for full allocation context hash "
             << FullStackId << " and " << Descriptor << " alloc type "
             << getAllocTypeAttributeString(Type: AT) << ": " << TotalSize << "\n";
    if (ORE)
      ORE->emit(OptDiag: OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CI)
                    << ore::NV("AllocationCall", CI) << " in function "
                    << ore::NV("Caller", CI->getFunction())
                    << " marked with memprof allocation attribute "
                    << ore::NV("Attribute", AllocTypeString)
                    << " for full allocation context hash "
                    << ore::NV("FullStackId", FullStackId) << " with total size "
                    << ore::NV("TotalSize", TotalSize));
  }
}
570
// Build and attach the minimal necessary MIB metadata. If the alloc has a
// single allocation type, add a function attribute instead. Returns true if
// memprof metadata attached, false if not (attribute added).
bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
  // Fast path: all contexts agree on one type, so a plain attribute suffices.
  if (hasSingleAllocType(AllocTypes: Alloc->AllocTypes)) {
    addSingleAllocTypeAttribute(CI, AT: (AllocationType)Alloc->AllocTypes,
                                Descriptor: "single");
    return false;
  }
  // If there were any hot allocation contexts, the Alloc trie node would have
  // the Hot type set. If so, because we don't currently support cloning for hot
  // contexts, they should be converted to NotCold. This happens in the cloning
  // support anyway, however, doing this now enables more aggressive context
  // trimming when building the MIB metadata (and possibly may make the
  // allocation have a single NotCold allocation type), greatly reducing
  // overheads in bitcode, cloning memory and cloning time.
  if (Alloc->hasAllocType(AllocType: AllocationType::Hot)) {
    convertHotToNotCold(Node: Alloc);
    // Check whether we now have a single alloc type.
    if (hasSingleAllocType(AllocTypes: Alloc->AllocTypes)) {
      addSingleAllocTypeAttribute(CI, AT: (AllocationType)Alloc->AllocTypes,
                                  Descriptor: "single");
      return false;
    }
  }
  auto &Ctx = CI->getContext();
  std::vector<uint64_t> MIBCallStack;
  MIBCallStack.push_back(x: AllocStackId);
  std::vector<Metadata *> MIBNodes;
  uint64_t TotalBytes = 0;
  uint64_t ColdBytes = 0;
  assert(!Alloc->Callers.empty() && "addCallStack has not been called yet");
  // The CalleeHasAmbiguousCallerContext flag is meant to say whether the
  // callee of the given node has more than one caller. Here the node being
  // passed in is the alloc and it has no callees. So it's false.
  if (buildMIBNodes(Node: Alloc, Ctx, MIBCallStack, MIBNodes,
                    /*CalleeHasAmbiguousCallerContext=*/false, TotalBytes,
                    ColdBytes)) {
    assert(MIBCallStack.size() == 1 &&
           "Should only be left with Alloc's location in stack");
    CI->setMetadata(KindID: LLVMContext::MD_memprof, Node: MDNode::get(Context&: Ctx, MDs: MIBNodes));
    // Mark the call as ambiguous so later passes know it is still unresolved.
    addAmbiguousAttribute(CB: CI);
    return true;
  }
  // If there exists corner case that CallStackTrie has one chain to leaf
  // and all node in the chain have multi alloc type, conservatively give
  // it non-cold allocation type.
  // FIXME: Avoid this case before memory profile created. Alternatively, select
  // hint based on fraction cold.
  addSingleAllocTypeAttribute(CI, AT: AllocationType::NotCold, Descriptor: "indistinguishable");
  return false;
}
623
624template <>
625CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::CallStackIterator(
626 const MDNode *N, bool End)
627 : N(N) {
628 if (!N)
629 return;
630 Iter = End ? N->op_end() : N->op_begin();
631}
632
633template <>
634uint64_t
635CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*() {
636 assert(Iter != N->op_end());
637 ConstantInt *StackIdCInt = mdconst::dyn_extract<ConstantInt>(MD: *Iter);
638 assert(StackIdCInt);
639 return StackIdCInt->getZExtValue();
640}
641
642template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const {
643 assert(N);
644 return mdconst::dyn_extract<ConstantInt>(MD: N->operands().back())
645 ->getZExtValue();
646}
647
648MDNode *MDNode::getMergedMemProfMetadata(MDNode *A, MDNode *B) {
649 // TODO: Support more sophisticated merging, such as selecting the one with
650 // more bytes allocated, or implement support for carrying multiple allocation
651 // leaf contexts. For now, keep the first one.
652 if (A)
653 return A;
654 return B;
655}
656
657MDNode *MDNode::getMergedCallsiteMetadata(MDNode *A, MDNode *B) {
658 // TODO: Support more sophisticated merging, which will require support for
659 // carrying multiple contexts. For now, keep the first one.
660 if (A)
661 return A;
662 return B;
663}
664