//===-- MemoryProfileInfo.cpp - memory profile info ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains utilities to analyze memory profile information.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Format.h"

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memory-profile-info"

namespace llvm {

cl::opt<bool> MemProfReportHintedSizes(
    "memprof-report-hinted-sizes", cl::init(false), cl::Hidden,
    cl::desc("Report total allocation sizes of hinted allocations"));

// This is useful if we have enabled reporting of hinted sizes, and want to get
// information from the indexing step for all contexts (especially for
// testing), or have specified a value less than 100% for
// -memprof-cloning-cold-threshold.
LLVM_ABI cl::opt<bool> MemProfKeepAllNotColdContexts(
    "memprof-keep-all-not-cold-contexts", cl::init(false), cl::Hidden,
    cl::desc("Keep all non-cold contexts (increases cloning overheads)"));

cl::opt<unsigned> MinClonedColdBytePercent(
    "memprof-cloning-cold-threshold", cl::init(100), cl::Hidden,
    cl::desc("Min percent of cold bytes to hint alloc cold during cloning"));

// Discard non-cold contexts if they overlap with much larger cold contexts,
// specifically, if all contexts reaching a given callsite are at least this
// percent cold byte allocations. This reduces the amount of cloning required
// to expose the cold contexts when they greatly dominate non-cold contexts.
cl::opt<unsigned> MinCallsiteColdBytePercent(
    "memprof-callsite-cold-threshold", cl::init(100), cl::Hidden,
    cl::desc("Min percent of cold bytes at a callsite to discard non-cold "
             "contexts"));

// Enable saving context size information for largest cold contexts, which can
// be used to flag contexts for more aggressive cloning and reporting.
cl::opt<unsigned> MinPercentMaxColdSize(
    "memprof-min-percent-max-cold-size", cl::init(100), cl::Hidden,
    cl::desc("Min percent of max cold bytes for critical cold context"));

LLVM_ABI cl::opt<bool> MemProfUseAmbiguousAttributes(
    "memprof-ambiguous-attributes", cl::init(true), cl::Hidden,
    cl::desc("Apply ambiguous memprof attribute to ambiguous allocations"));

} // end namespace llvm

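// Return true if the generated MIB metadata will carry context size info for
// all contexts, i.e. when hinted size reporting is enabled or a cloning cold
// threshold below 100% is in effect.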
bool llvm::memprof::metadataIncludesAllContextSizeInfo() {
  return MemProfReportHintedSizes || MinClonedColdBytePercent < 100;
}

bool llvm::memprof::metadataMayIncludeContextSizeInfo() {
  return metadataIncludesAllContextSizeInfo() || MinPercentMaxColdSize < 100;
}

bool llvm::memprof::recordContextSizeInfoForAnalysis() {
  return metadataMayIncludeContextSizeInfo() ||
         MinCallsiteColdBytePercent < 100;
}

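// Build a metadata node holding the given call stack of 64-bit frame ids, each
// stored as an i64 constant operand.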
MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
                                              LLVMContext &Ctx) {
  SmallVector<Metadata *, 8> StackVals;
  StackVals.reserve(CallStack.size());
  for (auto Id : CallStack) {
    auto *StackValMD =
        ValueAsMetadata::get(ConstantInt::get(Type::getInt64Ty(Ctx), Id));
    StackVals.push_back(StackValMD);
  }
  return MDNode::get(Ctx, StackVals);
}

MDNode *llvm::memprof::getMIBStackNode(const MDNode *MIB) {
  assert(MIB->getNumOperands() >= 2);
  // The stack metadata is the first operand of each memprof MIB metadata.
  return cast<MDNode>(MIB->getOperand(0));
}

AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) {
  assert(MIB->getNumOperands() >= 2);
  // The allocation type is currently the second operand of each memprof
  // MIB metadata. This will need to change as we add additional allocation
  // types that can be applied based on the allocation profile data.
  auto *MDS = dyn_cast<MDString>(MIB->getOperand(1));
  assert(MDS);
  if (MDS->getString() == "cold")
    return AllocationType::Cold;
  if (MDS->getString() == "hot")
    return AllocationType::Hot;
  return AllocationType::NotCold;
}

std::string llvm::memprof::getAllocTypeAttributeString(AllocationType Type) {
  switch (Type) {
  case AllocationType::NotCold:
    return "notcold";
  case AllocationType::Cold:
    return "cold";
  case AllocationType::Hot:
    return "hot";
  default:
    assert(false && "Unexpected alloc type");
  }
  llvm_unreachable("invalid alloc type");
}

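// Return true if exactly one allocation type bit is set in AllocTypes.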
bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) {
  const unsigned NumAllocTypes = llvm::popcount(AllocTypes);
  assert(NumAllocTypes != 0);
  return NumAllocTypes == 1;
}

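// Remove any existing "memprof"="ambiguous" attribute from the call. The
// assert verifies that if a memprof attribute is present at this point it can
// only be the ambiguous one.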
void llvm::memprof::removeAnyExistingAmbiguousAttribute(CallBase *CB) {
  if (!CB->hasFnAttr("memprof"))
    return;
  assert(CB->getFnAttr("memprof").getValueAsString() == "ambiguous");
  CB->removeFnAttr("memprof");
}

void llvm::memprof::addAmbiguousAttribute(CallBase *CB) {
  if (!MemProfUseAmbiguousAttributes)
    return;
  // We may have an existing ambiguous attribute if we are reanalyzing
  // after inlining.
  if (CB->hasFnAttr("memprof")) {
    assert(CB->getFnAttr("memprof").getValueAsString() == "ambiguous");
  } else {
    auto A = llvm::Attribute::get(CB->getContext(), "memprof", "ambiguous");
    CB->addFnAttr(A);
  }
}

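// Add a single profiled call stack (leaf allocation frame first) and its
// allocation type to the trie, merging the alloc type and appending the
// context size info into any existing nodes along the way.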
void CallStackTrie::addCallStack(
    AllocationType AllocType, ArrayRef<uint64_t> StackIds,
    std::vector<ContextTotalSize> ContextSizeInfo) {
  bool First = true;
  CallStackTrieNode *Curr = nullptr;
  for (auto StackId : StackIds) {
    // If this is the first stack frame, add or update alloc node.
    if (First) {
      First = false;
      if (Alloc) {
        assert(AllocStackId == StackId);
        Alloc->addAllocType(AllocType);
      } else {
        AllocStackId = StackId;
        Alloc = new CallStackTrieNode(AllocType);
      }
      Curr = Alloc;
      continue;
    }
    // Update existing caller node if it exists.
    auto [Next, Inserted] = Curr->Callers.try_emplace(StackId);
    if (!Inserted) {
      Curr = Next->second;
      Curr->addAllocType(AllocType);
      continue;
    }
    // Otherwise add a new caller node.
    auto *New = new CallStackTrieNode(AllocType);
    Next->second = New;
    Curr = New;
  }
  assert(Curr);
  // Append all of the ContextSizeInfo, along with their original AllocType.
  llvm::append_range(Curr->ContextInfo,
                     llvm::map_range(ContextSizeInfo,
                                     [AllocType](const ContextTotalSize &CTS) {
                                       return ContextSizeTypePair(CTS,
                                                                  AllocType);
                                     }));
}

void CallStackTrie::addCallStack(MDNode *MIB) {
  // Note that we are building this from existing MD_memprof metadata.
  BuiltFromExistingMetadata = true;
  MDNode *StackMD = getMIBStackNode(MIB);
  assert(StackMD);
  std::vector<uint64_t> CallStack;
  CallStack.reserve(StackMD->getNumOperands());
  for (const auto &MIBStackIter : StackMD->operands()) {
    auto *StackId = mdconst::dyn_extract<ConstantInt>(MIBStackIter);
    assert(StackId);
    CallStack.push_back(StackId->getZExtValue());
  }
  std::vector<ContextTotalSize> ContextSizeInfo;
  // Collect the context size information if it exists.
  if (MIB->getNumOperands() > 2) {
    for (unsigned I = 2; I < MIB->getNumOperands(); I++) {
      MDNode *ContextSizePair = dyn_cast<MDNode>(MIB->getOperand(I));
      assert(ContextSizePair->getNumOperands() == 2);
      uint64_t FullStackId =
          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(0))
              ->getZExtValue();
      uint64_t TotalSize =
          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(1))
              ->getZExtValue();
      ContextSizeInfo.push_back({FullStackId, TotalSize});
    }
  }
  addCallStack(getMIBAllocType(MIB), CallStack, std::move(ContextSizeInfo));
}

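// Create a single MIB metadata node for the given call stack prefix and
// allocation type, appending context size info operands when they need to be
// kept for the thin link, and accumulating the total and cold byte counts for
// the represented contexts. Roughly (operand layout only, not exact IR
// syntax), the resulting node is
//   !{<call stack node>, !"<alloc type>", <{full stack id, total size} pairs>}.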
static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
                             AllocationType AllocType,
                             ArrayRef<ContextSizeTypePair> ContextInfo,
                             const uint64_t MaxColdSize,
                             bool BuiltFromExistingMetadata,
                             uint64_t &TotalBytes, uint64_t &ColdBytes) {
  SmallVector<Metadata *> MIBPayload(
      {buildCallstackMetadata(MIBCallStack, Ctx)});
  MIBPayload.push_back(
      MDString::get(Ctx, getAllocTypeAttributeString(AllocType)));

  if (ContextInfo.empty()) {
    // The profile matcher should have provided context size info if there was
    // a MinCallsiteColdBytePercent < 100. Here we check >= 100 to gracefully
    // handle a user-provided percent larger than 100. However, we may not have
    // this information if we built the Trie from existing MD_memprof metadata.
    assert(BuiltFromExistingMetadata || MinCallsiteColdBytePercent >= 100);
    return MDNode::get(Ctx, MIBPayload);
  }

  for (const auto &[CSI, AT] : ContextInfo) {
    const auto &[FullStackId, TotalSize] = CSI;
    TotalBytes += TotalSize;
    bool LargeColdContext = false;
    if (AllocType == AllocationType::Cold) {
      ColdBytes += TotalSize;
      // If we have the max cold context size from summary information and have
      // requested identification of contexts above a percentage of the max,
      // see if this context qualifies. We should assume this is large if we
      // rebuilt the trie from existing metadata (i.e. to update after
      // inlining), in which case we don't have a MaxSize from the profile - we
      // assume any context size info in existence on the metadata should be
      // propagated.
      if (BuiltFromExistingMetadata ||
          (MaxColdSize > 0 && MinPercentMaxColdSize < 100 &&
           TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize))
        LargeColdContext = true;
    }
    // Only add the context size info as metadata if we need it in the thin
    // link (currently if reporting of hinted sizes is enabled, we have
    // specified a threshold for marking allocations cold after cloning, or we
    // have identified this as a large cold context of interest above).
    if (metadataIncludesAllContextSizeInfo() || LargeColdContext) {
      auto *FullStackIdMD = ValueAsMetadata::get(
          ConstantInt::get(Type::getInt64Ty(Ctx), FullStackId));
      auto *TotalSizeMD = ValueAsMetadata::get(
          ConstantInt::get(Type::getInt64Ty(Ctx), TotalSize));
      auto *ContextSizeMD = MDNode::get(Ctx, {FullStackIdMD, TotalSizeMD});
      MIBPayload.push_back(ContextSizeMD);
    }
  }
  assert(TotalBytes > 0);
  return MDNode::get(Ctx, MIBPayload);
}

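// Recursively accumulate the context size info (and original alloc types) from
// the subtrie rooted at Node into ContextInfo.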
void CallStackTrie::collectContextInfo(
    CallStackTrieNode *Node, std::vector<ContextSizeTypePair> &ContextInfo) {
  llvm::append_range(ContextInfo, Node->ContextInfo);
  for (auto &Caller : Node->Callers)
    collectContextInfo(Caller.second, ContextInfo);
}

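// Recursively convert any Hot alloc type in the subtrie rooted at Node to
// NotCold, since cloning of hot contexts is not currently supported.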
void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
  if (Node->hasAllocType(AllocationType::Hot)) {
    Node->removeAllocType(AllocationType::Hot);
    Node->addAllocType(AllocationType::NotCold);
  }
  for (auto &Caller : Node->Callers)
    convertHotToNotCold(Caller.second);
}

// Helper to emit messages for non-cold contexts that are ignored for various
// reasons when reporting of hinted bytes is enabled.
static void emitIgnoredNonColdContextMessage(StringRef Tag,
                                             uint64_t FullStackId,
                                             StringRef Extra,
                                             uint64_t TotalSize) {
  errs() << "MemProf hinting: Total size for " << Tag
         << " non-cold full allocation context hash " << FullStackId << Extra
         << ": " << TotalSize << "\n";
}

// Copy over some or all of NewMIBNodes to the SavedMIBNodes vector, depending
// on options that enable filtering out some NotCold contexts.
static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
                                    std::vector<Metadata *> &SavedMIBNodes,
                                    unsigned CallerContextLength,
                                    uint64_t TotalBytes, uint64_t ColdBytes,
                                    bool BuiltFromExistingMetadata) {
  const bool MostlyCold =
      // If we have built the Trie from existing MD_memprof metadata, we may or
      // may not have context size information (in which case ColdBytes and
      // TotalBytes are 0, which is also not guarded against below). Even if we
      // do have some context size information from the metadata, we have
      // already gone through a round of discarding of small non-cold contexts
      // during matching, and it would be overly aggressive to do it again, and
      // we also want to maintain the same behavior with and without reporting
      // of hinted bytes enabled.
      !BuiltFromExistingMetadata && MinCallsiteColdBytePercent < 100 &&
      ColdBytes > 0 &&
      ColdBytes * 100 >= MinCallsiteColdBytePercent * TotalBytes;

  // In the simplest case, with pruning disabled, keep all the new MIB nodes.
  if (MemProfKeepAllNotColdContexts && !MostlyCold) {
    append_range(SavedMIBNodes, NewMIBNodes);
    return;
  }

  auto EmitMessageForRemovedContexts = [](const MDNode *MIBMD, StringRef Tag,
                                          StringRef Extra) {
    assert(MIBMD->getNumOperands() > 2);
    for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
      MDNode *ContextSizePair = dyn_cast<MDNode>(MIBMD->getOperand(I));
      assert(ContextSizePair->getNumOperands() == 2);
      uint64_t FullStackId =
          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(0))
              ->getZExtValue();
      uint64_t TS =
          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(1))
              ->getZExtValue();
      emitIgnoredNonColdContextMessage(Tag, FullStackId, Extra, TS);
    }
  };

  // If the cold bytes at the current callsite exceed the given threshold, we
  // discard all non-cold contexts, so we do not need any of the later pruning
  // handling. We can simply copy over all the cold contexts and return early.
  if (MostlyCold) {
    auto NewColdMIBNodes =
        make_filter_range(NewMIBNodes, [&](const Metadata *M) {
          auto MIBMD = cast<MDNode>(M);
          // Only append cold contexts.
          if (getMIBAllocType(MIBMD) == AllocationType::Cold)
            return true;
          if (MemProfReportHintedSizes) {
            const float PercentCold = ColdBytes * 100.0 / TotalBytes;
            std::string PercentStr;
            llvm::raw_string_ostream OS(PercentStr);
            OS << format(" for %5.2f%% cold bytes", PercentCold);
            EmitMessageForRemovedContexts(MIBMD, "discarded", OS.str());
          }
          return false;
        });
    for (auto *M : NewColdMIBNodes)
      SavedMIBNodes.push_back(M);
    return;
  }

  // Prune unneeded NotCold contexts, taking advantage of the fact
  // that we later will only clone Cold contexts, as NotCold is the allocation
  // default. We only need to keep as metadata the NotCold contexts that
  // overlap the longest with Cold allocations, so that we know how deeply we
  // need to clone. For example, assume we add the following contexts to the
  // trie:
  //    1 3 (notcold)
  //    1 2 4 (cold)
  //    1 2 5 (notcold)
  //    1 2 6 (notcold)
  // the trie looks like:
  //         1
  //        / \
  //       2   3
  //      /|\
  //     4 5 6
  //
  // It is sufficient to prune all but one of the not-cold contexts (either
  // 1,2,5 or 1,2,6; we arbitrarily keep the first one we encounter, which will
  // be 1,2,5).
  //
  // To do this pruning, we first check if there were any not-cold
  // contexts kept for a deeper caller, which will have a context length larger
  // than the CallerContextLength being handled here (i.e. kept by a deeper
  // recursion step). If so, none of the not-cold MIB nodes added for the
  // immediate callers need to be kept. If not, we keep the first (created
  // for the immediate caller) not-cold MIB node.
  bool LongerNotColdContextKept = false;
  for (auto *MIB : NewMIBNodes) {
    auto MIBMD = cast<MDNode>(MIB);
    if (getMIBAllocType(MIBMD) == AllocationType::Cold)
      continue;
    MDNode *StackMD = getMIBStackNode(MIBMD);
    assert(StackMD);
    if (StackMD->getNumOperands() > CallerContextLength) {
      LongerNotColdContextKept = true;
      break;
    }
  }
  // Don't need to emit any for the immediate caller if we already have
  // longer overlapping contexts.
  bool KeepFirstNewNotCold = !LongerNotColdContextKept;
  auto NewColdMIBNodes = make_filter_range(NewMIBNodes, [&](const Metadata *M) {
    auto MIBMD = cast<MDNode>(M);
    // Only keep cold contexts and the first (longest) non-cold context.
    if (getMIBAllocType(MIBMD) != AllocationType::Cold) {
      MDNode *StackMD = getMIBStackNode(MIBMD);
      assert(StackMD);
      // Keep any already kept for longer contexts.
      if (StackMD->getNumOperands() > CallerContextLength)
        return true;
      // Otherwise keep the first one added by the immediate caller if there
      // were no longer contexts.
      if (KeepFirstNewNotCold) {
        KeepFirstNewNotCold = false;
        return true;
      }
      if (MemProfReportHintedSizes)
        EmitMessageForRemovedContexts(MIBMD, "pruned", "");
      return false;
    }
    return true;
  });
  for (auto *M : NewColdMIBNodes)
    SavedMIBNodes.push_back(M);
}

436
437// Recursive helper to trim contexts and create metadata nodes.
438// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
439// caller makes it simpler to handle the many early returns in this method.
440// Updates the total and cold profiled bytes in the subtrie rooted at this node.
441bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
442 std::vector<uint64_t> &MIBCallStack,
443 std::vector<Metadata *> &MIBNodes,
444 bool CalleeHasAmbiguousCallerContext,
445 uint64_t &TotalBytes, uint64_t &ColdBytes) {
446 // Trim context below the first node in a prefix with a single alloc type.
447 // Add an MIB record for the current call stack prefix.
448 if (hasSingleAllocType(AllocTypes: Node->AllocTypes)) {
449 std::vector<ContextSizeTypePair> ContextInfo;
450 collectContextInfo(Node, ContextInfo);
451 MIBNodes.push_back(x: createMIBNode(
452 Ctx, MIBCallStack, AllocType: (AllocationType)Node->AllocTypes, ContextInfo,
453 MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes));
454 return true;
455 }
456
457 // We don't have a single allocation for all the contexts sharing this prefix,
458 // so recursively descend into callers in trie.
459 if (!Node->Callers.empty()) {
460 bool NodeHasAmbiguousCallerContext = Node->Callers.size() > 1;
461 bool AddedMIBNodesForAllCallerContexts = true;
462 // Accumulate all new MIB nodes by the recursive calls below into a vector
463 // that will later be filtered before adding to the caller's MIBNodes
464 // vector.
465 std::vector<Metadata *> NewMIBNodes;
466 // Determine the total and cold byte counts for all callers, then add to the
467 // caller's counts further below.
468 uint64_t CallerTotalBytes = 0;
469 uint64_t CallerColdBytes = 0;
470 for (auto &Caller : Node->Callers) {
471 MIBCallStack.push_back(x: Caller.first);
472 AddedMIBNodesForAllCallerContexts &= buildMIBNodes(
473 Node: Caller.second, Ctx, MIBCallStack, MIBNodes&: NewMIBNodes,
474 CalleeHasAmbiguousCallerContext: NodeHasAmbiguousCallerContext, TotalBytes&: CallerTotalBytes, ColdBytes&: CallerColdBytes);
475 // Remove Caller.
476 MIBCallStack.pop_back();
477 }
478 // Pass in the stack length of the MIB nodes added for the immediate caller,
479 // which is the current stack length plus 1.
480 saveFilteredNewMIBNodes(NewMIBNodes, SavedMIBNodes&: MIBNodes, CallerContextLength: MIBCallStack.size() + 1,
481 TotalBytes: CallerTotalBytes, ColdBytes: CallerColdBytes,
482 BuiltFromExistingMetadata);
483 TotalBytes += CallerTotalBytes;
484 ColdBytes += CallerColdBytes;
485
486 if (AddedMIBNodesForAllCallerContexts)
487 return true;
488 // We expect that the callers should be forced to add MIBs to disambiguate
489 // the context in this case (see below).
490 assert(!NodeHasAmbiguousCallerContext);
491 }
492
493 // If we reached here, then this node does not have a single allocation type,
494 // and we didn't add metadata for a longer call stack prefix including any of
495 // Node's callers. That means we never hit a single allocation type along all
496 // call stacks with this prefix. This can happen due to recursion collapsing
497 // or the stack being deeper than tracked by the profiler runtime, leading to
498 // contexts with different allocation types being merged. In that case, we
499 // trim the context just below the deepest context split, which is this
500 // node if the callee has an ambiguous caller context (multiple callers),
501 // since the recursive calls above returned false. Conservatively give it
502 // non-cold allocation type.
503 if (!CalleeHasAmbiguousCallerContext)
504 return false;
505 std::vector<ContextSizeTypePair> ContextInfo;
506 collectContextInfo(Node, ContextInfo);
507 MIBNodes.push_back(x: createMIBNode(
508 Ctx, MIBCallStack, AllocType: AllocationType::NotCold, ContextInfo, MaxColdSize,
509 BuiltFromExistingMetadata, TotalBytes, ColdBytes));
510 return true;
511}
512
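// Apply a single alloc type attribute (e.g. "memprof"="cold") to the call,
// replacing any existing ambiguous attribute, and optionally emit hinted size
// reporting and an optimization remark.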
void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
                                                StringRef Descriptor) {
  auto AllocTypeString = getAllocTypeAttributeString(AT);
  auto A = llvm::Attribute::get(CI->getContext(), "memprof", AllocTypeString);
  // After inlining we may be able to convert an existing ambiguous allocation
  // to an unambiguous one.
  removeAnyExistingAmbiguousAttribute(CI);
  CI->addFnAttr(A);
  if (MemProfReportHintedSizes) {
    std::vector<ContextSizeTypePair> ContextInfo;
    collectContextInfo(Alloc, ContextInfo);
    for (const auto &[CSI, OrigAT] : ContextInfo) {
      const auto &[FullStackId, TotalSize] = CSI;
      // If the original alloc type is not the one being applied as the hint,
      // report that we ignored this context.
      if (AT != OrigAT) {
        emitIgnoredNonColdContextMessage("ignored", FullStackId, "", TotalSize);
        continue;
      }
      errs() << "MemProf hinting: Total size for full allocation context hash "
             << FullStackId << " and " << Descriptor << " alloc type "
             << getAllocTypeAttributeString(AT) << ": " << TotalSize << "\n";
    }
  }
  if (ORE)
    ORE->emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CI)
              << ore::NV("AllocationCall", CI) << " in function "
              << ore::NV("Caller", CI->getFunction())
              << " marked with memprof allocation attribute "
              << ore::NV("Attribute", AllocTypeString));
}

// Build and attach the minimal necessary MIB metadata. If the alloc has a
// single allocation type, add a function attribute instead. Returns true if
// memprof metadata attached, false if not (attribute added).
bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
  if (hasSingleAllocType(Alloc->AllocTypes)) {
    addSingleAllocTypeAttribute(CI, (AllocationType)Alloc->AllocTypes,
                                "single");
    return false;
  }
  // If there were any hot allocation contexts, the Alloc trie node would have
  // the Hot type set. If so, because we don't currently support cloning for
  // hot contexts, they should be converted to NotCold. This happens in the
  // cloning support anyway, however, doing this now enables more aggressive
  // context trimming when building the MIB metadata (and possibly may make the
  // allocation have a single NotCold allocation type), greatly reducing
  // overheads in bitcode, cloning memory and cloning time.
  if (Alloc->hasAllocType(AllocationType::Hot)) {
    convertHotToNotCold(Alloc);
    // Check whether we now have a single alloc type.
    if (hasSingleAllocType(Alloc->AllocTypes)) {
      addSingleAllocTypeAttribute(CI, (AllocationType)Alloc->AllocTypes,
                                  "single");
      return false;
    }
  }
  auto &Ctx = CI->getContext();
  std::vector<uint64_t> MIBCallStack;
  MIBCallStack.push_back(AllocStackId);
  std::vector<Metadata *> MIBNodes;
  uint64_t TotalBytes = 0;
  uint64_t ColdBytes = 0;
  assert(!Alloc->Callers.empty() && "addCallStack has not been called yet");
  // The CalleeHasAmbiguousCallerContext flag is meant to say whether the
  // callee of the given node has more than one caller. Here the node being
  // passed in is the alloc and it has no callees. So it's false.
  if (buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes,
                    /*CalleeHasAmbiguousCallerContext=*/false, TotalBytes,
                    ColdBytes)) {
    assert(MIBCallStack.size() == 1 &&
           "Should only be left with Alloc's location in stack");
    CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
    addAmbiguousAttribute(CI);
    return true;
  }
  // Handle the corner case where the CallStackTrie consists of a single chain
  // to the leaf and every node in the chain has multiple alloc types; in that
  // case conservatively give the allocation the non-cold allocation type.
  // FIXME: Avoid this case before the memory profile is created.
  // Alternatively, select the hint based on the fraction of cold bytes.
  addSingleAllocTypeAttribute(CI, AllocationType::NotCold,
                              "indistinguishable");
  return false;
}

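// Specializations of the CallStack iteration helpers for walking the stack ids
// stored as ConstantInt operands of an MDNode (as in memprof MIB stack and
// callsite metadata).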
template <>
CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::CallStackIterator(
    const MDNode *N, bool End)
    : N(N) {
  if (!N)
    return;
  Iter = End ? N->op_end() : N->op_begin();
}

template <>
uint64_t
CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*() {
  assert(Iter != N->op_end());
  ConstantInt *StackIdCInt = mdconst::dyn_extract<ConstantInt>(*Iter);
  assert(StackIdCInt);
  return StackIdCInt->getZExtValue();
}

template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const {
  assert(N);
  return mdconst::dyn_extract<ConstantInt>(N->operands().back())
      ->getZExtValue();
}

MDNode *MDNode::getMergedMemProfMetadata(MDNode *A, MDNode *B) {
  // TODO: Support more sophisticated merging, such as selecting the one with
  // more bytes allocated, or implement support for carrying multiple
  // allocation leaf contexts. For now, keep the first one.
  if (A)
    return A;
  return B;
}

MDNode *MDNode::getMergedCallsiteMetadata(MDNode *A, MDNode *B) {
  // TODO: Support more sophisticated merging, which will require support for
  // carrying multiple contexts. For now, keep the first one.
  if (A)
    return A;
  return B;
}