1//===- MemProfUse.cpp - memory allocation profile use pass --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemProfUsePass which reads memory profiling data
10// and uses it to add metadata to instructions to guide optimization.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Instrumentation/MemProfUse.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/Analysis/MemoryProfileInfo.h"
20#include "llvm/Analysis/OptimizationRemarkEmitter.h"
21#include "llvm/Analysis/StaticDataProfileInfo.h"
22#include "llvm/Analysis/TargetLibraryInfo.h"
23#include "llvm/IR/DiagnosticInfo.h"
24#include "llvm/IR/Function.h"
25#include "llvm/IR/IntrinsicInst.h"
26#include "llvm/IR/Module.h"
27#include "llvm/ProfileData/DataAccessProf.h"
28#include "llvm/ProfileData/InstrProf.h"
29#include "llvm/ProfileData/InstrProfReader.h"
30#include "llvm/ProfileData/MemProfCommon.h"
31#include "llvm/Support/BLAKE3.h"
32#include "llvm/Support/CommandLine.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/Format.h"
35#include "llvm/Support/HashBuilder.h"
36#include "llvm/Support/MD5.h"
37#include "llvm/Support/VirtualFileSystem.h"
38#include "llvm/Transforms/Utils/LongestCommonSequence.h"
39#include <map>
40#include <set>
41
42using namespace llvm;
43using namespace llvm::memprof;
44
45#define DEBUG_TYPE "memprof"
46
47namespace llvm {
48extern cl::opt<bool> PGOWarnMissing;
49extern cl::opt<bool> NoPGOWarnMismatch;
50extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
51extern cl::opt<bool> AnnotateStringLiteralSectionPrefix;
52} // namespace llvm
53
54// By default disable matching of allocation profiles onto operator new that
55// already explicitly pass a hot/cold hint, since we don't currently
56// override these hints anyway.
57static cl::opt<bool> ClMemProfMatchHotColdNew(
58 "memprof-match-hot-cold-new",
59 cl::desc(
60 "Match allocation profiles onto existing hot/cold operator new calls"),
61 cl::Hidden, cl::init(Val: false));
62
63static cl::opt<bool>
64 ClPrintMemProfMatchInfo("memprof-print-match-info",
65 cl::desc("Print matching stats for each allocation "
66 "context in this module's profiles"),
67 cl::Hidden, cl::init(Val: false));
68
69static cl::opt<bool> PrintMatchedAllocStack(
70 "memprof-print-matched-alloc-stack",
71 cl::desc("Print full stack context for matched "
72 "allocations with -memprof-print-match-info."),
73 cl::Hidden, cl::init(Val: false));
74
75static cl::opt<bool>
76 PrintFunctionGuids("memprof-print-function-guids",
77 cl::desc("Print function GUIDs computed for matching"),
78 cl::Hidden, cl::init(Val: false));
79
80static cl::opt<bool>
81 SalvageStaleProfile("memprof-salvage-stale-profile",
82 cl::desc("Salvage stale MemProf profile"),
83 cl::init(Val: false), cl::Hidden);
84
85static cl::opt<bool> ClMemProfAttachCalleeGuids(
86 "memprof-attach-calleeguids",
87 cl::desc(
88 "Attach calleeguids as value profile metadata for indirect calls."),
89 cl::init(Val: true), cl::Hidden);
90
91static cl::opt<unsigned> MinMatchedColdBytePercent(
92 "memprof-matching-cold-threshold", cl::init(Val: 100), cl::Hidden,
93 cl::desc("Min percent of cold bytes matched to hint allocation cold"));
94
95static cl::opt<bool> AnnotateStaticDataSectionPrefix(
96 "memprof-annotate-static-data-prefix", cl::init(Val: false), cl::Hidden,
97 cl::desc("If true, annotate the static data section prefix"));
98
99// Matching statistics
100STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
101STATISTIC(NumOfMemProfMismatch,
102 "Number of functions having mismatched memory profile hash.");
103STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
104STATISTIC(NumOfMemProfAllocContextProfiles,
105 "Number of alloc contexts in memory profile.");
106STATISTIC(NumOfMemProfCallSiteProfiles,
107 "Number of callsites in memory profile.");
108STATISTIC(NumOfMemProfMatchedAllocContexts,
109 "Number of matched memory profile alloc contexts.");
110STATISTIC(NumOfMemProfMatchedAllocs,
111 "Number of matched memory profile allocs.");
112STATISTIC(NumOfMemProfMatchedCallSites,
113 "Number of matched memory profile callsites.");
114STATISTIC(NumOfMemProfHotGlobalVars,
115 "Number of global vars annotated with 'hot' section prefix.");
116STATISTIC(NumOfMemProfColdGlobalVars,
117 "Number of global vars annotated with 'unlikely' section prefix.");
118STATISTIC(NumOfMemProfUnknownGlobalVars,
119 "Number of global vars with unknown hotness (no section prefix).");
120STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
121 "Number of global vars with user-specified section (not annotated).");
122
123static void addCallsiteMetadata(Instruction &I,
124 ArrayRef<uint64_t> InlinedCallStack,
125 LLVMContext &Ctx) {
126 I.setMetadata(KindID: LLVMContext::MD_callsite,
127 Node: buildCallstackMetadata(CallStack: InlinedCallStack, Ctx));
128}
129
130static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
131 uint32_t Column) {
132 llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
133 HashBuilder;
134 HashBuilder.add(Args: Function, Args: LineOffset, Args: Column);
135 llvm::BLAKE3Result<8> Hash = HashBuilder.final();
136 uint64_t Id;
137 std::memcpy(dest: &Id, src: Hash.data(), n: sizeof(Hash));
138 return Id;
139}
140
141static uint64_t computeStackId(const memprof::Frame &Frame) {
142 return computeStackId(Function: Frame.Function, LineOffset: Frame.LineOffset, Column: Frame.Column);
143}
144
145static AllocationType getAllocType(const AllocationInfo *AllocInfo) {
146 return getAllocType(TotalLifetimeAccessDensity: AllocInfo->Info.getTotalLifetimeAccessDensity(),
147 AllocCount: AllocInfo->Info.getAllocCount(),
148 TotalLifetime: AllocInfo->Info.getTotalLifetime());
149}
150
151static AllocationType addCallStack(CallStackTrie &AllocTrie,
152 const AllocationInfo *AllocInfo,
153 uint64_t FullStackId) {
154 SmallVector<uint64_t> StackIds;
155 for (const auto &StackFrame : AllocInfo->CallStack)
156 StackIds.push_back(Elt: computeStackId(Frame: StackFrame));
157 auto AllocType = getAllocType(AllocInfo);
158 std::vector<ContextTotalSize> ContextSizeInfo;
159 if (recordContextSizeInfoForAnalysis()) {
160 auto TotalSize = AllocInfo->Info.getTotalSize();
161 assert(TotalSize);
162 assert(FullStackId != 0);
163 ContextSizeInfo.push_back(x: {.FullStackId: FullStackId, .TotalSize: TotalSize});
164 }
165 AllocTrie.addCallStack(AllocType, StackIds, ContextSizeInfo: std::move(ContextSizeInfo));
166 return AllocType;
167}
168
169// Return true if InlinedCallStack, computed from a call instruction's debug
170// info, is a prefix of ProfileCallStack, a list of Frames from profile data
171// (either the allocation data or a callsite).
172static bool
173stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
174 ArrayRef<uint64_t> InlinedCallStack) {
175 return ProfileCallStack.size() >= InlinedCallStack.size() &&
176 llvm::equal(LRange: ProfileCallStack.take_front(N: InlinedCallStack.size()),
177 RRange&: InlinedCallStack, P: [](const Frame &F, uint64_t StackId) {
178 return computeStackId(Frame: F) == StackId;
179 });
180}
181
182static bool isAllocationWithHotColdVariant(const Function *Callee,
183 const TargetLibraryInfo &TLI) {
184 if (!Callee)
185 return false;
186 LibFunc Func;
187 if (!TLI.getLibFunc(FDecl: *Callee, F&: Func))
188 return false;
189 switch (Func) {
190 case LibFunc_Znwm:
191 case LibFunc_ZnwmRKSt9nothrow_t:
192 case LibFunc_ZnwmSt11align_val_t:
193 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
194 case LibFunc_Znam:
195 case LibFunc_ZnamRKSt9nothrow_t:
196 case LibFunc_ZnamSt11align_val_t:
197 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
198 case LibFunc_size_returning_new:
199 case LibFunc_size_returning_new_aligned:
200 return true;
201 case LibFunc_Znwm12__hot_cold_t:
202 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
203 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
204 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
205 case LibFunc_Znam12__hot_cold_t:
206 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
207 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
208 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
209 case LibFunc_size_returning_new_hot_cold:
210 case LibFunc_size_returning_new_aligned_hot_cold:
211 return ClMemProfMatchHotColdNew;
212 default:
213 return false;
214 }
215}
216
217static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar,
218 AnnotationKind Kind) {
219 assert(Kind != llvm::memprof::AnnotationKind::AnnotationOK &&
220 "Should not handle AnnotationOK here");
221 SmallString<32> Reason;
222 switch (Kind) {
223 case llvm::memprof::AnnotationKind::ExplicitSection:
224 ++NumOfMemProfExplicitSectionGlobalVars;
225 Reason.append(RHS: "explicit section name");
226 break;
227 case llvm::memprof::AnnotationKind::DeclForLinker:
228 Reason.append(RHS: "linker declaration");
229 break;
230 case llvm::memprof::AnnotationKind::ReservedName:
231 Reason.append(RHS: "name starts with `llvm.`");
232 break;
233 default:
234 llvm_unreachable("Unexpected annotation kind");
235 }
236 LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
237 << Reason << ".\n");
238}
239
240// Computes the LLVM version of MD5 hash for the content of a string
241// literal.
242static std::optional<uint64_t>
243getStringContentHash(const GlobalVariable &GVar) {
244 auto *Initializer = GVar.getInitializer();
245 if (!Initializer)
246 return std::nullopt;
247 if (auto *C = dyn_cast<ConstantDataSequential>(Val: Initializer))
248 if (C->isString()) {
249 // Note the hash computed for the literal would include the null byte.
250 return llvm::MD5Hash(Str: C->getAsString());
251 }
252 return std::nullopt;
253}
254
255// Structure for tracking info about matched allocation contexts for use with
256// -memprof-print-match-info and -memprof-print-matched-alloc-stack.
257struct AllocMatchInfo {
258 // Total size in bytes of matched context.
259 uint64_t TotalSize = 0;
260 // Matched allocation's type.
261 AllocationType AllocType = AllocationType::None;
262 // Number of frames matched to the allocation itself (values will be >1 in
263 // cases where allocation was already inlined). Use a set because there can
264 // be multiple inlined instances and each may have a different inline depth.
265 // Use std::set to iterate in sorted order when printing.
266 std::set<unsigned> MatchedFramesSet;
267 // The full call stack of the allocation, for cases where requested via
268 // -memprof-print-matched-alloc-stack.
269 std::vector<Frame> CallStack;
270
271 // Caller responsible for inserting the matched frames and the call stack when
272 // appropriate.
273 AllocMatchInfo(uint64_t TotalSize, AllocationType AllocType)
274 : TotalSize(TotalSize), AllocType(AllocType) {}
275};
276
277DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
278memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
279 function_ref<bool(uint64_t)> IsPresentInProfile) {
280 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
281
282 auto GetOffset = [](const DILocation *DIL) {
283 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
284 0xffff;
285 };
286
287 for (Function &F : M) {
288 if (F.isDeclaration())
289 continue;
290
291 for (auto &BB : F) {
292 for (auto &I : BB) {
293 if (!isa<CallBase>(Val: &I) || isa<IntrinsicInst>(Val: &I))
294 continue;
295
296 auto *CB = dyn_cast<CallBase>(Val: &I);
297 auto *CalledFunction = CB->getCalledFunction();
298 // Disregard indirect calls and intrinsics.
299 if (!CalledFunction || CalledFunction->isIntrinsic())
300 continue;
301
302 StringRef CalleeName = CalledFunction->getName();
303 // True if we are calling a heap allocation function that supports
304 // hot/cold variants.
305 bool IsAlloc = isAllocationWithHotColdVariant(Callee: CalledFunction, TLI);
306 // True for the first iteration below, indicating that we are looking at
307 // a leaf node.
308 bool IsLeaf = true;
309 for (const DILocation *DIL = I.getDebugLoc(); DIL;
310 DIL = DIL->getInlinedAt()) {
311 StringRef CallerName = DIL->getSubprogramLinkageName();
312 assert(!CallerName.empty() &&
313 "Be sure to enable -fdebug-info-for-profiling");
314 uint64_t CallerGUID = memprof::getGUID(FunctionName: CallerName);
315 uint64_t CalleeGUID = memprof::getGUID(FunctionName: CalleeName);
316 // Pretend that we are calling a function with GUID == 0 if we are
317 // in the inline stack leading to a heap allocation function.
318 if (IsAlloc) {
319 if (IsLeaf) {
320 // For leaf nodes, set CalleeGUID to 0 without consulting
321 // IsPresentInProfile.
322 CalleeGUID = 0;
323 } else if (!IsPresentInProfile(CalleeGUID)) {
324 // In addition to the leaf case above, continue to set CalleeGUID
325 // to 0 as long as we don't see CalleeGUID in the profile.
326 CalleeGUID = 0;
327 } else {
328 // Once we encounter a callee that exists in the profile, stop
329 // setting CalleeGUID to 0.
330 IsAlloc = false;
331 }
332 }
333
334 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
335 Calls[CallerGUID].emplace_back(Args&: Loc, Args&: CalleeGUID);
336 CalleeName = CallerName;
337 IsLeaf = false;
338 }
339 }
340 }
341 }
342
343 // Sort each call list by the source location.
344 for (auto &[CallerGUID, CallList] : Calls) {
345 llvm::sort(C&: CallList);
346 CallList.erase(CS: llvm::unique(R&: CallList), CE: CallList.end());
347 }
348
349 return Calls;
350}
351
352DenseMap<uint64_t, LocToLocMap>
353memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
354 const TargetLibraryInfo &TLI) {
355 DenseMap<uint64_t, LocToLocMap> UndriftMaps;
356
357 DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =
358 MemProfReader->getMemProfCallerCalleePairs();
359 DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =
360 extractCallsFromIR(M, TLI, IsPresentInProfile: [&](uint64_t GUID) {
361 return CallsFromProfile.contains(Val: GUID);
362 });
363
364 // Compute an undrift map for each CallerGUID.
365 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
366 auto It = CallsFromProfile.find(Val: CallerGUID);
367 if (It == CallsFromProfile.end())
368 continue;
369 const auto &ProfileAnchors = It->second;
370
371 LocToLocMap Matchings;
372 longestCommonSequence<LineLocation, GlobalValue::GUID>(
373 AnchorList1: ProfileAnchors, AnchorList2: IRAnchors, FunctionMatchesProfile: std::equal_to<GlobalValue::GUID>(),
374 InsertMatching: [&](LineLocation A, LineLocation B) { Matchings.try_emplace(Key: A, Args&: B); });
375 [[maybe_unused]] bool Inserted =
376 UndriftMaps.try_emplace(Key: CallerGUID, Args: std::move(Matchings)).second;
377
378 // The insertion must succeed because we visit each GUID exactly once.
379 assert(Inserted);
380 }
381
382 return UndriftMaps;
383}
384
385// Given a MemProfRecord, undrift all the source locations present in the
386// record in place.
387static void
388undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
389 memprof::MemProfRecord &MemProfRec) {
390 // Undrift a call stack in place.
391 auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
392 for (auto &F : CallStack) {
393 auto I = UndriftMaps.find(Val: F.Function);
394 if (I == UndriftMaps.end())
395 continue;
396 auto J = I->second.find(Val: LineLocation(F.LineOffset, F.Column));
397 if (J == I->second.end())
398 continue;
399 auto &NewLoc = J->second;
400 F.LineOffset = NewLoc.LineOffset;
401 F.Column = NewLoc.Column;
402 }
403 };
404
405 for (auto &AS : MemProfRec.AllocSites)
406 UndriftCallStack(AS.CallStack);
407
408 for (auto &CS : MemProfRec.CallSites)
409 UndriftCallStack(CS.Frames);
410}
411
412// Helper function to process CalleeGuids and create value profile metadata
413static void addVPMetadata(Module &M, Instruction &I,
414 ArrayRef<GlobalValue::GUID> CalleeGuids) {
415 if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
416 return;
417
418 // Prepare the vector of value data, initializing from any existing
419 // value-profile metadata present on the instruction so that we merge the
420 // new CalleeGuids into the existing entries.
421 SmallVector<InstrProfValueData> VDs;
422 uint64_t TotalCount = 0;
423
424 if (I.getMetadata(KindID: LLVMContext::MD_prof)) {
425 // Read all existing entries so we can merge them. Use a large
426 // MaxNumValueData to retrieve all existing entries.
427 VDs = getValueProfDataFromInst(Inst: I, ValueKind: IPVK_IndirectCallTarget,
428 /*MaxNumValueData=*/UINT32_MAX, TotalC&: TotalCount);
429 }
430
431 // Save the original size for use later in detecting whether any were added.
432 const size_t OriginalSize = VDs.size();
433
434 // Initialize the set of existing guids with the original list.
435 DenseSet<uint64_t> ExistingValues(
436 llvm::from_range,
437 llvm::map_range(
438 C&: VDs, F: [](const InstrProfValueData &Entry) { return Entry.Value; }));
439
440 // Merge CalleeGuids into list of existing VDs, by appending any that are not
441 // already included.
442 VDs.reserve(N: OriginalSize + CalleeGuids.size());
443 for (auto G : CalleeGuids) {
444 if (!ExistingValues.insert(V: G).second)
445 continue;
446 InstrProfValueData NewEntry;
447 NewEntry.Value = G;
448 // For MemProf, we don't have actual call counts, so we assign
449 // a weight of 1 to each potential target.
450 // TODO: Consider making this weight configurable or increasing it to
451 // improve effectiveness for ICP.
452 NewEntry.Count = 1;
453 TotalCount += NewEntry.Count;
454 VDs.push_back(Elt: NewEntry);
455 }
456
457 // Update the VP metadata if we added any new callee GUIDs to the list.
458 assert(VDs.size() >= OriginalSize);
459 if (VDs.size() == OriginalSize)
460 return;
461
462 // First clear the existing !prof.
463 I.setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);
464
465 // No need to sort the updated VDs as all appended entries have the same count
466 // of 1, which is no larger than any existing entries. The incoming list of
467 // CalleeGuids should already be deterministic for a given profile.
468 annotateValueSite(M, Inst&: I, VDs, Sum: TotalCount, ValueKind: IPVK_IndirectCallTarget, MaxMDCount: VDs.size());
469}
470
471static void handleAllocSite(
472 Instruction &I, CallBase *CI, ArrayRef<uint64_t> InlinedCallStack,
473 LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
474 const std::set<const AllocationInfo *> &AllocInfoSet,
475 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
476 // TODO: Remove this once the profile creation logic deduplicates contexts
477 // that are the same other than the IsInlineFrame bool. Until then, keep the
478 // largest.
479 DenseMap<uint64_t, const AllocationInfo *> UniqueFullContextIdAllocInfo;
480 for (auto *AllocInfo : AllocInfoSet) {
481 auto FullStackId = computeFullStackId(CallStack: AllocInfo->CallStack);
482 auto [It, Inserted] =
483 UniqueFullContextIdAllocInfo.insert(KV: {FullStackId, AllocInfo});
484 // If inserted entry, done.
485 if (Inserted)
486 continue;
487 // Keep the larger one, or the noncold one if they are the same size.
488 auto CurSize = It->second->Info.getTotalSize();
489 auto NewSize = AllocInfo->Info.getTotalSize();
490 if ((CurSize > NewSize) ||
491 (CurSize == NewSize &&
492 getAllocType(AllocInfo) != AllocationType::NotCold))
493 continue;
494 It->second = AllocInfo;
495 }
496 // We may match this instruction's location list to multiple MIB
497 // contexts. Add them to a Trie specialized for trimming the contexts to
498 // the minimal needed to disambiguate contexts with unique behavior.
499 CallStackTrie AllocTrie(&ORE, MaxColdSize);
500 uint64_t TotalSize = 0;
501 uint64_t TotalColdSize = 0;
502 for (auto &[FullStackId, AllocInfo] : UniqueFullContextIdAllocInfo) {
503 // Check the full inlined call stack against this one.
504 // If we found and thus matched all frames on the call, include
505 // this MIB.
506 if (stackFrameIncludesInlinedCallStack(ProfileCallStack: AllocInfo->CallStack,
507 InlinedCallStack)) {
508 NumOfMemProfMatchedAllocContexts++;
509 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
510 TotalSize += AllocInfo->Info.getTotalSize();
511 if (AllocType == AllocationType::Cold)
512 TotalColdSize += AllocInfo->Info.getTotalSize();
513 // Record information about the allocation if match info printing
514 // was requested.
515 if (ClPrintMemProfMatchInfo) {
516 assert(FullStackId != 0);
517 auto [Iter, Inserted] = FullStackIdToAllocMatchInfo.try_emplace(
518 k: FullStackId,
519 args: AllocMatchInfo(AllocInfo->Info.getTotalSize(), AllocType));
520 // Always insert the new matched frame count, since it may differ.
521 Iter->second.MatchedFramesSet.insert(x: InlinedCallStack.size());
522 if (Inserted && PrintMatchedAllocStack)
523 Iter->second.CallStack.insert(position: Iter->second.CallStack.begin(),
524 first: AllocInfo->CallStack.begin(),
525 last: AllocInfo->CallStack.end());
526 }
527 ORE.emit(
528 OptDiag: OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
529 << ore::NV("AllocationCall", CI) << " in function "
530 << ore::NV("Caller", CI->getFunction())
531 << " matched alloc context with alloc type "
532 << ore::NV("Attribute", getAllocTypeAttributeString(Type: AllocType))
533 << " total size " << ore::NV("Size", AllocInfo->Info.getTotalSize())
534 << " full context id " << ore::NV("Context", FullStackId)
535 << " frame count " << ore::NV("Frames", InlinedCallStack.size()));
536 }
537 }
538 // If the threshold for the percent of cold bytes is less than 100%,
539 // and not all bytes are cold, see if we should still hint this
540 // allocation as cold without context sensitivity.
541 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
542 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
543 AllocTrie.addSingleAllocTypeAttribute(CI, AT: AllocationType::Cold, Descriptor: "dominant");
544 return;
545 }
546
547 // We might not have matched any to the full inlined call stack.
548 // But if we did, create and attach metadata, or a function attribute if
549 // all contexts have identical profiled behavior.
550 if (!AllocTrie.empty()) {
551 NumOfMemProfMatchedAllocs++;
552 // MemprofMDAttached will be false if a function attribute was
553 // attached.
554 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
555 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
556 if (MemprofMDAttached) {
557 // Add callsite metadata for the instruction's location list so that
558 // it simpler later on to identify which part of the MIB contexts
559 // are from this particular instruction (including during inlining,
560 // when the callsite metadata will be updated appropriately).
561 // FIXME: can this be changed to strip out the matching stack
562 // context ids from the MIB contexts and not add any callsite
563 // metadata here to save space?
564 addCallsiteMetadata(I, InlinedCallStack, Ctx);
565 }
566 }
567}
568
569// Helper struct for maintaining refs to callsite data. As an alternative we
570// could store a pointer to the CallSiteInfo struct but we also need the frame
571// index. Using ArrayRefs instead makes it a little easier to read.
572struct CallSiteEntry {
573 // Subset of frames for the corresponding CallSiteInfo.
574 ArrayRef<Frame> Frames;
575 // Potential targets for indirect calls.
576 ArrayRef<GlobalValue::GUID> CalleeGuids;
577};
578
579static void handleCallSite(Instruction &I, const Function *CalledFunction,
580 ArrayRef<uint64_t> InlinedCallStack,
581 const std::vector<CallSiteEntry> &CallSiteEntries,
582 Module &M,
583 std::set<std::vector<uint64_t>> &MatchedCallSites,
584 OptimizationRemarkEmitter &ORE) {
585 auto &Ctx = M.getContext();
586 // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
587 // to support cases where the instuction's inlined frames match multiple call
588 // site entries, which can happen if the profile was collected from a binary
589 // where this instruction was eventually inlined into multiple callers.
590 SetVector<GlobalValue::GUID> CalleeGuids;
591 bool CallsiteMDAdded = false;
592 for (const auto &CallSiteEntry : CallSiteEntries) {
593 // If we found and thus matched all frames on the call, create and
594 // attach call stack metadata.
595 if (stackFrameIncludesInlinedCallStack(ProfileCallStack: CallSiteEntry.Frames,
596 InlinedCallStack)) {
597 NumOfMemProfMatchedCallSites++;
598 // Only need to find one with a matching call stack and add a single
599 // callsite metadata.
600 if (!CallsiteMDAdded) {
601 addCallsiteMetadata(I, InlinedCallStack, Ctx);
602
603 // Accumulate call site matching information upon request.
604 if (ClPrintMemProfMatchInfo) {
605 std::vector<uint64_t> CallStack;
606 append_range(C&: CallStack, R&: InlinedCallStack);
607 MatchedCallSites.insert(x: std::move(CallStack));
608 }
609 OptimizationRemark Remark(DEBUG_TYPE, "MemProfUse", &I);
610 Remark << ore::NV("CallSite", &I) << " in function "
611 << ore::NV("Caller", I.getFunction())
612 << " matched callsite with frame count "
613 << ore::NV("Frames", InlinedCallStack.size())
614 << " and stack ids";
615 for (uint64_t StackId : InlinedCallStack)
616 Remark << " " << ore::NV("StackId", StackId);
617 ORE.emit(OptDiag&: Remark);
618
619 // If this is a direct call, we're done.
620 if (CalledFunction)
621 break;
622 CallsiteMDAdded = true;
623 }
624
625 assert(!CalledFunction && "Didn't expect direct call");
626
627 // Collect Callee GUIDs from all matching CallSiteEntries.
628 CalleeGuids.insert(Start: CallSiteEntry.CalleeGuids.begin(),
629 End: CallSiteEntry.CalleeGuids.end());
630 }
631 }
632 // Try to attach indirect call metadata if possible.
633 addVPMetadata(M, I, CalleeGuids: CalleeGuids.getArrayRef());
634}
635
636// Dump inline call stack for debugging purposes.
637static void dumpInlineCallStack(Instruction &I, CallBase *CI,
638 OptimizationRemarkEmitter &ORE,
639 DenseSet<uint64_t> &SeenFrames,
640 DenseSet<uint64_t> &SeenStacks,
641 bool ProfileHasColumns) {
642 auto GetOffset = [](const DILocation *DIL) {
643 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
644 0xffff;
645 };
646
647 // Dump frame info. Frames are deduplicated using FrameID.
648 std::string CallStack;
649 raw_string_ostream CallStackOS(CallStack);
650 bool First = true;
651 for (const DILocation *DIL = I.getDebugLoc(); DIL;
652 DIL = DIL->getInlinedAt()) {
653 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
654 if (Name.empty())
655 Name = DIL->getScope()->getSubprogram()->getName();
656 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(GlobalName: Name);
657 uint64_t FrameID = computeStackId(Function: CalleeGUID, LineOffset: GetOffset(DIL),
658 Column: ProfileHasColumns ? DIL->getColumn() : 0);
659 if (SeenFrames.insert(V: FrameID).second) {
660 std::string DictMsg;
661 raw_string_ostream DictOS(DictMsg);
662 DictOS << "frame: " << FrameID << " " << Name << ":" << GetOffset(DIL)
663 << ":" << (ProfileHasColumns ? DIL->getColumn() : 0);
664 ORE.emit(OptDiag: OptimizationRemarkAnalysis(DEBUG_TYPE, "MemProfUse", CI)
665 << DictOS.str());
666 }
667
668 if (First)
669 First = false;
670 else
671 CallStackOS << ",";
672 CallStackOS << FrameID;
673 }
674
675 // Dump inline call stack info. Stacks are deduplicated using StackHash.
676 uint64_t StackHash = llvm::MD5Hash(Str: CallStack);
677 if (SeenStacks.insert(V: StackHash).second) {
678 std::string Msg;
679 raw_string_ostream OS(Msg);
680 OS << "inline call stack: " << CallStack;
681 ORE.emit(OptDiag: OptimizationRemarkAnalysis(DEBUG_TYPE, "MemProfUse", CI)
682 << OS.str());
683 }
684}
685
686static void
687readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
688 const TargetLibraryInfo &TLI,
689 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
690 std::set<std::vector<uint64_t>> &MatchedCallSites,
691 DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
692 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
693 DenseSet<uint64_t> &SeenStacks, DenseSet<uint64_t> &SeenFrames) {
694 auto &Ctx = M.getContext();
695 // Previously we used getIRPGOFuncName() here. If F is local linkage,
696 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
697 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't
698 // contain FileName's prefix. It caused local linkage function can't
699 // find MemProfRecord. So we use getName() now.
700 // 'unique-internal-linkage-names' can make MemProf work better for local
701 // linkage function.
702 auto FuncName = F.getName();
703 auto FuncGUID = Function::getGUIDAssumingExternalLinkage(GlobalName: FuncName);
704 if (PrintFunctionGuids)
705 errs() << "MemProf: Function GUID " << FuncGUID << " is " << FuncName
706 << "\n";
707 std::optional<memprof::MemProfRecord> MemProfRec;
708 auto Err = MemProfReader->getMemProfRecord(FuncNameHash: FuncGUID).moveInto(Value&: MemProfRec);
709 if (Err) {
710 handleAllErrors(E: std::move(Err), Handlers: [&](const InstrProfError &IPE) {
711 auto Err = IPE.get();
712 bool SkipWarning = false;
713 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
714 << ": ");
715 if (Err == instrprof_error::unknown_function) {
716 NumOfMemProfMissing++;
717 SkipWarning = !PGOWarnMissing;
718 LLVM_DEBUG(dbgs() << "unknown function");
719 } else if (Err == instrprof_error::hash_mismatch) {
720 NumOfMemProfMismatch++;
721 SkipWarning =
722 NoPGOWarnMismatch ||
723 (NoPGOWarnMismatchComdatWeak &&
724 (F.hasComdat() ||
725 F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
726 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
727 }
728
729 if (SkipWarning)
730 return;
731
732 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
733 Twine(" Hash = ") + std::to_string(val: FuncGUID))
734 .str();
735
736 Ctx.diagnose(
737 DI: DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
738 });
739 return;
740 }
741
742 NumOfMemProfFunc++;
743
  // If requested, undrift MemProfRecord so that the source locations in it
  // match those in the IR.
746 if (SalvageStaleProfile)
747 undriftMemProfRecord(UndriftMaps, MemProfRec&: *MemProfRec);
748
749 // Detect if there are non-zero column numbers in the profile. If not,
750 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
751 // columns in the IR). The profiled binary might have been built with
752 // column numbers disabled, for example.
753 bool ProfileHasColumns = false;
754
755 // Build maps of the location hash to all profile data with that leaf location
756 // (allocation info and the callsites).
757 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
758
759 // For the callsites we need to record slices of the frame array (see comments
760 // below where the map entries are added) along with their CalleeGuids.
761 std::map<uint64_t, std::vector<CallSiteEntry>> LocHashToCallSites;
762 for (auto &AI : MemProfRec->AllocSites) {
763 NumOfMemProfAllocContextProfiles++;
764 // Associate the allocation info with the leaf frame. The later matching
765 // code will match any inlined call sequences in the IR with a longer prefix
766 // of call stack frames.
767 uint64_t StackId = computeStackId(Frame: AI.CallStack[0]);
768 LocHashToAllocInfo[StackId].insert(x: &AI);
769 ProfileHasColumns |= AI.CallStack[0].Column;
770 }
771 for (auto &CS : MemProfRec->CallSites) {
772 NumOfMemProfCallSiteProfiles++;
773 // Need to record all frames from leaf up to and including this function,
774 // as any of these may or may not have been inlined at this point.
775 unsigned Idx = 0;
776 for (auto &StackFrame : CS.Frames) {
777 uint64_t StackId = computeStackId(Frame: StackFrame);
778 ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(N: Idx++);
779 // The callee guids for the slice containing all frames (due to the
780 // increment above Idx is now 1) comes from the CalleeGuids recorded in
781 // the CallSite. For the slices not containing the leaf-most frame, the
782 // callee guid is simply the function GUID of the prior frame.
783 LocHashToCallSites[StackId].push_back(
784 x: {.Frames: FrameSlice, .CalleeGuids: (Idx == 1 ? CS.CalleeGuids
785 : ArrayRef<GlobalValue::GUID>(
786 CS.Frames[Idx - 2].Function))});
787
788 ProfileHasColumns |= StackFrame.Column;
789 // Once we find this function, we can stop recording.
790 if (StackFrame.Function == FuncGUID)
791 break;
792 }
793 assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
794 }
795
796 auto GetOffset = [](const DILocation *DIL) {
797 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
798 0xffff;
799 };
800
801 // Now walk the instructions, looking up the associated profile data using
802 // debug locations.
803 for (auto &BB : F) {
804 for (auto &I : BB) {
805 if (I.isDebugOrPseudoInst())
806 continue;
807 // We are only interested in calls (allocation or interior call stack
808 // context calls).
809 auto *CI = dyn_cast<CallBase>(Val: &I);
810 if (!CI)
811 continue;
812 auto *CalledFunction = CI->getCalledFunction();
813 if (CalledFunction && CalledFunction->isIntrinsic())
814 continue;
815
816 if (ORE.allowExtraAnalysis(DEBUG_TYPE))
817 dumpInlineCallStack(I, CI, ORE, SeenFrames, SeenStacks,
818 ProfileHasColumns);
819
820 // List of call stack ids computed from the location hashes on debug
821 // locations (leaf to inlined at root).
822 SmallVector<uint64_t, 8> InlinedCallStack;
823 // Was the leaf location found in one of the profile maps?
824 bool LeafFound = false;
825 // If leaf was found in a map, iterators pointing to its location in both
826 // of the maps. It might exist in neither, one, or both (the latter case
827 // can happen because we don't currently have discriminators to
828 // distinguish the case when a single line/col maps to both an allocation
829 // and another callsite).
830 auto AllocInfoIter = LocHashToAllocInfo.end();
831 auto CallSitesIter = LocHashToCallSites.end();
832 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
833 DIL = DIL->getInlinedAt()) {
834 // Use C++ linkage name if possible. Need to compile with
835 // -fdebug-info-for-profiling to get linkage name.
836 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
837 if (Name.empty())
838 Name = DIL->getScope()->getSubprogram()->getName();
839 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(GlobalName: Name);
840 auto StackId = computeStackId(Function: CalleeGUID, LineOffset: GetOffset(DIL),
841 Column: ProfileHasColumns ? DIL->getColumn() : 0);
842 // Check if we have found the profile's leaf frame. If yes, collect
843 // the rest of the call's inlined context starting here. If not, see if
844 // we find a match further up the inlined context (in case the profile
845 // was missing debug frames at the leaf).
846 if (!LeafFound) {
847 AllocInfoIter = LocHashToAllocInfo.find(x: StackId);
848 CallSitesIter = LocHashToCallSites.find(x: StackId);
849 if (AllocInfoIter != LocHashToAllocInfo.end() ||
850 CallSitesIter != LocHashToCallSites.end())
851 LeafFound = true;
852 }
853 if (LeafFound)
854 InlinedCallStack.push_back(Elt: StackId);
855 }
856 // If leaf not in either of the maps, skip inst.
857 if (!LeafFound)
858 continue;
859
860 // First add !memprof metadata from allocation info, if we found the
861 // instruction's leaf location in that map, and if the rest of the
862 // instruction's locations match the prefix Frame locations on an
863 // allocation context with the same leaf.
864 if (AllocInfoIter != LocHashToAllocInfo.end() &&
865 // Only consider allocations which support hinting.
866 isAllocationWithHotColdVariant(Callee: CI->getCalledFunction(), TLI))
867 handleAllocSite(I, CI, InlinedCallStack, Ctx, ORE, MaxColdSize,
868 AllocInfoSet: AllocInfoIter->second, FullStackIdToAllocMatchInfo);
869 else if (CallSitesIter != LocHashToCallSites.end())
870 // Otherwise, add callsite metadata. If we reach here then we found the
871 // instruction's leaf location in the callsites map and not the
872 // allocation map.
873 handleCallSite(I, CalledFunction, InlinedCallStack,
874 CallSiteEntries: CallSitesIter->second, M, MatchedCallSites, ORE);
875 }
876 }
877}
878
879MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
880 IntrusiveRefCntPtr<vfs::FileSystem> FS)
881 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
882 if (!FS)
883 this->FS = vfs::getRealFileSystem();
884}
885
886PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
887 // Return immediately if the module doesn't contain any function or global
888 // variables.
889 if (M.empty() && M.globals().empty())
890 return PreservedAnalyses::all();
891
892 LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
893 auto &Ctx = M.getContext();
894 auto ReaderOrErr = IndexedInstrProfReader::create(Path: MemoryProfileFileName, FS&: *FS);
895 if (Error E = ReaderOrErr.takeError()) {
896 handleAllErrors(E: std::move(E), Handlers: [&](const ErrorInfoBase &EI) {
897 Ctx.diagnose(
898 DI: DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
899 });
900 return PreservedAnalyses::all();
901 }
902
903 std::unique_ptr<IndexedInstrProfReader> MemProfReader =
904 std::move(ReaderOrErr.get());
905 if (!MemProfReader) {
906 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(
907 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
908 return PreservedAnalyses::all();
909 }
910
911 if (!MemProfReader->hasMemoryProfile()) {
912 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
913 "Not a memory profile"));
914 return PreservedAnalyses::all();
915 }
916
917 const bool Changed =
918 annotateGlobalVariables(M, DataAccessProf: MemProfReader->getDataAccessProfileData());
919
920 // If the module doesn't contain any function, return after we process all
921 // global variables.
922 if (M.empty())
923 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
924
925 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
926
927 TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(IR&: *M.begin());
928 DenseMap<uint64_t, LocToLocMap> UndriftMaps;
929 if (SalvageStaleProfile)
930 UndriftMaps = computeUndriftMap(M, MemProfReader: MemProfReader.get(), TLI);
931
932 // Map from the stack hash of each matched allocation context in the function
933 // profiles to match info such as the total profiled size (bytes), allocation
934 // type, number of frames matched to the allocation itself, and the full array
935 // of call stack ids.
936 std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
937
938 // Set of the matched call sites, each expressed as a sequence of an inline
939 // call stack.
940 std::set<std::vector<uint64_t>> MatchedCallSites;
941
942 DenseSet<uint64_t> SeenStacks;
943 DenseSet<uint64_t> SeenFrames;
944
945 uint64_t MaxColdSize = 0;
946 if (auto *MemProfSum = MemProfReader->getMemProfSummary())
947 MaxColdSize = MemProfSum->getMaxColdTotalSize();
948
949 for (auto &F : M) {
950 if (F.isDeclaration())
951 continue;
952
953 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(IR&: F);
954 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
955 readMemprof(M, F, MemProfReader: MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
956 MatchedCallSites, UndriftMaps, ORE, MaxColdSize, SeenStacks,
957 SeenFrames);
958 }
959
960 if (ClPrintMemProfMatchInfo) {
961 for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) {
962 for (auto Frames : Info.MatchedFramesSet) {
963 // TODO: To reduce verbosity, should we change the existing message
964 // so that we emit a list of matched frame counts in a single message
965 // about the context (instead of one message per frame count?
966 errs() << "MemProf " << getAllocTypeAttributeString(Type: Info.AllocType)
967 << " context with id " << Id << " has total profiled size "
968 << Info.TotalSize << " is matched with " << Frames << " frames";
969 if (PrintMatchedAllocStack) {
970 errs() << " and call stack";
971 for (auto &F : Info.CallStack)
972 errs() << " " << computeStackId(Frame: F);
973 }
974 errs() << "\n";
975 }
976 }
977
978 for (const auto &CallStack : MatchedCallSites) {
979 errs() << "MemProf callsite match for inline call stack";
980 for (uint64_t StackId : CallStack)
981 errs() << " " << StackId;
982 errs() << "\n";
983 }
984 }
985
986 return PreservedAnalyses::none();
987}
988
989bool MemProfUsePass::annotateGlobalVariables(
990 Module &M, const memprof::DataAccessProfData *DataAccessProf) {
991 if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
992 return false;
993
994 if (!DataAccessProf) {
995 M.addModuleFlag(Behavior: Module::Warning, Key: "EnableDataAccessProf", Val: 0U);
996 // FIXME: Add a diagnostic message without failing the compilation when
997 // data access profile payload is not available.
998 return false;
999 }
1000 M.addModuleFlag(Behavior: Module::Warning, Key: "EnableDataAccessProf", Val: 1U);
1001
1002 bool Changed = false;
1003 // Iterate all global variables in the module and annotate them based on
1004 // data access profiles. Note it's up to the linker to decide how to map input
1005 // sections to output sections, and one conservative practice is to map
1006 // unlikely-prefixed ones to unlikely output section, and map the rest
1007 // (hot-prefixed or prefix-less) to the canonical output section.
1008 for (GlobalVariable &GVar : M.globals()) {
1009 assert(!GVar.getSectionPrefix().has_value() &&
1010 "GVar shouldn't have section prefix yet");
1011 auto Kind = llvm::memprof::getAnnotationKind(GV: GVar);
1012 if (Kind != llvm::memprof::AnnotationKind::AnnotationOK) {
1013 HandleUnsupportedAnnotationKinds(GVar, Kind);
1014 continue;
1015 }
1016
1017 StringRef Name = GVar.getName();
1018 SymbolHandleRef Handle = SymbolHandleRef(Name);
1019 // Skip string literals as their mangled names don't stay stable across
1020 // binary releases.
1021 if (!AnnotateStringLiteralSectionPrefix)
1022 if (Name.starts_with(Prefix: ".str"))
1023 continue;
1024
1025 if (Name.starts_with(Prefix: ".str")) {
1026 std::optional<uint64_t> Hash = getStringContentHash(GVar);
1027 if (!Hash) {
1028 LLVM_DEBUG(dbgs() << "Cannot compute content hash for string literal "
1029 << Name << "\n");
1030 continue;
1031 }
1032 Handle = SymbolHandleRef(Hash.value());
1033 }
1034
1035 // DataAccessProfRecord's get* methods will canonicalize the name under the
1036 // hood before looking it up, so optimizer doesn't need to do it.
1037 std::optional<DataAccessProfRecord> Record =
1038 DataAccessProf->getProfileRecord(SymID: Handle);
1039 // Annotate a global variable as hot if it has non-zero sampled count, and
1040 // annotate it as cold if it's seen in the profiled binary
1041 // file but doesn't have any access sample.
1042 // For logging, optimization remark emitter requires a llvm::Function, but
1043 // it's not well defined how to associate a global variable with a function.
1044 // So we just print out the static data section prefix in LLVM_DEBUG.
1045 if (Record && Record->AccessCount > 0) {
1046 ++NumOfMemProfHotGlobalVars;
1047 Changed |= GVar.setSectionPrefix("hot");
1048 LLVM_DEBUG(dbgs() << "Global variable " << Name
1049 << " is annotated as hot\n");
1050 } else if (DataAccessProf->isKnownColdSymbol(SymID: Handle)) {
1051 ++NumOfMemProfColdGlobalVars;
1052 Changed |= GVar.setSectionPrefix("unlikely");
1053 Changed = true;
1054 LLVM_DEBUG(dbgs() << "Global variable " << Name
1055 << " is annotated as unlikely\n");
1056 } else {
1057 ++NumOfMemProfUnknownGlobalVars;
1058 LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
1059 }
1060 }
1061
1062 return Changed;
1063}
1064