//===- MemProfiler.cpp - memory allocation and access profiler ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler. Memory accesses are instrumented
// to increment the access count held in a shadow memory location, or
// alternatively to call into the runtime. Memory intrinsic calls (memmove,
// memcpy, memset) are changed to call the memory profiling runtime version
// instead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
#include <set>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

namespace llvm {
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultMemGranularity = 64;

// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";

constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";

// Command-line flags.

static cl::opt<bool> ClInsertVersionCheck(
    "memprof-guard-against-version-mismatch",
    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
    cl::init(true));

// This flag may need to be replaced with -f[no-]memprof-reads.
static cl::opt<bool>
    ClInstrumentReads("memprof-instrument-reads",
                      cl::desc("instrument read instructions"), cl::Hidden,
                      cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("memprof-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "memprof-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClUseCalls(
    "memprof-use-callbacks",
    cl::desc("Use callbacks instead of inline instrumentation sequences."),
    cl::Hidden, cl::init(false));

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__memprof_"));

// These flags allow changing the shadow mapping.
// The shadow mapping looks like
//    Shadow = ((Mem & mask) >> scale) + offset
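// For example, with the default granularity (64) and scale (3), each 64-byte
// block of application memory maps to one 8-byte shadow counter:
//    Shadow = ((Mem & ~63ULL) >> 3) + DynamicShadowOffset
// so accesses anywhere in 0x1000..0x103f bump the same counter, located
// 0x200 bytes past the dynamic shadow base.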

static cl::opt<int> ClMappingScale("memprof-mapping-scale",
                                   cl::desc("scale of memprof shadow mapping"),
                                   cl::Hidden, cl::init(DefaultShadowScale));

static cl::opt<int>
    ClMappingGranularity("memprof-mapping-granularity",
                         cl::desc("granularity of memprof shadow mapping"),
                         cl::Hidden, cl::init(DefaultMemGranularity));

static cl::opt<bool> ClStack("memprof-instrument-stack",
                             cl::desc("Instrument scalar stack variables"),
                             cl::Hidden, cl::init(false));

// Debug flags.

static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
                            cl::init(0));

static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
                                        cl::desc("Debug func"));

static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                               cl::Hidden, cl::init(-1));

// By default, disable matching of allocation profiles onto operator new calls
// that already explicitly pass a hot/cold hint, since we don't currently
// override these hints anyway.
static cl::opt<bool> ClMemProfMatchHotColdNew(
    "memprof-match-hot-cold-new",
    cl::desc(
        "Match allocation profiles onto existing hot/cold operator new calls"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClHistogram("memprof-histogram",
                                 cl::desc("Collect access count histograms"),
                                 cl::Hidden, cl::init(false));

static cl::opt<bool>
    ClPrintMemProfMatchInfo("memprof-print-match-info",
                            cl::desc("Print matching stats for each allocation "
                                     "context in this module's profiles"),
                            cl::Hidden, cl::init(false));

extern cl::opt<bool> MemProfReportHintedSizes;

// Instrumentation statistics
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
          "Number of functions having mismatched memory profile hash.");
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
STATISTIC(NumOfMemProfAllocContextProfiles,
          "Number of alloc contexts in memory profile.");
STATISTIC(NumOfMemProfCallSiteProfiles,
          "Number of callsites in memory profile.");
STATISTIC(NumOfMemProfMatchedAllocContexts,
          "Number of matched memory profile alloc contexts.");
STATISTIC(NumOfMemProfMatchedAllocs,
          "Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
          "Number of matched memory profile callsites.");

namespace {

/// This struct defines the shadow mapping using the rule:
///   shadow = ((mem & mask) >> Scale) + DynamicShadowOffset.
struct ShadowMapping {
  ShadowMapping() {
    Scale = ClMappingScale;
    Granularity = ClMappingGranularity;
    Mask = ~(Granularity - 1);
  }

  int Scale;
  int Granularity;
  uint64_t Mask; // Computed as ~(Granularity-1)
};

static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
  return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
                                       : MemProfCtorAndDtorPriority;
}

struct InterestingMemoryAccess {
  Value *Addr = nullptr;
  bool IsWrite;
  Type *AccessTy;
  Value *MaybeMask = nullptr;
};

/// Instrument the code in module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
    PtrTy = PointerType::getUnqual(*C);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return an InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, bool IsWrite);
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  void initializeCallbacks(Module &M);

  LLVMContext *C;
  int LongSize;
  Type *IntptrTy;
  PointerType *PtrTy;
  ShadowMapping Mapping;

  // This array is indexed by AccessIsWrite.
  FunctionCallee MemProfMemoryAccessCallback[2];

  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  Value *DynamicShadowOffset = nullptr;
};

class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  bool instrumentModule(Module &);

private:
  Triple TargetTriple;
  ShadowMapping Mapping;
  Function *MemProfCtorFunction = nullptr;
};

} // end anonymous namespace

MemProfilerPass::MemProfilerPass() = default;

PreservedAnalyses MemProfilerPass::run(Function &F,
                                       AnalysisManager<Function> &AM) {
  Module &M = *F.getParent();
  MemProfiler Profiler(M);
  if (Profiler.instrumentFunction(F))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

ModuleMemProfilerPass::ModuleMemProfilerPass() = default;

PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
                                             AnalysisManager<Module> &AM) {
  assert((!ClHistogram || ClUseCalls) &&
         "Cannot use -memprof-histogram without callbacks. Set "
         "memprof-use-callbacks");

  ModuleMemProfiler Profiler(M);
  if (Profiler.instrumentModule(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // (Shadow & mask) >> scale
  Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
  // (Shadow >> scale) + offset
  assert(DynamicShadowOffset);
  return IRB.CreateAdd(Shadow, DynamicShadowOffset);
}

// Instrument memset/memmove/memcpy
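// With the default callback prefix this rewrites, e.g., memcpy(dst, src, n)
// into __memprof_memcpy(dst, src, n), with the length cast to intptr (and,
// for memset, the fill value cast to i32).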
void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
                   {MI->getOperand(0), MI->getOperand(1),
                    IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  } else if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemProfMemset,
        {MI->getOperand(0),
         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  }
  MI->eraseFromParent();
}

std::optional<InterestingMemoryAccess>
MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
  // Do not instrument the load fetching the dynamic shadow address.
  if (DynamicShadowOffset == I)
    return std::nullopt;

  InterestingMemoryAccess Access;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads)
      return std::nullopt;
    Access.IsWrite = false;
    Access.AccessTy = LI->getType();
    Access.Addr = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = SI->getValueOperand()->getType();
    Access.Addr = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = RMW->getValOperand()->getType();
    Access.Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = XCHG->getCompareOperand()->getType();
    Access.Addr = XCHG->getPointerOperand();
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    auto *F = CI->getCalledFunction();
    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
              F->getIntrinsicID() == Intrinsic::masked_store)) {
      unsigned OpOffset = 0;
      if (F->getIntrinsicID() == Intrinsic::masked_store) {
        if (!ClInstrumentWrites)
          return std::nullopt;
        // Masked store has an initial operand for the value.
        OpOffset = 1;
        Access.AccessTy = CI->getArgOperand(0)->getType();
        Access.IsWrite = true;
      } else {
        if (!ClInstrumentReads)
          return std::nullopt;
        Access.AccessTy = CI->getType();
        Access.IsWrite = false;
      }

      auto *BasePtr = CI->getOperand(0 + OpOffset);
      Access.MaybeMask = CI->getOperand(2 + OpOffset);
      Access.Addr = BasePtr;
    }
  }

  if (!Access.Addr)
    return std::nullopt;

  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return std::nullopt;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Access.Addr->isSwiftError())
    return std::nullopt;

  // Peel off GEPs and BitCasts.
  auto *Addr = Access.Addr->stripInBoundsOffsets();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    // Do not instrument PGO counter updates.
    if (GV->hasSection()) {
      StringRef SectionName = GV->getSection();
      // Check if the global is in the PGO counters section.
      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
      if (SectionName.ends_with(
              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
        return std::nullopt;
    }

    // Do not instrument accesses to LLVM internal variables.
    if (GV->getName().starts_with("__llvm"))
      return std::nullopt;
  }

  return Access;
}

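// Instrument a masked vector load or store one element at a time. For a
// non-constant mask, each element's shadow update is guarded on the
// corresponding mask bit, roughly: if (mask[i]) ++shadow_count(&addr[i]);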
void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                              Instruction *I, Value *Addr,
                                              Type *AccessTy, bool IsWrite) {
  auto *VTy = cast<FixedVectorType>(AccessTy);
  unsigned Num = VTy->getNumElements();
  auto *Zero = ConstantInt::get(IntptrTy, 0);
  for (unsigned Idx = 0; Idx < Num; ++Idx) {
    Value *InstrumentedAddress = nullptr;
    Instruction *InsertBefore = I;
    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
      // dyn_cast as we might get UndefValue
      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
        if (Masked->isZero())
          // Mask is constant false, so no instrumentation needed.
          continue;
        // If we have a true or undef value, fall through to instrumentAddress
        // with InsertBefore == I.
      }
    } else {
      IRBuilder<> IRB(I);
      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
      InsertBefore = ThenTerm;
    }

    IRBuilder<> IRB(InsertBefore);
    InstrumentedAddress =
        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
    instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
  }
}

void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
                                InterestingMemoryAccess &Access) {
  // Skip instrumentation of stack accesses unless requested.
  if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
    if (Access.IsWrite)
      ++NumSkippedStackWrites;
    else
      ++NumSkippedStackReads;
    return;
  }

  if (Access.IsWrite)
    NumInstrumentedWrites++;
  else
    NumInstrumentedReads++;

  if (Access.MaybeMask) {
    instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
                                Access.AccessTy, Access.IsWrite);
  } else {
    // Since the access counts will be accumulated across the entire
    // allocation, we only update the shadow access count for the first
    // location and thus don't need to worry about alignment and type size.
    instrumentAddress(I, I, Access.Addr, Access.IsWrite);
  }
}

void MemProfiler::instrumentAddress(Instruction *OrigIns,
                                    Instruction *InsertBefore, Value *Addr,
                                    bool IsWrite) {
  IRBuilder<> IRB(InsertBefore);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);

  if (ClUseCalls) {
    IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
    return;
  }

  // Create an inline sequence to compute the shadow location and increment
  // the value by one.
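  // Roughly:
  //   %shadow = inttoptr i64 (((%addr & Mask) >> Scale) + %dyn.offset) to ptr
  //   %count = load i64, ptr %shadow
  //   store i64 (%count + 1), ptr %shadow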
  Type *ShadowTy = Type::getInt64Ty(*C);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  Value *ShadowPtr = memToShadow(AddrLong, IRB);
  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
  Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1);
  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
  IRB.CreateStore(ShadowValue, ShadowAddr);
}

// Create the variable for the profile file name.
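// The runtime is assumed to read this symbol, when present, to override its
// default profile output path.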
void createProfileFileNameVar(Module &M) {
  const MDString *MemProfFilename =
      dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
  if (!MemProfFilename)
    return;
  assert(!MemProfFilename->getString().empty() &&
         "Unexpected MemProfProfileFilename metadata with empty string");
  Constant *ProfileNameConst = ConstantDataArray::getString(
      M.getContext(), MemProfFilename->getString(), true);
  GlobalVariable *ProfileNameVar = new GlobalVariable(
      M, ProfileNameConst->getType(), /*isConstant=*/true,
      GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
    ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
  }
}

// Set MemprofHistogramFlag as a global variable in the IR. This makes it
// accessible to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
  const StringRef VarName(MemProfHistogramFlagVar);
  Type *IntTy1 = Type::getInt1Ty(M.getContext());
  auto MemprofHistogramFlag = new GlobalVariable(
      M, IntTy1, true, GlobalValue::WeakAnyLinkage,
      Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
    MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
  }
  appendToCompilerUsed(M, MemprofHistogramFlag);
}

bool ModuleMemProfiler::instrumentModule(Module &M) {
  // Create a module constructor.
  std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
  std::string VersionCheckName =
      ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
                           : "";
  std::tie(MemProfCtorFunction, std::ignore) =
      createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
                                          MemProfInitName, /*InitArgTypes=*/{},
                                          /*InitArgs=*/{}, VersionCheckName);

  const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
  appendToGlobalCtors(M, MemProfCtorFunction, Priority);

  createProfileFileNameVar(M);

  createMemprofHistogramFlagVar(M);

  return true;
}

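// Declare the runtime entry points the instrumentation may call. With the
// default prefix this yields __memprof_load and __memprof_store (or the
// hist_ variants when -memprof-histogram is set), plus __memprof_memmove,
// __memprof_memcpy, and __memprof_memset.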
void MemProfiler::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string HistPrefix = ClHistogram ? "hist_" : "";

    SmallVector<Type *, 2> Args1{1, IntptrTy};
    MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
        FunctionType::get(IRB.getVoidTy(), Args1, false));
  }
  MemProfMemmove = M.getOrInsertFunction(
      ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
                                        PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemset =
      M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
                            PtrTy, IRB.getInt32Ty(), IntptrTy);
}

bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
  // For each NSObject descendant having a +load method, this method is invoked
  // by the ObjC runtime before any of the static constructors is called.
  // Therefore we need to instrument such methods with a call to __memprof_init
  // at the beginning in order to initialize our runtime before any access to
  // the shadow memory.
  // We cannot just ignore these methods, because they may call other
  // instrumented functions.
  if (F.getName().contains(" load]")) {
    FunctionCallee MemProfInitFunction =
        declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
    IRBuilder<> IRB(&F.front(), F.front().begin());
    IRB.CreateCall(MemProfInitFunction, {});
    return true;
  }
  return false;
}

bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
  IRBuilder<> IRB(&F.front().front());
  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
      MemProfShadowMemoryDynamicAddress, IntptrTy);
  if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
    cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
  DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
  return true;
}

bool MemProfiler::instrumentFunction(Function &F) {
  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return false;
  if (ClDebugFunc == F.getName())
    return false;
  if (F.getName().starts_with("__memprof_"))
    return false;

  bool FunctionModified = false;

  // If needed, insert __memprof_init.
  // This function needs to be called even if the function body is not
  // instrumented.
  if (maybeInsertMemProfInitAtFunctionEntry(F))
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");

  initializeCallbacks(*F.getParent());

  SmallVector<Instruction *, 16> ToInstrument;

  // Fill the set of memory operations to instrument.
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
        ToInstrument.push_back(&Inst);
    }
  }

  if (ToInstrument.empty()) {
    LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
                      << " " << F << "\n");

    return FunctionModified;
  }

  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);

  int NumInstrumented = 0;
  for (auto *Inst : ToInstrument) {
    if (ClDebugMin < 0 || ClDebugMax < 0 ||
        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
      std::optional<InterestingMemoryAccess> Access =
          isInterestingMemoryAccess(Inst);
      if (Access)
        instrumentMop(Inst, F.getDataLayout(), *Access);
      else
        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
    NumInstrumented++;
  }

  if (NumInstrumented > 0)
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
                    << F << "\n");

  return FunctionModified;
}

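// Attach !callsite metadata to the instruction: a list of i64 stack ids
// (leaf location first) identifying the call's inlined location list within
// the profile.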
static void addCallsiteMetadata(Instruction &I,
                                std::vector<uint64_t> &InlinedCallStack,
                                LLVMContext &Ctx) {
  I.setMetadata(LLVMContext::MD_callsite,
                buildCallstackMetadata(InlinedCallStack, Ctx));
}

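// Compute a 64-bit frame id from the first 8 bytes of a BLAKE3 hash over
// (function GUID, line offset, column). Both profile frames and IR debug
// locations are reduced to these ids before being compared.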
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static uint64_t computeStackId(const memprof::Frame &Frame) {
  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}

// Helper to generate a single hash id for a given callstack, used for emitting
// matching statistics and useful for uniquing such statistics across modules.
static uint64_t
computeFullStackId(const std::vector<memprof::Frame> &CallStack) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (auto &F : CallStack)
    HashBuilder.add(F.Function, F.LineOffset, F.Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static AllocationType addCallStack(CallStackTrie &AllocTrie,
                                   const AllocationInfo *AllocInfo) {
  SmallVector<uint64_t> StackIds;
  for (const auto &StackFrame : AllocInfo->CallStack)
    StackIds.push_back(computeStackId(StackFrame));
  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
                                AllocInfo->Info.getAllocCount(),
                                AllocInfo->Info.getTotalLifetime());
  uint64_t TotalSize = 0;
  if (MemProfReportHintedSizes) {
    TotalSize = AllocInfo->Info.getTotalSize();
    assert(TotalSize);
  }
  AllocTrie.addCallStack(AllocType, StackIds, TotalSize);
  return AllocType;
}

// Helper to compare the InlinedCallStack computed from an instruction's debug
// info to a list of Frames from profile data (either the allocation data or a
// callsite). For callsites, the StartIndex to use in the Frame array may be
// non-zero.
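// For example, a profile context [f3, f2, f1, main] (leaf first) matches an
// instruction whose inlined stack is [f3, f2]: the IR stack only needs to be
// a prefix of the profiled frames starting at StartIndex.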
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack,
                                   unsigned StartIndex = 0) {
  auto StackFrame = ProfileCallStack.begin() + StartIndex;
  auto InlCallStackIter = InlinedCallStack.begin();
  for (; StackFrame != ProfileCallStack.end() &&
         InlCallStackIter != InlinedCallStack.end();
       ++StackFrame, ++InlCallStackIter) {
    uint64_t StackId = computeStackId(*StackFrame);
    if (StackId != *InlCallStackIter)
      return false;
  }
  // Return true if we found and matched all stack ids from the call
  // instruction.
  return InlCallStackIter == InlinedCallStack.end();
}

static bool isNewWithHotColdVariant(Function *Callee,
                                    const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc Func;
  if (!TLI.getLibFunc(*Callee, Func))
    return false;
  switch (Func) {
  case LibFunc_Znwm:
  case LibFunc_ZnwmRKSt9nothrow_t:
  case LibFunc_ZnwmSt11align_val_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
  case LibFunc_Znam:
  case LibFunc_ZnamRKSt9nothrow_t:
  case LibFunc_ZnamSt11align_val_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
    return true;
  case LibFunc_Znwm12__hot_cold_t:
  case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_Znam12__hot_cold_t:
  case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
    return ClMemProfMatchHotColdNew;
  default:
    return false;
  }
}

struct AllocMatchInfo {
  uint64_t TotalSize = 0;
  AllocationType AllocType = AllocationType::None;
  bool Matched = false;
};

static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
            const TargetLibraryInfo &TLI,
            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
  auto &Ctx = M.getContext();
  // Previously we used getIRPGOFuncName() here. For local linkage functions,
  // getIRPGOFuncName() returns the function name with a 'FileName;' prefix,
  // but llvm-profdata uses the DWARF function name to create the GUID, which
  // lacks that prefix. As a result, local linkage functions could not find
  // their MemProfRecord, so we use getName() now.
  // 'unique-internal-linkage-names' can make MemProf work better for local
  // linkage functions.
  auto FuncName = F.getName();
  auto FuncGUID = Function::getGUID(FuncName);
  std::optional<memprof::MemProfRecord> MemProfRec;
  auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
  if (Err) {
    handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
                        << ": ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << "unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        NumOfMemProfMismatch++;
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  NumOfMemProfFunc++;

  // Detect if there are non-zero column numbers in the profile. If not,
  // treat all column numbers as 0 when matching (i.e. ignore any non-zero
  // columns in the IR). The profiled binary might have been built with
  // column numbers disabled, for example.
  bool ProfileHasColumns = false;

  // Build maps of the location hash to all profile data with that leaf
  // location (allocation info and the callsites).
  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
  // For the callsites we need to record the index of the associated frame in
  // the frame array (see comments below where the map entries are added).
  std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>
      LocHashToCallSites;
  for (auto &AI : MemProfRec->AllocSites) {
    NumOfMemProfAllocContextProfiles++;
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer
    // prefix of call stack frames.
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
    ProfileHasColumns |= AI.CallStack[0].Column;
  }
  for (auto &CS : MemProfRec->CallSites) {
    NumOfMemProfCallSiteProfiles++;
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
      ProfileHasColumns |= StackFrame.Column;
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

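  // Lines are compared as offsets from the subprogram's start line, truncated
  // to 16 bits; the profile side is assumed to apply the same encoding.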
  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };

  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      std::vector<uint64_t> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite).
      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
          AllocInfoIter;
      std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *,
                                            unsigned>>>::iterator CallSitesIter;
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
                                      ProfileHasColumns ? DIL->getColumn() : 0);
        // Check if we have found the profile's leaf frame. If yes, collect
        // the rest of the call's inlined context starting here. If not, see if
        // we find a match further up the inlined context (in case the profile
        // was missing debug frames at the leaf).
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          if (AllocInfoIter != LocHashToAllocInfo.end() ||
              CallSitesIter != LocHashToCallSites.end())
            LeafFound = true;
        }
        if (LeafFound)
          InlinedCallStack.push_back(StackId);
      }
      // If the leaf is in neither of the maps, skip the instruction.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      if (AllocInfoIter != LocHashToAllocInfo.end()) {
        // Only consider allocations via new, to reduce unnecessary metadata,
        // since those are the only allocations that will be targeted
        // initially.
        if (!isNewWithHotColdVariant(CI->getCalledFunction(), TLI))
          continue;
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts
        // to the minimum needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one.
          // If we found and thus matched all frames on the call, include
          // this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack)) {
            NumOfMemProfMatchedAllocContexts++;
            auto AllocType = addCallStack(AllocTrie, AllocInfo);
            // Record information about the allocation if match info printing
            // was requested.
            if (ClPrintMemProfMatchInfo) {
              auto FullStackId = computeFullStackId(AllocInfo->CallStack);
              FullStackIdToAllocMatchInfo[FullStackId] = {
                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
            }
          }
        }
        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute
        // if all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          NumOfMemProfMatchedAllocs++;
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so
            // that it is simpler later on to identify which part of the MIB
            // contexts are from this particular instruction (including during
            // inlining, when the callsite metadata will be updated
            // appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }

      // Otherwise, add callsite metadata. If we reach here then we found the
      // instruction's leaf location in the callsites map and not the
      // allocation map.
      assert(CallSitesIter != LocHashToCallSites.end());
      for (auto CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(
                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
          NumOfMemProfMatchedCallSites++;
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}

MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
    : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
  if (!FS)
    this->FS = vfs::getRealFileSystem();
}

PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  LLVM_DEBUG(dbgs() << "Read in memory profile:");
  auto &Ctx = M.getContext();
  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error E = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
      std::move(ReaderOrErr.get());
  if (!MemProfReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
    return PreservedAnalyses::all();
  }

  if (!MemProfReader->hasMemoryProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
                                          "Not a memory profile"));
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  // Map from the stack hash of each allocation context in the function
  // profiles to the total profiled size (bytes), allocation type, and whether
  // we matched it to an allocation in the IR.
  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;

  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
  }

  if (ClPrintMemProfMatchInfo) {
    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
             << " context with id " << Id << " has total profiled size "
             << Info.TotalSize << (Info.Matched ? " is" : " not")
             << " matched\n";
  }

  return PreservedAnalyses::none();
}