//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfReader.h"
#include "llvm/ProfileData/MemProfSummaryBuilder.h"
#include "llvm/ProfileData/MemProfYAML.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {
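// Read a value of type T from Ptr, asserting that Ptr is suitably aligned.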
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_integral_v<T>, "Not an integral type");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

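// Sanity check the raw profile buffer: magic, minimum size, supported header
// versions, and that the per-dump header sizes add up to the buffer size.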
Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    const auto *H = reinterpret_cast<const Header *>(Next);

    // Check if the version in the header is among the supported versions.
    bool IsSupported = false;
    for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) {
      if (H->Version == SupportedVersion)
        IsSupported = true;
    }
    if (!IsSupported) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

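// Read the number of segment entries followed by that many SegmentEntry
// records starting at Ptr.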
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

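// Read (stack id, MemInfoBlock) pairs from a version 3 raw profile, which
// does not have the access histogram fields present in later versions.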
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV3(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

    // We cheat a bit here and remove the const from the cast to set the
    // histogram pointer to a newly allocated buffer. We also cheat, since V3
    // and V4 do not have the same fields: V3 is missing AccessHistogramSize
    // and AccessHistogram. This means we read "dirty" data here, but it should
    // not segfault, since there will be callstack data placed after this in
    // the binary format.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    // Overwrite dirty data.
    MIB.AccessHistogramSize = 0;
    MIB.AccessHistogram = 0;

    Items.push_back({Id, MIB});
    // Only increment by the size of MIB in V3.
    Ptr += MEMPROF_V3_MIB_SIZE;
  }
  return Items;
}

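// Read (stack id, MemInfoBlock) pairs field by field so that the reader does
// not depend on the in-memory layout of MemInfoBlock. When IsHistogramEncoded
// is true (V5 onwards), histogram entries are stored on disk as encoded
// uint16_t values and are decoded into uint64_t counts.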
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksCommon(const char *Ptr, bool IsHistogramEncoded = false) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

    MemInfoBlock MIB;
#define READ_MIB_FIELD(FIELD)                                                  \
  MIB.FIELD = endian::readNext<decltype(MIB.FIELD), llvm::endianness::little, \
                               unaligned>(Ptr)

    READ_MIB_FIELD(AllocCount);
    READ_MIB_FIELD(TotalAccessCount);
    READ_MIB_FIELD(MinAccessCount);
    READ_MIB_FIELD(MaxAccessCount);
    READ_MIB_FIELD(TotalSize);
    READ_MIB_FIELD(MinSize);
    READ_MIB_FIELD(MaxSize);
    READ_MIB_FIELD(AllocTimestamp);
    READ_MIB_FIELD(DeallocTimestamp);
    READ_MIB_FIELD(TotalLifetime);
    READ_MIB_FIELD(MinLifetime);
    READ_MIB_FIELD(MaxLifetime);
    READ_MIB_FIELD(AllocCpuId);
    READ_MIB_FIELD(DeallocCpuId);
    READ_MIB_FIELD(NumMigratedCpu);
    READ_MIB_FIELD(NumLifetimeOverlaps);
    READ_MIB_FIELD(NumSameAllocCpu);
    READ_MIB_FIELD(NumSameDeallocCpu);
    READ_MIB_FIELD(DataTypeId);
    READ_MIB_FIELD(TotalAccessDensity);
    READ_MIB_FIELD(MinAccessDensity);
    READ_MIB_FIELD(MaxAccessDensity);
    READ_MIB_FIELD(TotalLifetimeAccessDensity);
    READ_MIB_FIELD(MinLifetimeAccessDensity);
    READ_MIB_FIELD(MaxLifetimeAccessDensity);
    READ_MIB_FIELD(AccessHistogramSize);
    READ_MIB_FIELD(AccessHistogram);
#undef READ_MIB_FIELD

    if (MIB.AccessHistogramSize > 0) {
      // The in-memory representation uses uint64_t for histogram entries.
      MIB.AccessHistogram =
          (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
      for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
        if (!IsHistogramEncoded) {
          ((uint64_t *)MIB.AccessHistogram)[J] =
              endian::readNext<uint64_t, llvm::endianness::little, unaligned>(
                  Ptr);
        } else {
          // The encoded on-disk format (V5 onwards) uses uint16_t.
          const uint16_t Val =
              endian::readNext<uint16_t, llvm::endianness::little, unaligned>(
                  Ptr);
          ((uint64_t *)MIB.AccessHistogram)[J] = decodeHistogramCount(Val);
        }
      }
    }
    Items.push_back({Id, MIB});
  }
  return Items;
}

llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV4(const char *Ptr) {
  return readMemInfoBlocksCommon(Ptr);
}

llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV5(const char *Ptr) {
  return readMemInfoBlocksCommon(Ptr, /*IsHistogramEncoded=*/true);
}

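// Read the callstack table: for each stack id, the list of PCs that make up
// the callstack.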
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint64_t NumPCs =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    SmallVector<uint64_t> CallStack;
    CallStack.reserve(NumPCs);
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(
          endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &[Id, Stack] : From) {
    auto [It, Inserted] = To.try_emplace(Id, Stack);
    // Check that the PCs are the same (in order).
    if (!Inserted && Stack != It->second)
      return true;
  }
  return false;
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  const StringRef Filename = llvm::sys::path::filename(Path);
  // This list should be updated in case new files with additional interceptors
  // are added to the memprof runtime.
  return Filename == "memprof_malloc_linux.cpp" ||
         Filename == "memprof_interceptors.cpp" ||
         Filename == "memprof_new_delete.cpp";
}

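// Return the build id of a segment entry as a hex string, or "<None>" if the
// entry has no build id.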
std::string getBuildIdString(const SegmentEntry &Entry) {
  // If the build id is unset, print a helpful string instead of all zeros.
  if (Entry.BuildIdSize == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Entry.BuildIdSize; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer), ProfiledBinary, KeepName);
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
                         const StringRef ProfiledBinary, bool KeepName) {
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Buffer->getBufferIdentifier());

  if (ProfiledBinary.empty()) {
    // Peek at the build ids to print a helpful error message.
    const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
    std::string ErrorMessage(
        R"(Path to profiled binary is empty, expected binary with one of the following build ids:
)");
    for (const auto &Id : BuildIds) {
      ErrorMessage += "\n BuildId: ";
      ErrorMessage += Id;
    }
    return report(
        make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
        /*Context=*/"");
  }

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since the constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

// We need to make sure that all leftover MIB histograms that have not been
// freed by merge are freed here.
RawMemProfReader::~RawMemProfReader() {
  for (auto &[_, MIB] : CallstackProfileData) {
    if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) {
      free((void *)MIB.AccessHistogram);
    }
  }
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8b alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

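// Print a YAML representation of the raw profile: a summary (emitted as YAML
// comments), the segment information, and the merged records.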
void RawMemProfReader::printYAML(raw_ostream &OS) {
  MemProfSummaryBuilder MemProfSumBuilder;
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : MemProfData.Records) {
    MemProfSumBuilder.addRecord(KV.second);
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  // Print the summary first, as it is printed as YAML comments.
  auto MemProfSum = MemProfSumBuilder.getSummary();
  MemProfSum->printSummaryYaml(OS);

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MemprofRawVersion << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &[GUID, Record] : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << GUID << "\n";
    Record.print(OS);
  }
}

Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // Check whether the profiled binary was built with position independent code
  // (PIC). Perform sanity checks for assumptions we rely on to simplify
  // symbolization.
  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    return report(
        make_error<StringError>(Twine("Could not read program headers: "),
                                inconvertibleErrorCode()),
        FileName);

  int NumExecutableSegments = 0;
  for (const auto &Phdr : *PHdrsOr) {
    if (Phdr.p_type == ELF::PT_LOAD) {
      if (Phdr.p_flags & ELF::PF_X) {
        // We assume only one text segment in the main binary for simplicity
        // and to reduce the overhead of checking multiple ranges during
        // symbolization.
        if (++NumExecutableSegments > 1) {
          return report(
              make_error<StringError>(
                  "Expect only one executable load segment in the binary",
                  inconvertibleErrorCode()),
              FileName);
        }
        // The segment will always be loaded at a page boundary, so expect it
        // to be aligned already. Assume a 4K page size for the machine from
        // which the profile was collected. This should be fine for now; if we
        // want to support other page sizes, the page size can be recorded in
        // the raw profile during collection.
        PreferredTextSegmentAddress = Phdr.p_vaddr;
        assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
               "Expect p_vaddr to always be page aligned");
        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
      }
    }
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  // Process the raw profile.
  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = setupForSymbolization())
    return E;

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  auto Symbolizer = std::move(SOFOr.get());

  // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so
  // that it is freed automatically at the end, when it is no longer used. This
  // reduces peak memory since it won't be live while also mapping the raw
  // profile into records afterwards.
  if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
    return E;

  return mapRawProfileToRecords();
}

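// Match the build id of the profiled binary against the segment entries from
// the raw profile and record the profiled text segment boundaries used for
// address adjustment during symbolization.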
Error RawMemProfReader::setupForSymbolization() {
  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  object::BuildIDRef BinaryId = object::getBuildID(Object);
  if (BinaryId.empty())
    return make_error<StringError>(Twine("No build id found in binary ") +
                                       Binary.getBinary()->getFileName(),
                                   inconvertibleErrorCode());

  int NumMatched = 0;
  for (const auto &Entry : SegmentInfo) {
    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
    if (BinaryId == SegmentId) {
      // We assume only one text segment in the main binary for simplicity
      // and to reduce the overhead of checking multiple ranges during
      // symbolization.
      if (++NumMatched > 1) {
        return make_error<StringError>(
            "We expect only one executable segment in the profiled binary",
            inconvertibleErrorCode());
      }
      ProfiledTextSegmentStart = Entry.Start;
      ProfiledTextSegmentEnd = Entry.End;
    }
  }
  if (NumMatched == 0)
    return make_error<StringError>(
        Twine("No matching executable segments found in binary ") +
            Binary.getBinary()->getFileName(),
        inconvertibleErrorCode());
  assert((PreferredTextSegmentAddress == 0 ||
          (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
         "Expect text segment address to be 0 or equal to profiled text "
         "segment start.");
  return Error::success();
}

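// Convert the symbolized callstack profile data into indexed memprof records,
// attaching allocation sites and related callsites to each function.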
Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so, keep track of related contexts so that we can fill these in later.
  for (const auto &[StackId, MIB] : CallstackProfileData) {
    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    CallStackId CSId = MemProfData.addCallStack(Callstack);

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      IndexedMemProfRecord &Record = MemProfData.Records[F.Function];
      Record.AllocSites.emplace_back(CSId, MIB);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    IndexedMemProfRecord &Record = MemProfData.Records[Id];
    for (LocationPtr Loc : Locs)
      Record.CallSites.emplace_back(MemProfData.addCallStack(*Loc));
  }

  return Error::success();
}

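// Symbolize every address referenced by the stack map, dropping addresses
// that cannot be symbolized or that belong to the memprof runtime, and erase
// callstacks that become empty as a result.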
Error RawMemProfReader::symbolizeAndFilterStackFrames(
    std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or which belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid = memprof::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to the canonical symbol name
        // instead of adding it to the frame object directly to reduce memory
        // overhead. This is because there can be many unique frames,
        // particularly for callsite frames.
        if (KeepSymbolName) {
          StringRef CanonicalName =
              sampleprof::FunctionSamples::getCanonicalFnName(
                  DIFrame.FunctionName);
          GuidToSymbolName.insert({Guid, CanonicalName.str()});
        }

        SymbolizedFrame[VAddr].push_back(MemProfData.addFrame(F));
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    if (auto It = CallstackProfileData.find(Id);
        It != CallstackProfileData.end()) {
      if (It->second.AccessHistogramSize > 0)
        free((void *)It->second.AccessHistogram);
      CallstackProfileData.erase(It);
    }
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

std::vector<std::string>
RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();
  // Use a SetVector since a profile file may contain multiple raw profile
  // dumps, each with segment information. We want them unique and in the order
  // they were stored in the profile; the profiled binary should be the first
  // entry. The runtime uses dl_iterate_phdr and the "... first object visited
  // by callback is the main program."
  // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
  llvm::SetVector<std::string, std::vector<std::string>,
                  llvm::SmallSet<std::string, 10>>
      BuildIds;
  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);

    for (const auto &Entry : Entries)
      BuildIds.insert(getBuildIdString(Entry));

    Next += Header->TotalSize;
  }
  return BuildIds.takeVector();
}

// FIXME: Add a schema for serialization similar to IndexedMemProfReader. This
// will make it easier to deserialize different raw memprof versions.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
  if (MemprofRawVersion == 3ULL)
    return readMemInfoBlocksV3(Ptr);
  if (MemprofRawVersion == 4ULL)
    return readMemInfoBlocksV4(Ptr);
  if (MemprofRawVersion == 5ULL)
    return readMemInfoBlocksV5(Ptr);
  llvm_unreachable(
      "Panic: Unsupported version number when reading MemInfoBlocks");
}

Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Set the reader version to the raw memprof version of this profile. That
    // the version is supported has already been checked before creating the
    // reader.
    MemprofRawVersion = Header->Version;

    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumping.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Id)) {

        if (MemprofRawVersion >= 4ULL &&
            (CallstackProfileData[Id].AccessHistogramSize > 0 ||
             MIB.AccessHistogramSize > 0)) {
          uintptr_t ShorterHistogram;
          if (CallstackProfileData[Id].AccessHistogramSize >
              MIB.AccessHistogramSize)
            ShorterHistogram = MIB.AccessHistogram;
          else
            ShorterHistogram = CallstackProfileData[Id].AccessHistogram;
          CallstackProfileData[Id].Merge(MIB);
          free((void *)ShorterHistogram);
        } else {
          CallstackProfileData[Id].Merge(MIB);
        }
      } else {
        CallstackProfileData[Id] = MIB;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the same
    // file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

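// Convert a profiled virtual address into an offset suitable for
// symbolization, adjusting addresses from the profiled text segment by the
// binary's preferred text segment address.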
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  if (VirtualAddress > ProfiledTextSegmentStart &&
      VirtualAddress <= ProfiledTextSegmentEnd) {
    // For PIE binaries, the preferred address is zero and we adjust the
    // virtual address by the start of the profiled segment, assuming that the
    // offset of the segment in the binary is zero. For non-PIE binaries the
    // preferred and profiled segment addresses should be equal and this is a
    // no-op.
    const uint64_t AdjustedAddress =
        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
    return object::SectionedAddress{AdjustedAddress};
  }
  // Addresses which do not originate from the profiled text segment in the
  // binary are not adjusted. These will fail symbolization and be filtered out
  // during processing.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(
    GuidMemProfRecordPair &GuidRecord,
    std::function<const Frame(const FrameId)> Callback) {
  // Create a new callback for the RawMemProfRecord iterator so that we can
  // provide the symbol name if the reader was initialized with
  // KeepSymbolName = true. This is useful for debugging and testing.
  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
    return F;
  };
  return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}

Expected<std::unique_ptr<YAMLMemProfReader>>
YAMLMemProfReader::create(const Twine &Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer));
}

Expected<std::unique_ptr<YAMLMemProfReader>>
YAMLMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
  auto Reader = std::make_unique<YAMLMemProfReader>();
  Reader->parse(Buffer->getBuffer());
  return std::move(Reader);
}

bool YAMLMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool YAMLMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  return Buffer.getBuffer().starts_with("---");
}

void YAMLMemProfReader::parse(StringRef YAMLData) {
  memprof::AllMemProfData Doc;
  yaml::Input Yin(YAMLData);

  Yin >> Doc;
  if (Yin.error())
    return;

  // Add a call stack to MemProfData.CallStacks and return its CallStackId.
  auto AddCallStack = [&](ArrayRef<Frame> CallStack) -> CallStackId {
    SmallVector<FrameId> IndexedCallStack;
    IndexedCallStack.reserve(CallStack.size());
    for (const Frame &F : CallStack)
      IndexedCallStack.push_back(MemProfData.addFrame(F));
    return MemProfData.addCallStack(std::move(IndexedCallStack));
  };

  for (const auto &[GUID, Record] : Doc.HeapProfileRecords) {
    IndexedMemProfRecord IndexedRecord;

    // Convert AllocationInfo to IndexedAllocationInfo.
    for (const AllocationInfo &AI : Record.AllocSites) {
      CallStackId CSId = AddCallStack(AI.CallStack);
      IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
    }

    // Populate CallSites with CalleeGuids.
    for (const auto &CallSite : Record.CallSites) {
      CallStackId CSId = AddCallStack(CallSite.Frames);
      IndexedRecord.CallSites.emplace_back(CSId, CallSite.CalleeGuids);
    }

    MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
  }

  if (Doc.YamlifiedDataAccessProfiles.isEmpty())
    return;

  auto ToSymHandleRef =
      [](const memprof::SymbolHandle &Handle) -> memprof::SymbolHandleRef {
    if (std::holds_alternative<std::string>(Handle))
      return StringRef(std::get<std::string>(Handle));
    return std::get<uint64_t>(Handle);
  };

  auto DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
  for (const auto &Record : Doc.YamlifiedDataAccessProfiles.Records)
    if (Error E = DataAccessProfileData->setDataAccessProfile(
            ToSymHandleRef(Record.SymHandle), Record.AccessCount,
            Record.Locations))
      reportFatalInternalError(std::move(E));

  for (const uint64_t Hash : Doc.YamlifiedDataAccessProfiles.KnownColdStrHashes)
    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Hash))
      reportFatalInternalError(std::move(E));

  for (const std::string &Sym :
       Doc.YamlifiedDataAccessProfiles.KnownColdSymbols)
    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Sym))
      reportFatalInternalError(std::move(E));

  setDataAccessProfileData(std::move(DataAccessProfileData));
}
} // namespace memprof
} // namespace llvm