//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfReader.h"
#include "llvm/ProfileData/MemProfSummaryBuilder.h"
#include "llvm/ProfileData/MemProfYAML.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_integral_v<T>, "Not an integral type");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

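// Validate a raw profile buffer: check the magic bytes, reject empty or
// truncated buffers, require a supported version for every concatenated
// profile dump, and verify that the per-dump TotalSize fields add up to the
// buffer size.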
Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    const auto *H = reinterpret_cast<const Header *>(Next);

    // Check if the version in header is among the supported versions.
    bool IsSupported = false;
    for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) {
      if (H->Version == SupportedVersion)
        IsSupported = true;
    }
    if (!IsSupported) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

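// Read the serialized segment list starting at Ptr: a little-endian count
// followed by that many SegmentEntry structs.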
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

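// Read the (stack id, MemInfoBlock) pairs serialized by a version 3 raw
// profile. V3 blocks carry no access histograms, so those fields are zeroed
// after the read.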
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV3(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

    // We cheat a bit here and remove the const from cast to set the
    // Histogram Pointer to newly allocated buffer. We also cheat, since V3 and
    // V4 do not have the same fields. V3 is missing AccessHistogramSize and
    // AccessHistogram. This means we read "dirty" data in here, but it should
    // not segfault, since there will be callstack data placed after this in
    // the binary format.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    // Overwrite dirty data.
    MIB.AccessHistogramSize = 0;
    MIB.AccessHistogram = 0;

    Items.push_back({Id, MIB});
    // Only increment by the size of MIB in V3.
    Ptr += MEMPROF_V3_MIB_SIZE;
  }
  return Items;
}

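// Read the (stack id, MemInfoBlock) pairs serialized by a version 4 raw
// profile, including the trailing access histogram of each block. Histogram
// storage is malloc'd here and released when blocks are merged, discarded, or
// in the reader destructor.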
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV4(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    // We cheat a bit here and remove the const from cast to set the
    // Histogram Pointer to newly allocated buffer.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);

    // Only increment by size of MIB since readNext implicitly increments.
    Ptr += sizeof(MemInfoBlock);

    if (MIB.AccessHistogramSize > 0) {
      MIB.AccessHistogram =
          (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
    }

    for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
      ((uint64_t *)MIB.AccessHistogram)[J] =
          endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    }
    Items.push_back({Id, MIB});
  }
  return Items;
}

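// Read the call stack table: a count followed by, for each entry, the stack
// id, the number of PCs, and the PC addresses themselves.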
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint64_t NumPCs =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    SmallVector<uint64_t> CallStack;
    CallStack.reserve(NumPCs);
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(
          endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &[Id, Stack] : From) {
    auto [It, Inserted] = To.try_emplace(Id, Stack);
    // Check that the PCs are the same (in order).
    if (!Inserted && Stack != It->second)
      return true;
  }
  return false;
}

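// Wrap an error with a string describing the context (typically a file or
// binary name) in which it occurred.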
Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

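// Returns true if the path names a source file of the memprof runtime, whose
// frames are dropped from profiled call stacks.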
bool isRuntimePath(const StringRef Path) {
  const StringRef Filename = llvm::sys::path::filename(Path);
  // This list should be updated in case new files with additional interceptors
  // are added to the memprof runtime.
  return Filename == "memprof_malloc_linux.cpp" ||
         Filename == "memprof_interceptors.cpp" ||
         Filename == "memprof_new_delete.cpp";
}

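// Render the build id of a segment entry as a hex string.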
std::string getBuildIdString(const SegmentEntry &Entry) {
  // If the build id is unset print a helpful string instead of all zeros.
  if (Entry.BuildIdSize == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Entry.BuildIdSize; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer), ProfiledBinary, KeepName);
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
                         const StringRef ProfiledBinary, bool KeepName) {
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Buffer->getBufferIdentifier());

  if (ProfiledBinary.empty()) {
    // Peek the build ids to print a helpful error message.
    const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
    std::string ErrorMessage(
        R"(Path to profiled binary is empty, expected binary with one of the following build ids:
)");
    for (const auto &Id : BuildIds) {
      ErrorMessage += "\n BuildId: ";
      ErrorMessage += Id;
    }
    return report(
        make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
        /*Context=*/"");
  }

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

// We need to make sure that all leftover MIB histograms that have not been
// freed by merge are freed here.
RawMemProfReader::~RawMemProfReader() {
  for (auto &[_, MIB] : CallstackProfileData) {
    if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) {
      free((void *)MIB.AccessHistogram);
    }
  }
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8b alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

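// Print a YAML representation of the raw profile: a summary (emitted as YAML
// comments), the segment mappings, and the symbolized records.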
void RawMemProfReader::printYAML(raw_ostream &OS) {
  MemProfSummaryBuilder MemProfSumBuilder;
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : MemProfData.Records) {
    MemProfSumBuilder.addRecord(KV.second);
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  // Print the summary first, as it is printed as YAML comments.
  auto MemProfSum = MemProfSumBuilder.getSummary();
  MemProfSum->printSummaryYaml(OS);

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MemprofRawVersion << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &[GUID, Record] : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << GUID << "\n";
    Record.print(OS);
  }
}

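// Validate that the profiled binary is a supported x86 ELF image, read and
// merge the raw profile dumps, set up address translation, symbolize the call
// stacks, and convert everything into indexed memprof records.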
Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // Check whether the profiled binary was built with position independent code
  // (PIC). Perform sanity checks for assumptions we rely on to simplify
  // symbolization.
  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    return report(
        make_error<StringError>(Twine("Could not read program headers: "),
                                inconvertibleErrorCode()),
        FileName);

  int NumExecutableSegments = 0;
  for (const auto &Phdr : *PHdrsOr) {
    if (Phdr.p_type == ELF::PT_LOAD) {
      if (Phdr.p_flags & ELF::PF_X) {
        // We assume only one text segment in the main binary for simplicity
        // and reduce the overhead of checking multiple ranges during
        // symbolization.
        if (++NumExecutableSegments > 1) {
          return report(
              make_error<StringError>(
                  "Expect only one executable load segment in the binary",
                  inconvertibleErrorCode()),
              FileName);
        }
        // Segment will always be loaded at a page boundary, expect it to be
        // aligned already. Assume 4K pagesize for the machine from which the
        // profile has been collected. This should be fine for now, in case we
        // want to support other pagesizes it can be recorded in the raw
        // profile during collection.
        PreferredTextSegmentAddress = Phdr.p_vaddr;
        assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
               "Expect p_vaddr to always be page aligned");
        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
      }
    }
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  // Process the raw profile.
  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = setupForSymbolization())
    return E;

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  auto Symbolizer = std::move(SOFOr.get());

  // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so
  // that it is freed automatically at the end, when it is no longer used. This
  // reduces peak memory since it won't be live while also mapping the raw
  // profile into records afterwards.
  if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
    return E;

  return mapRawProfileToRecords();
}

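// Match the build id of the profiled binary against the segments recorded in
// the raw profile to determine the profiled text segment range used for
// address translation during symbolization.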
Error RawMemProfReader::setupForSymbolization() {
  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  object::BuildIDRef BinaryId = object::getBuildID(Object);
  if (BinaryId.empty())
    return make_error<StringError>(Twine("No build id found in binary ") +
                                       Binary.getBinary()->getFileName(),
                                   inconvertibleErrorCode());

  int NumMatched = 0;
  for (const auto &Entry : SegmentInfo) {
    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
    if (BinaryId == SegmentId) {
      // We assume only one text segment in the main binary for simplicity and
      // reduce the overhead of checking multiple ranges during symbolization.
      if (++NumMatched > 1) {
        return make_error<StringError>(
            "We expect only one executable segment in the profiled binary",
            inconvertibleErrorCode());
      }
      ProfiledTextSegmentStart = Entry.Start;
      ProfiledTextSegmentEnd = Entry.End;
    }
  }
  if (NumMatched == 0)
    return make_error<StringError>(
        Twine("No matching executable segments found in binary ") +
            Binary.getBinary()->getFileName(),
        inconvertibleErrorCode());
  assert((PreferredTextSegmentAddress == 0 ||
          (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
         "Expect text segment address to be 0 or equal to profiled text "
         "segment start.");
  return Error::success();
}

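// Convert the symbolized call stacks and MemInfoBlocks into per-function
// IndexedMemProfRecords: allocation sites are attached to every function in
// the inline chain of the allocation frame, and callsite call stacks are
// recorded for each function they pass through.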
Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so keep track of related contexts so that we can fill these in later.
  for (const auto &[StackId, MIB] : CallstackProfileData) {
    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    CallStackId CSId = MemProfData.addCallStack(Callstack);

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      IndexedMemProfRecord &Record = MemProfData.Records[F.Function];
      Record.AllocSites.emplace_back(CSId, MIB);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    IndexedMemProfRecord &Record = MemProfData.Records[Id];
    for (LocationPtr Loc : Locs)
      Record.CallSites.emplace_back(MemProfData.addCallStack(*Loc));
  }

  return Error::success();
}

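// Symbolize every unique address in the stack map, dropping frames that fail
// symbolization or belong to the memprof runtime, and erase call stacks (and
// their profile data) that become empty as a result.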
Error RawMemProfReader::symbolizeAndFilterStackFrames(
    std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or if they belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid = memprof::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to canonical symbol name
        // instead of adding it to the frame object directly to reduce memory
        // overhead. This is because there can be many unique frames,
        // particularly for callsite frames.
        if (KeepSymbolName) {
          StringRef CanonicalName =
              sampleprof::FunctionSamples::getCanonicalFnName(
                  DIFrame.FunctionName);
          GuidToSymbolName.insert({Guid, CanonicalName.str()});
        }

        SymbolizedFrame[VAddr].push_back(MemProfData.addFrame(F));
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    if (auto It = CallstackProfileData.find(Id);
        It != CallstackProfileData.end()) {
      if (It->second.AccessHistogramSize > 0)
        free((void *)It->second.AccessHistogram);
      CallstackProfileData.erase(It);
    }
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

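// Collect the build ids referenced by the segment entries of every dump in
// the raw profile without fully parsing it; used to print a helpful error
// message when no profiled binary is provided.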
std::vector<std::string>
RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();
  // Use a SetVector since a profile file may contain multiple raw profile
  // dumps, each with segment information. We want them unique and in the order
  // they were stored in the profile; the profiled binary should be the first
  // entry. The runtime uses dl_iterate_phdr and the "... first object visited
  // by callback is the main program."
  // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
  llvm::SetVector<std::string, std::vector<std::string>,
                  llvm::SmallSet<std::string, 10>>
      BuildIds;
  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);

    for (const auto &Entry : Entries)
      BuildIds.insert(getBuildIdString(Entry));

    Next += Header->TotalSize;
  }
  return BuildIds.takeVector();
}

// FIXME: Add a schema for serializing similar to IndexedMemProfReader. This
// will make it easier to deserialize different raw memprof versions.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
  if (MemprofRawVersion == 3ULL)
    return readMemInfoBlocksV3(Ptr);
  if (MemprofRawVersion == 4ULL)
    return readMemInfoBlocksV4(Ptr);
  llvm_unreachable(
      "Panic: Unsupported version number when reading MemInfoBlocks");
}

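// Parse all concatenated raw profile dumps in the buffer and merge them into
// the reader's state. Each dump starts with a Header whose SegmentOffset,
// MIBOffset and StackOffset fields locate the segment table, the MemInfoBlock
// table and the call stack table within that dump (see MemProfData.inc for
// the layout definitions).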
Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Set the reader version to the raw version of this profile dump. Whether
    // the version is supported was already checked before creating the reader.
    MemprofRawVersion = Header->Version;

    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Id)) {

        if (MemprofRawVersion >= 4ULL &&
            (CallstackProfileData[Id].AccessHistogramSize > 0 ||
             MIB.AccessHistogramSize > 0)) {
          uintptr_t ShorterHistogram;
          if (CallstackProfileData[Id].AccessHistogramSize >
              MIB.AccessHistogramSize)
            ShorterHistogram = MIB.AccessHistogram;
          else
            ShorterHistogram = CallstackProfileData[Id].AccessHistogram;
          CallstackProfileData[Id].Merge(MIB);
          free((void *)ShorterHistogram);
        } else {
          CallstackProfileData[Id].Merge(MIB);
        }
      } else {
        CallstackProfileData[Id] = MIB;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the same
    // file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

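// Translate a profiled virtual address into the module offset expected by the
// symbolizer for this binary.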
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  if (VirtualAddress > ProfiledTextSegmentStart &&
      VirtualAddress <= ProfiledTextSegmentEnd) {
    // For PIE binaries, the preferred address is zero and we adjust the
    // virtual address by start of the profiled segment assuming that the
    // offset of the segment in the binary is zero. For non-PIE binaries the
    // preferred and profiled segment addresses should be equal and this is a
    // no-op.
    const uint64_t AdjustedAddress =
        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
    return object::SectionedAddress{AdjustedAddress};
  }
  // Addresses which do not originate from the profiled text segment in the
  // binary are not adjusted. These will fail symbolization and be filtered out
  // during processing.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(
    GuidMemProfRecordPair &GuidRecord,
    std::function<const Frame(const FrameId)> Callback) {
  // Create a new callback for the RawMemProfRecord iterator so that we can
  // provide the symbol name if the reader was initialized with KeepSymbolName
  // = true. This is useful for debugging and testing.
  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
    return F;
  };
  return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}

Expected<std::unique_ptr<YAMLMemProfReader>>
YAMLMemProfReader::create(const Twine &Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer));
}

Expected<std::unique_ptr<YAMLMemProfReader>>
YAMLMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
  auto Reader = std::make_unique<YAMLMemProfReader>();
  Reader->parse(Buffer->getBuffer());
  return std::move(Reader);
}

bool YAMLMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool YAMLMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  return Buffer.getBuffer().starts_with("---");
}

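// Parse a YAML document of memprof records (and, optionally, data access
// profiles) and populate MemProfData with indexed frames, call stacks, and
// records.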
void YAMLMemProfReader::parse(StringRef YAMLData) {
  memprof::AllMemProfData Doc;
  yaml::Input Yin(YAMLData);

  Yin >> Doc;
  if (Yin.error())
    return;

  // Add a call stack to MemProfData.CallStacks and return its CallStackId.
  auto AddCallStack = [&](ArrayRef<Frame> CallStack) -> CallStackId {
    SmallVector<FrameId> IndexedCallStack;
    IndexedCallStack.reserve(CallStack.size());
    for (const Frame &F : CallStack)
      IndexedCallStack.push_back(MemProfData.addFrame(F));
    return MemProfData.addCallStack(std::move(IndexedCallStack));
  };

  for (const auto &[GUID, Record] : Doc.HeapProfileRecords) {
    IndexedMemProfRecord IndexedRecord;

    // Convert AllocationInfo to IndexedAllocationInfo.
    for (const AllocationInfo &AI : Record.AllocSites) {
      CallStackId CSId = AddCallStack(AI.CallStack);
      IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
    }

    // Populate CallSites with CalleeGuids.
    for (const auto &CallSite : Record.CallSites) {
      CallStackId CSId = AddCallStack(CallSite.Frames);
      IndexedRecord.CallSites.emplace_back(CSId, CallSite.CalleeGuids);
    }

    MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
  }

  if (Doc.YamlifiedDataAccessProfiles.isEmpty())
    return;

  auto ToSymHandleRef =
      [](const memprof::SymbolHandle &Handle) -> memprof::SymbolHandleRef {
    if (std::holds_alternative<std::string>(Handle))
      return StringRef(std::get<std::string>(Handle));
    return std::get<uint64_t>(Handle);
  };

  auto DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
  for (const auto &Record : Doc.YamlifiedDataAccessProfiles.Records)
    if (Error E = DataAccessProfileData->setDataAccessProfile(
            ToSymHandleRef(Record.SymHandle), Record.AccessCount,
            Record.Locations))
      reportFatalInternalError(std::move(E));

  for (const uint64_t Hash : Doc.YamlifiedDataAccessProfiles.KnownColdStrHashes)
    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Hash))
      reportFatalInternalError(std::move(E));

  for (const std::string &Sym :
       Doc.YamlifiedDataAccessProfiles.KnownColdSymbols)
    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Sym))
      reportFatalInternalError(std::move(E));

  setDataAccessProfileData(std::move(DataAccessProfileData));
}
} // namespace memprof
} // namespace llvm
864 | |