//===- RawMemProfReader.cpp - Instrumented memory profiling reader -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfReader.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

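// Sanity check a raw profile buffer: verify the magic, that the buffer is
// non-empty and large enough for a header, and that the chained headers
// exactly cover the buffer.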
Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    const auto *H = reinterpret_cast<const Header *>(Next);

    // Check if the version in header is among the supported versions.
    bool IsSupported = false;
    for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) {
      if (H->Version == SupportedVersion)
        IsSupported = true;
    }
    if (!IsSupported) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

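// Read a count-prefixed array of SegmentEntry records starting at \p Ptr.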
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

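// Read a count-prefixed list of (stack id, MemInfoBlock) pairs laid out in the
// V3 format, which does not include the access histogram fields.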
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV3(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

    // V3 and V4 do not have the same fields: V3 lacks AccessHistogramSize and
    // AccessHistogram. Reading a full MemInfoBlock here therefore picks up
    // "dirty" trailing bytes, but it cannot read out of bounds since callstack
    // data is placed after this in the binary format.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    // Overwrite the dirty data.
    MIB.AccessHistogramSize = 0;
    MIB.AccessHistogram = 0;

    Items.push_back({Id, MIB});
    // Only increment by the size of a V3 MIB.
    Ptr += MEMPROF_V3_MIB_SIZE;
  }
  return Items;
}

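// Read a count-prefixed list of (stack id, MemInfoBlock) pairs laid out in the
// V4 format, copying any inline access histogram into a newly allocated
// buffer.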
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV4(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    // Copy the MemInfoBlock; its histogram pointer is redirected to a newly
    // allocated buffer below.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);

    // Only increment by the size of the MIB here; the histogram reads below
    // advance Ptr implicitly via readNext.
    Ptr += sizeof(MemInfoBlock);

    if (MIB.AccessHistogramSize > 0) {
      MIB.AccessHistogram =
          (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
    }

    for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
      ((uint64_t *)MIB.AccessHistogram)[J] =
          endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    }
    Items.push_back({Id, MIB});
  }
  return Items;
}

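// Read the callstack section: a count-prefixed list of (stack id, PC list)
// entries.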
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint64_t NumPCs =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    SmallVector<uint64_t> CallStack;
    CallStack.reserve(NumPCs);
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(
          endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &[Id, Stack] : From) {
    auto I = To.find(Id);
    if (I == To.end()) {
      To[Id] = Stack;
    } else {
      // Check that the PCs are the same (in order).
      if (Stack != I->second)
        return true;
    }
  }
  return false;
}

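// Wrap \p E with a string error that carries \p Context (typically a file
// name) so the user can tell which input the error refers to.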
Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  const StringRef Filename = llvm::sys::path::filename(Path);
  // This list should be updated in case new files with additional interceptors
  // are added to the memprof runtime.
  return Filename == "memprof_malloc_linux.cpp" ||
         Filename == "memprof_interceptors.cpp" ||
         Filename == "memprof_new_delete.cpp";
}

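// Render the segment's build id as a hex string, or a placeholder if it is
// unset.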
std::string getBuildIdString(const SegmentEntry &Entry) {
  // If the build id is unset print a helpful string instead of all zeros.
  if (Entry.BuildIdSize == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Entry.BuildIdSize; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

MemProfReader::MemProfReader(
    llvm::DenseMap<FrameId, Frame> FrameIdMap,
    llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
    : IdToFrame(std::move(FrameIdMap)),
      FunctionProfileData(std::move(ProfData)) {
  // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord
  // while storing CallStack in CSIdToCallStack.
  for (auto &KV : FunctionProfileData) {
    IndexedMemProfRecord &Record = KV.second;
    for (auto &AS : Record.AllocSites) {
      CallStackId CSId = hashCallStack(AS.CallStack);
      AS.CSId = CSId;
      CSIdToCallStack.insert({CSId, AS.CallStack});
    }
    for (auto &CS : Record.CallSites) {
      CallStackId CSId = hashCallStack(CS);
      Record.CallSiteIds.push_back(CSId);
      CSIdToCallStack.insert({CSId, CS});
    }
  }
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer), ProfiledBinary, KeepName);
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
                         const StringRef ProfiledBinary, bool KeepName) {
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Buffer->getBufferIdentifier());

  if (ProfiledBinary.empty()) {
    // Peek the build ids to print a helpful error message.
    const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
    std::string ErrorMessage(
        R"(Path to profiled binary is empty, expected binary with one of the following build ids:
)");
    for (const auto &Id : BuildIds) {
      ErrorMessage += "\n BuildId: ";
      ErrorMessage += Id;
    }
    return report(
        make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
        /*Context=*/"");
  }

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

// We need to make sure that all leftover MIB histograms that have not been
// freed by merge are freed here.
RawMemProfReader::~RawMemProfReader() {
  for (auto &[_, MIB] : CallstackProfileData) {
    if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) {
      free((void *)MIB.AccessHistogram);
    }
  }
}

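// Check whether the file at \p Path looks like a raw memprof profile by
// reading its magic.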
bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8b alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

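// Print a YAML rendering of the raw profile: a summary, the segment
// information, and the merged records.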
void RawMemProfReader::printYAML(raw_ostream &OS) {
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : FunctionProfileData) {
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MemprofRawVersion << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &[GUID, Record] : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << GUID << "\n";
    Record.print(OS);
  }
}

Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // Check whether the profiled binary was built with position independent code
  // (PIC). Perform sanity checks for assumptions we rely on to simplify
  // symbolization.
  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    return report(
        make_error<StringError>(Twine("Could not read program headers: "),
                                inconvertibleErrorCode()),
        FileName);

  int NumExecutableSegments = 0;
  for (const auto &Phdr : *PHdrsOr) {
    if (Phdr.p_type == ELF::PT_LOAD) {
      if (Phdr.p_flags & ELF::PF_X) {
        // We assume only one text segment in the main binary for simplicity
        // and to reduce the overhead of checking multiple ranges during
        // symbolization.
        if (++NumExecutableSegments > 1) {
          return report(
              make_error<StringError>(
                  "Expect only one executable load segment in the binary",
                  inconvertibleErrorCode()),
              FileName);
        }
        // The segment will always be loaded at a page boundary, so expect it
        // to be aligned already. Assume a 4K page size for the machine from
        // which the profile was collected. This should be fine for now; if we
        // want to support other page sizes, the size can be recorded in the
        // raw profile during collection.
        PreferredTextSegmentAddress = Phdr.p_vaddr;
        assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
               "Expect p_vaddr to always be page aligned");
        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
      }
    }
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  // Process the raw profile.
  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = setupForSymbolization())
    return E;

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  auto Symbolizer = std::move(SOFOr.get());

  // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so
  // that it is freed automatically at the end, when it is no longer used. This
  // reduces peak memory since it won't be live while also mapping the raw
  // profile into records afterwards.
  if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
    return E;

  return mapRawProfileToRecords();
}

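// Match the profiled binary's build id against the segment entries recorded in
// the raw profile and record the profiled text segment address range, which is
// later used to adjust addresses before symbolization.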
Error RawMemProfReader::setupForSymbolization() {
  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  object::BuildIDRef BinaryId = object::getBuildID(Object);
  if (BinaryId.empty())
    return make_error<StringError>(Twine("No build id found in binary ") +
                                       Binary.getBinary()->getFileName(),
                                   inconvertibleErrorCode());

  int NumMatched = 0;
  for (const auto &Entry : SegmentInfo) {
    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
    if (BinaryId == SegmentId) {
      // We assume only one text segment in the main binary for simplicity and
      // to reduce the overhead of checking multiple ranges during
      // symbolization.
      if (++NumMatched > 1) {
        return make_error<StringError>(
            "We expect only one executable segment in the profiled binary",
            inconvertibleErrorCode());
      }
      ProfiledTextSegmentStart = Entry.Start;
      ProfiledTextSegmentEnd = Entry.End;
    }
  }
  assert(NumMatched != 0 &&
         "No matching executable segments in segment info.");
  assert((PreferredTextSegmentAddress == 0 ||
          (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
         "Expect text segment address to be 0 or equal to profiled text "
         "segment start.");
  return Error::success();
}

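// Convert the symbolized callstack profile data into per-function
// IndexedMemProfRecords, attaching allocation contexts and related callsites
// to each function encountered in the callstacks.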
Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so, keep track of related contexts so that we can fill these in later.
  for (const auto &[StackId, MIB] : CallstackProfileData) {
    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    CallStackId CSId = hashCallStack(Callstack);
    CSIdToCallStack.insert({CSId, Callstack});

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, CSId, MIB);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : Locs) {
      CallStackId CSId = hashCallStack(*Loc);
      CSIdToCallStack.insert({CSId, *Loc});
      Record.CallSites.push_back(*Loc);
      Record.CallSiteIds.push_back(CSId);
    }
  }

  verifyFunctionProfileData(FunctionProfileData);

  return Error::success();
}

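// Symbolize every unique virtual address in the callstack map, caching the
// resulting frames. Addresses that fail to symbolize or that belong to the
// memprof runtime are dropped, and callstack entries left empty afterwards are
// erased.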
Error RawMemProfReader::symbolizeAndFilterStackFrames(
    std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or if they belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to canonical symbol name
        // instead of adding it to the frame object directly to reduce memory
        // overhead. This is because there can be many unique frames,
        // particularly for callsite frames.
        if (KeepSymbolName) {
          StringRef CanonicalName =
              sampleprof::FunctionSamples::getCanonicalFnName(
                  DIFrame.FunctionName);
          GuidToSymbolName.insert({Guid, CanonicalName.str()});
        }

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    if (CallstackProfileData[Id].AccessHistogramSize > 0)
      free((void *)CallstackProfileData[Id].AccessHistogram);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

std::vector<std::string>
RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();
  // Use a SetVector since a profile file may contain multiple raw profile
  // dumps, each with segment information. We want them unique and in the order
  // they were stored in the profile; the profiled binary should be the first
  // entry. The runtime uses dl_iterate_phdr and the "... first object visited
  // by callback is the main program."
  // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
  llvm::SetVector<std::string, std::vector<std::string>,
                  llvm::SmallSet<std::string, 10>>
      BuildIds;
  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);

    for (const auto &Entry : Entries)
      BuildIds.insert(getBuildIdString(Entry));

    Next += Header->TotalSize;
  }
  return BuildIds.takeVector();
}

// FIXME: Add a schema for serializing similar to IndexedMemProfReader. This
// will make it easier to deserialize different raw memprof format versions.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
  if (MemprofRawVersion == 3ULL)
    return readMemInfoBlocksV3(Ptr);
  if (MemprofRawVersion == 4ULL)
    return readMemInfoBlocksV4(Ptr);
  llvm_unreachable(
      "Panic: Unsupported version number when reading MemInfoBlocks");
}

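// Parse all raw profile dumps contained in the buffer. Segment information
// must match across dumps, MemInfoBlocks with the same stack id are merged,
// and callstacks are required to agree for a given id.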
Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Set the reader version to the raw version of this profile. Support for
    // the version was already checked before the reader was created.
    MemprofRawVersion = Header->Version;

    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Id)) {

        if (MemprofRawVersion >= 4ULL &&
            (CallstackProfileData[Id].AccessHistogramSize > 0 ||
             MIB.AccessHistogramSize > 0)) {
          uintptr_t ShorterHistogram;
          if (CallstackProfileData[Id].AccessHistogramSize >
              MIB.AccessHistogramSize)
            ShorterHistogram = MIB.AccessHistogram;
          else
            ShorterHistogram = CallstackProfileData[Id].AccessHistogram;
          CallstackProfileData[Id].Merge(MIB);
          free((void *)ShorterHistogram);
        } else {
          CallstackProfileData[Id].Merge(MIB);
        }
      } else {
        CallstackProfileData[Id] = MIB;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the same
    // file, we expect the callstack to be the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

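// Translate a virtual address from the profile into an offset suitable for
// symbolization against the binary's preferred load address.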
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  if (VirtualAddress > ProfiledTextSegmentStart &&
      VirtualAddress <= ProfiledTextSegmentEnd) {
    // For PIE binaries, the preferred address is zero and we adjust the
    // virtual address by the start of the profiled segment, assuming that the
    // offset of the segment in the binary is zero. For non-PIE binaries the
    // preferred and profiled segment addresses should be equal and this is a
    // no-op.
    const uint64_t AdjustedAddress =
        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
    return object::SectionedAddress{AdjustedAddress};
  }
  // Addresses which do not originate from the profiled text segment in the
  // binary are not adjusted. These will fail symbolization and be filtered out
  // during processing.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(
    GuidMemProfRecordPair &GuidRecord,
    std::function<const Frame(const FrameId)> Callback) {
  // Create a new callback for the RawMemProfRecord iterator so that we can
  // provide the symbol name if the reader was initialized with KeepSymbolName
  // set to true. This is useful for debugging and testing.
  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
    return F;
  };
  return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}
} // namespace memprof
} // namespace llvm