//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfReader.h"
#include "llvm/ProfileData/MemProfSummaryBuilder.h"
#include "llvm/ProfileData/MemProfYAML.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_integral_v<T>, "Not an integral type");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

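// A raw memprof profile, as parsed below, is a sequence of one or more
// back-to-back dumps. Each dump begins with a memprof::Header (magic, version,
// total size of the dump, and byte offsets of the segment, MIB and call stack
// sections) followed by those three sections. This sketch is inferred from the
// reader code in this file; MemProfData.inc defines the authoritative layout.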
Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    const auto *H = reinterpret_cast<const Header *>(Next);

    // Check if the version in header is among the supported versions.
    bool IsSupported = false;
    for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) {
      if (H->Version == SupportedVersion)
        IsSupported = true;
    }
    if (!IsSupported) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV3(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

    // V3 and V4 do not have the same fields: V3 is missing AccessHistogramSize
    // and AccessHistogram, so reading a full MemInfoBlock here picks up "dirty"
    // trailing bytes. This does not read out of bounds since callstack data is
    // placed after this section in the binary format, and the dirty fields are
    // overwritten below.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    // Overwrite dirty data.
    MIB.AccessHistogramSize = 0;
    MIB.AccessHistogram = 0;

    Items.push_back({Id, MIB});
    // Only increment by the size of MIB in V3.
    Ptr += MEMPROF_V3_MIB_SIZE;
  }
  return Items;
}

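// Reads MemInfoBlocks in the V4 layout: a count, followed by (Id, MemInfoBlock)
// pairs, where each block may be trailed by AccessHistogramSize little-endian
// 64-bit histogram entries. The histogram is copied into a malloc'd buffer
// owned by the returned MemInfoBlock and freed later by the reader when the
// block is merged, filtered out, or destroyed.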
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV4(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    // Take a copy of the on-disk MemInfoBlock; its AccessHistogram pointer is
    // redirected to a newly allocated buffer below.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);

    // Advance only by the size of the MIB; the Id above was already consumed
    // by readNext.
    Ptr += sizeof(MemInfoBlock);

    if (MIB.AccessHistogramSize > 0) {
      MIB.AccessHistogram =
          (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
    }

    for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
      ((uint64_t *)MIB.AccessHistogram)[J] =
          endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    }
    Items.push_back({Id, MIB});
  }
  return Items;
}

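// Reads the call stack section: a count, followed by entries of the form
// (StackId, NumPCs, PC0 ... PCn-1), all little-endian 64-bit values.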
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint64_t NumPCs =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    SmallVector<uint64_t> CallStack;
    CallStack.reserve(NumPCs);
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(
          endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &[Id, Stack] : From) {
    auto [It, Inserted] = To.try_emplace(Id, Stack);
    // Check that the PCs are the same (in order).
    if (!Inserted && Stack != It->second)
      return true;
  }
  return false;
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  const StringRef Filename = llvm::sys::path::filename(Path);
  // This list should be updated in case new files with additional interceptors
  // are added to the memprof runtime.
  return Filename == "memprof_malloc_linux.cpp" ||
         Filename == "memprof_interceptors.cpp" ||
         Filename == "memprof_new_delete.cpp";
}

std::string getBuildIdString(const SegmentEntry &Entry) {
  // If the build id is unset print a helpful string instead of all zeros.
  if (Entry.BuildIdSize == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Entry.BuildIdSize; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

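// Illustrative use of the raw reader (a sketch, not a prescribed flow; see
// llvm/ProfileData/MemProfReader.h for the full interface and defaults):
//
//   auto ReaderOr = RawMemProfReader::create(ProfilePath, BinaryPath);
//   if (!ReaderOr)
//     return ReaderOr.takeError();
//   for (const auto &[FunctionGUID, Record] : **ReaderOr)
//     ... consume the MemProfRecord ...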
Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer), ProfiledBinary, KeepName);
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
                         const StringRef ProfiledBinary, bool KeepName) {
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Buffer->getBufferIdentifier());

  if (ProfiledBinary.empty()) {
    // Peek the build ids to print a helpful error message.
    const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
    std::string ErrorMessage(
        R"(Path to profiled binary is empty, expected binary with one of the following build ids:
)");
    for (const auto &Id : BuildIds) {
      ErrorMessage += "\n BuildId: ";
      ErrorMessage += Id;
    }
    return report(
        make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
        /*Context=*/"");
  }

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

// We need to make sure that all leftover MIB histograms that have not been
// freed by merge are freed here.
RawMemProfReader::~RawMemProfReader() {
  for (auto &[_, MIB] : CallstackProfileData) {
    if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) {
      free((void *)MIB.AccessHistogram);
    }
  }
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8b alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

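// Prints the profile in YAML form. The output mirrors the writes below,
// roughly:
//   MemprofProfile:
//     Summary: { Version, NumSegments, NumMibInfo, NumAllocFunctions,
//                NumStackOffsets }
//     Segments: [ { BuildId, Start, End, Offset }, ... ]
//     Records:  [ { FunctionGUID, <record contents> }, ... ]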
void RawMemProfReader::printYAML(raw_ostream &OS) {
  MemProfSummaryBuilder MemProfSumBuilder;
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : MemProfData.Records) {
    MemProfSumBuilder.addRecord(KV.second);
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  // Print the summary first, as it is printed as YAML comments.
  auto MemProfSum = MemProfSumBuilder.getSummary();
  MemProfSum->printSummaryYaml(OS);

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MemprofRawVersion << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &[GUID, Record] : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << GUID << "\n";
    Record.print(OS);
  }
}

Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // Check whether the profiled binary was built with position independent code
  // (PIC). Perform sanity checks for assumptions we rely on to simplify
  // symbolization.
  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    return report(
        make_error<StringError>(Twine("Could not read program headers: "),
                                inconvertibleErrorCode()),
        FileName);

  int NumExecutableSegments = 0;
  for (const auto &Phdr : *PHdrsOr) {
    if (Phdr.p_type == ELF::PT_LOAD) {
      if (Phdr.p_flags & ELF::PF_X) {
        // We assume only one text segment in the main binary for simplicity
        // and to reduce the overhead of checking multiple ranges during
        // symbolization.
        if (++NumExecutableSegments > 1) {
          return report(
              make_error<StringError>(
                  "Expect only one executable load segment in the binary",
                  inconvertibleErrorCode()),
              FileName);
        }
        // The segment will always be loaded at a page boundary, so expect it
        // to be aligned already. Assume a 4K page size for the machine from
        // which the profile was collected. This should be fine for now; if we
        // want to support other page sizes, the page size can be recorded in
        // the raw profile during collection.
        PreferredTextSegmentAddress = Phdr.p_vaddr;
        assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
               "Expect p_vaddr to always be page aligned");
        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
      }
    }
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  // Process the raw profile.
  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = setupForSymbolization())
    return E;

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  auto Symbolizer = std::move(SOFOr.get());

  // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so
  // that it is freed automatically at the end, when it is no longer used. This
  // reduces peak memory since it won't be live while also mapping the raw
  // profile into records afterwards.
  if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
    return E;

  return mapRawProfileToRecords();
}

Error RawMemProfReader::setupForSymbolization() {
  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  object::BuildIDRef BinaryId = object::getBuildID(Object);
  if (BinaryId.empty())
    return make_error<StringError>(Twine("No build id found in binary ") +
                                       Binary.getBinary()->getFileName(),
                                   inconvertibleErrorCode());

  int NumMatched = 0;
  for (const auto &Entry : SegmentInfo) {
    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
    if (BinaryId == SegmentId) {
      // We assume only one text segment in the main binary for simplicity and
      // to reduce the overhead of checking multiple ranges during
      // symbolization.
      if (++NumMatched > 1) {
        return make_error<StringError>(
            "We expect only one executable segment in the profiled binary",
            inconvertibleErrorCode());
      }
      ProfiledTextSegmentStart = Entry.Start;
      ProfiledTextSegmentEnd = Entry.End;
    }
  }
  if (NumMatched == 0)
    return make_error<StringError>(
        Twine("No matching executable segments found in binary ") +
            Binary.getBinary()->getFileName(),
        inconvertibleErrorCode());
  assert((PreferredTextSegmentAddress == 0 ||
          (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
         "Expect text segment address to be 0 or equal to profiled text "
         "segment start.");
  return Error::success();
}

Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing so
  // keep track of related contexts so that we can fill these in later.
  for (const auto &[StackId, MIB] : CallstackProfileData) {
    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    CallStackId CSId = MemProfData.addCallStack(Callstack);

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
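    // For example (hypothetical call stack): if the allocation context is
    // [new (inline), foo (inline), bar, main], the alloc site record is
    // attached to new, foo and bar, stopping at bar because it is the first
    // frame that is not an inline frame.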
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      IndexedMemProfRecord &Record = MemProfData.Records[F.Function];
      Record.AllocSites.emplace_back(CSId, MIB);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    IndexedMemProfRecord &Record = MemProfData.Records[Id];
    for (LocationPtr Loc : Locs)
      Record.CallSites.emplace_back(MemProfData.addCallStack(*Loc));
  }

  return Error::success();
}

Error RawMemProfReader::symbolizeAndFilterStackFrames(
    std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or if they belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid = memprof::getGUID(DIFrame.FunctionName);
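        // The Frame records the line as an offset from the start of the
        // function (Line - StartLine) together with the column, and marks
        // every frame except the last one in the inlining chain as inlined.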
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to canonical symbol name
        // instead of adding it to the frame object directly to reduce memory
        // overhead. This is because there can be many unique frames,
        // particularly for callsite frames.
        if (KeepSymbolName) {
          StringRef CanonicalName =
              sampleprof::FunctionSamples::getCanonicalFnName(
                  DIFrame.FunctionName);
          GuidToSymbolName.insert({Guid, CanonicalName.str()});
        }

        SymbolizedFrame[VAddr].push_back(MemProfData.addFrame(F));
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    if (auto It = CallstackProfileData.find(Id);
        It != CallstackProfileData.end()) {
      if (It->second.AccessHistogramSize > 0)
        free((void *)It->second.AccessHistogram);
      CallstackProfileData.erase(It);
    }
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

std::vector<std::string>
RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();
  // Use a SetVector since a profile file may contain multiple raw profile
  // dumps, each with segment information. We want them unique and in the order
  // they were stored in the profile; the profiled binary should be the first
  // entry. The runtime uses dl_iterate_phdr and the "... first object visited
  // by callback is the main program."
  // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
  llvm::SetVector<std::string, std::vector<std::string>,
                  llvm::SmallSet<std::string, 10>>
      BuildIds;
  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);

    for (const auto &Entry : Entries)
      BuildIds.insert(getBuildIdString(Entry));

    Next += Header->TotalSize;
  }
  return BuildIds.takeVector();
}

// FIXME: Add a schema for serializing similar to IndexedMemProfReader. This
// will help with deserializing different raw memprof versions more easily.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
  if (MemprofRawVersion == 3ULL)
    return readMemInfoBlocksV3(Ptr);
  if (MemprofRawVersion == 4ULL)
    return readMemInfoBlocksV4(Ptr);
  llvm_unreachable(
      "Panic: Unsupported version number when reading MemInfoBlocks");
}

Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Set the reader version to the raw version of the profile. Whether this
    // version is supported has already been checked before creating the
    // reader.
    MemprofRawVersion = Header->Version;

    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Id)) {

        if (MemprofRawVersion >= 4ULL &&
            (CallstackProfileData[Id].AccessHistogramSize > 0 ||
             MIB.AccessHistogramSize > 0)) {
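          // MemInfoBlock::Merge keeps one of the two histograms; free the
          // shorter one here so it does not leak. (This describes the intent
          // of the code below rather than the Merge contract itself.)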
          uintptr_t ShorterHistogram;
          if (CallstackProfileData[Id].AccessHistogramSize >
              MIB.AccessHistogramSize)
            ShorterHistogram = MIB.AccessHistogram;
          else
            ShorterHistogram = CallstackProfileData[Id].AccessHistogram;
          CallstackProfileData[Id].Merge(MIB);
          free((void *)ShorterHistogram);
        } else {
          CallstackProfileData[Id].Merge(MIB);
        }
      } else {
        CallstackProfileData[Id] = MIB;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the same
    // file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  if (VirtualAddress > ProfiledTextSegmentStart &&
      VirtualAddress <= ProfiledTextSegmentEnd) {
    // For PIE binaries, the preferred address is zero and we adjust the virtual
    // address by start of the profiled segment assuming that the offset of the
    // segment in the binary is zero. For non-PIE binaries the preferred and
    // profiled segment addresses should be equal and this is a no-op.
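    // For example (illustrative numbers only): if the profiled segment was
    // mapped at 0x555555556000 and the preferred address is 0, an address
    // 0x555555556abc is adjusted to 0xabc before symbolization.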
    const uint64_t AdjustedAddress =
        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
    return object::SectionedAddress{AdjustedAddress};
  }
  // Addresses which do not originate from the profiled text segment in the
  // binary are not adjusted. These will fail symbolization and be filtered out
  // during processing.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(
    GuidMemProfRecordPair &GuidRecord,
    std::function<const Frame(const FrameId)> Callback) {
  // Create a new callback for the RawMemProfRecord iterator so that we can
  // provide the symbol name if the reader was initialized with KeepSymbolName =
  // true. This is useful for debugging and testing.
  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
    return F;
  };
  return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}

Expected<std::unique_ptr<YAMLMemProfReader>>
YAMLMemProfReader::create(const Twine &Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer));
}

Expected<std::unique_ptr<YAMLMemProfReader>>
YAMLMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
  auto Reader = std::make_unique<YAMLMemProfReader>();
  Reader->parse(Buffer->getBuffer());
  return std::move(Reader);
}

bool YAMLMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool YAMLMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  return Buffer.getBuffer().starts_with("---");
}

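// Parses a textual MemProf profile. The expected document shape, roughly (the
// exact key names are defined by the YAML traits in MemProfYAML.h, not here):
//
//   ---
//   HeapProfileRecords:
//     - GUID: <function GUID or name>
//       AllocSites:
//         - Callstack: [ <frames> ]
//           MemInfoBlock: { <fields> }
//       CallSites:
//         - Frames: [ <frames> ]
//   ...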
void YAMLMemProfReader::parse(StringRef YAMLData) {
  memprof::AllMemProfData Doc;
  yaml::Input Yin(YAMLData);

  Yin >> Doc;
  if (Yin.error())
    return;

  // Add a call stack to MemProfData.CallStacks and return its CallStackId.
  auto AddCallStack = [&](ArrayRef<Frame> CallStack) -> CallStackId {
    SmallVector<FrameId> IndexedCallStack;
    IndexedCallStack.reserve(CallStack.size());
    for (const Frame &F : CallStack)
      IndexedCallStack.push_back(MemProfData.addFrame(F));
    return MemProfData.addCallStack(std::move(IndexedCallStack));
  };

  for (const auto &[GUID, Record] : Doc.HeapProfileRecords) {
    IndexedMemProfRecord IndexedRecord;

    // Convert AllocationInfo to IndexedAllocationInfo.
    for (const AllocationInfo &AI : Record.AllocSites) {
      CallStackId CSId = AddCallStack(AI.CallStack);
      IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
    }

    // Populate CallSites with CalleeGuids.
    for (const auto &CallSite : Record.CallSites) {
      CallStackId CSId = AddCallStack(CallSite.Frames);
      IndexedRecord.CallSites.emplace_back(CSId, CallSite.CalleeGuids);
    }

    MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
  }

  if (Doc.YamlifiedDataAccessProfiles.isEmpty())
    return;

  auto ToSymHandleRef =
      [](const memprof::SymbolHandle &Handle) -> memprof::SymbolHandleRef {
    if (std::holds_alternative<std::string>(Handle))
      return StringRef(std::get<std::string>(Handle));
    return std::get<uint64_t>(Handle);
  };

  auto DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
  for (const auto &Record : Doc.YamlifiedDataAccessProfiles.Records)
    if (Error E = DataAccessProfileData->setDataAccessProfile(
            ToSymHandleRef(Record.SymHandle), Record.AccessCount,
            Record.Locations))
      reportFatalInternalError(std::move(E));

  for (const uint64_t Hash : Doc.YamlifiedDataAccessProfiles.KnownColdStrHashes)
    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Hash))
      reportFatalInternalError(std::move(E));

  for (const std::string &Sym :
       Doc.YamlifiedDataAccessProfiles.KnownColdSymbols)
    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Sym))
      reportFatalInternalError(std::move(E));

  setDataAccessProfileData(std::move(DataAccessProfileData));
}
} // namespace memprof
} // namespace llvm