//===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading profiling data for clang's
// instrumentation based PGO and coverage.
//
//===----------------------------------------------------------------------===//
13
14#include "llvm/ProfileData/InstrProfReader.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/IR/ProfileSummary.h"
20#include "llvm/ProfileData/InstrProf.h"
21// #include "llvm/ProfileData/MemProf.h"
22#include "llvm/ProfileData/MemProfRadixTree.h"
23#include "llvm/ProfileData/ProfileCommon.h"
24#include "llvm/ProfileData/SymbolRemappingReader.h"
25#include "llvm/Support/Endian.h"
26#include "llvm/Support/Error.h"
27#include "llvm/Support/ErrorOr.h"
28#include "llvm/Support/FormatVariadic.h"
29#include "llvm/Support/MemoryBuffer.h"
30#include "llvm/Support/VirtualFileSystem.h"
31#include <algorithm>
32#include <cstddef>
33#include <cstdint>
34#include <limits>
35#include <memory>
36#include <optional>
37#include <system_error>
38#include <utility>
39#include <vector>
40
41using namespace llvm;
42
43// Extracts the variant information from the top 32 bits in the version and
44// returns an enum specifying the variants present.
45static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
46 InstrProfKind ProfileKind = InstrProfKind::Unknown;
47 if (Version & VARIANT_MASK_IR_PROF) {
48 ProfileKind |= InstrProfKind::IRInstrumentation;
49 }
50 if (Version & VARIANT_MASK_CSIR_PROF) {
51 ProfileKind |= InstrProfKind::ContextSensitive;
52 }
53 if (Version & VARIANT_MASK_INSTR_ENTRY) {
54 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
55 }
56 if (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) {
57 ProfileKind |= InstrProfKind::LoopEntriesInstrumentation;
58 }
59 if (Version & VARIANT_MASK_BYTE_COVERAGE) {
60 ProfileKind |= InstrProfKind::SingleByteCoverage;
61 }
62 if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
63 ProfileKind |= InstrProfKind::FunctionEntryOnly;
64 }
65 if (Version & VARIANT_MASK_MEMPROF) {
66 ProfileKind |= InstrProfKind::MemProf;
67 }
68 if (Version & VARIANT_MASK_TEMPORAL_PROF) {
69 ProfileKind |= InstrProfKind::TemporalProfile;
70 }
71 return ProfileKind;
72}
73
74static Expected<std::unique_ptr<MemoryBuffer>>
75setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
76 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
77 : FS.getBufferForFile(Name: Filename);
78 if (std::error_code EC = BufferOrErr.getError())
79 return errorCodeToError(EC);
80 return std::move(BufferOrErr.get());
81}
82
83static Error initializeReader(InstrProfReader &Reader) {
84 return Reader.readHeader();
85}
86
87/// Read a list of binary ids from a profile that consist of
88/// a. uint64_t binary id length
89/// b. uint8_t binary id data
90/// c. uint8_t padding (if necessary)
91/// This function is shared between raw and indexed profiles.
92/// Raw profiles are in host-endian format, and indexed profiles are in
93/// little-endian format. So, this function takes an argument indicating the
94/// associated endian format to read the binary ids correctly.
95static Error
96readBinaryIdsInternal(const MemoryBuffer &DataBuffer,
97 ArrayRef<uint8_t> BinaryIdsBuffer,
98 std::vector<llvm::object::BuildID> &BinaryIds,
99 const llvm::endianness Endian) {
100 using namespace support;
101
102 const uint64_t BinaryIdsSize = BinaryIdsBuffer.size();
103 const uint8_t *BinaryIdsStart = BinaryIdsBuffer.data();
104
105 if (BinaryIdsSize == 0)
106 return Error::success();
107
108 const uint8_t *BI = BinaryIdsStart;
109 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
110 const uint8_t *End =
111 reinterpret_cast<const uint8_t *>(DataBuffer.getBufferEnd());
112
113 while (BI < BIEnd) {
114 size_t Remaining = BIEnd - BI;
115 // There should be enough left to read the binary id length.
116 if (Remaining < sizeof(uint64_t))
117 return make_error<InstrProfError>(
118 Args: instrprof_error::malformed,
119 Args: "not enough data to read binary id length");
120
121 uint64_t BILen = endian::readNext<uint64_t>(memory&: BI, endian: Endian);
122 if (BILen == 0)
123 return make_error<InstrProfError>(Args: instrprof_error::malformed,
124 Args: "binary id length is 0");
125
126 Remaining = BIEnd - BI;
127 // There should be enough left to read the binary id data.
128 if (Remaining < alignToPowerOf2(Value: BILen, Align: sizeof(uint64_t)))
129 return make_error<InstrProfError>(
130 Args: instrprof_error::malformed, Args: "not enough data to read binary id data");
131
132 // Add binary id to the binary ids list.
133 BinaryIds.push_back(x: object::BuildID(BI, BI + BILen));
134
135 // Increment by binary id data length, which aligned to the size of uint64.
136 BI += alignToPowerOf2(Value: BILen, Align: sizeof(uint64_t));
137 if (BI > End)
138 return make_error<InstrProfError>(
139 Args: instrprof_error::malformed,
140 Args: "binary id section is greater than buffer size");
141 }
142
143 return Error::success();
144}
145
146static void printBinaryIdsInternal(raw_ostream &OS,
147 ArrayRef<llvm::object::BuildID> BinaryIds) {
148 OS << "Binary IDs: \n";
149 for (const auto &BI : BinaryIds) {
150 for (auto I : BI)
151 OS << format(Fmt: "%02x", Vals: I);
152 OS << "\n";
153 }
154}
155
156Expected<std::unique_ptr<InstrProfReader>> InstrProfReader::create(
157 const Twine &Path, vfs::FileSystem &FS,
158 const InstrProfCorrelator *Correlator,
159 const object::BuildIDFetcher *BIDFetcher,
160 const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind,
161 std::function<void(Error)> Warn) {
162 // Set up the buffer to read.
163 auto BufferOrError = setupMemoryBuffer(Filename: Path, FS);
164 if (Error E = BufferOrError.takeError())
165 return std::move(E);
166 return InstrProfReader::create(Buffer: std::move(BufferOrError.get()), Correlator,
167 BIDFetcher, BIDFetcherCorrelatorKind, Warn);
168}
169
170Expected<std::unique_ptr<InstrProfReader>> InstrProfReader::create(
171 std::unique_ptr<MemoryBuffer> Buffer, const InstrProfCorrelator *Correlator,
172 const object::BuildIDFetcher *BIDFetcher,
173 const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind,
174 std::function<void(Error)> Warn) {
175 if (Buffer->getBufferSize() == 0)
176 return make_error<InstrProfError>(Args: instrprof_error::empty_raw_profile);
177
178 std::unique_ptr<InstrProfReader> Result;
179 // Create the reader.
180 if (IndexedInstrProfReader::hasFormat(DataBuffer: *Buffer))
181 Result.reset(p: new IndexedInstrProfReader(std::move(Buffer)));
182 else if (RawInstrProfReader64::hasFormat(DataBuffer: *Buffer))
183 Result.reset(p: new RawInstrProfReader64(std::move(Buffer), Correlator,
184 BIDFetcher, BIDFetcherCorrelatorKind,
185 Warn));
186 else if (RawInstrProfReader32::hasFormat(DataBuffer: *Buffer))
187 Result.reset(p: new RawInstrProfReader32(std::move(Buffer), Correlator,
188 BIDFetcher, BIDFetcherCorrelatorKind,
189 Warn));
190 else if (TextInstrProfReader::hasFormat(Buffer: *Buffer))
191 Result.reset(p: new TextInstrProfReader(std::move(Buffer)));
192 else
193 return make_error<InstrProfError>(Args: instrprof_error::unrecognized_format);
194
195 // Initialize the reader and return the result.
196 if (Error E = initializeReader(Reader&: *Result))
197 return std::move(E);
198
199 return std::move(Result);
200}
201
202Expected<std::unique_ptr<IndexedInstrProfReader>>
203IndexedInstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
204 const Twine &RemappingPath) {
205 // Set up the buffer to read.
206 auto BufferOrError = setupMemoryBuffer(Filename: Path, FS);
207 if (Error E = BufferOrError.takeError())
208 return std::move(E);
209
210 // Set up the remapping buffer if requested.
211 std::unique_ptr<MemoryBuffer> RemappingBuffer;
212 std::string RemappingPathStr = RemappingPath.str();
213 if (!RemappingPathStr.empty()) {
214 auto RemappingBufferOrError = setupMemoryBuffer(Filename: RemappingPathStr, FS);
215 if (Error E = RemappingBufferOrError.takeError())
216 return std::move(E);
217 RemappingBuffer = std::move(RemappingBufferOrError.get());
218 }
219
220 return IndexedInstrProfReader::create(Buffer: std::move(BufferOrError.get()),
221 RemappingBuffer: std::move(RemappingBuffer));
222}
223
224Expected<std::unique_ptr<IndexedInstrProfReader>>
225IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
226 std::unique_ptr<MemoryBuffer> RemappingBuffer) {
227 // Create the reader.
228 if (!IndexedInstrProfReader::hasFormat(DataBuffer: *Buffer))
229 return make_error<InstrProfError>(Args: instrprof_error::bad_magic);
230 auto Result = std::make_unique<IndexedInstrProfReader>(
231 args: std::move(Buffer), args: std::move(RemappingBuffer));
232
233 // Initialize the reader and return the result.
234 if (Error E = initializeReader(Reader&: *Result))
235 return std::move(E);
236
237 return std::move(Result);
238}
239
240bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
241 // Verify that this really looks like plain ASCII text by checking a
242 // 'reasonable' number of characters (up to profile magic size).
243 size_t count = std::min(a: Buffer.getBufferSize(), b: sizeof(uint64_t));
244 StringRef buffer = Buffer.getBufferStart();
245 return count == 0 ||
246 std::all_of(first: buffer.begin(), last: buffer.begin() + count,
247 pred: [](char c) { return isPrint(C: c) || isSpace(C: c); });
248}
249
250// Read the profile variant flag from the header: ":FE" means this is a FE
251// generated profile. ":IR" means this is an IR level profile. Other strings
252// with a leading ':' will be reported an error format.
253Error TextInstrProfReader::readHeader() {
254 Symtab.reset(p: new InstrProfSymtab());
255
256 while (Line->starts_with(Prefix: ":")) {
257 StringRef Str = Line->substr(Start: 1);
258 if (Str.equals_insensitive(RHS: "ir"))
259 ProfileKind |= InstrProfKind::IRInstrumentation;
260 else if (Str.equals_insensitive(RHS: "fe"))
261 ProfileKind |= InstrProfKind::FrontendInstrumentation;
262 else if (Str.equals_insensitive(RHS: "csir")) {
263 ProfileKind |= InstrProfKind::IRInstrumentation;
264 ProfileKind |= InstrProfKind::ContextSensitive;
265 } else if (Str.equals_insensitive(RHS: "entry_first"))
266 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
267 else if (Str.equals_insensitive(RHS: "not_entry_first"))
268 ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
269 else if (Str.equals_insensitive(RHS: "instrument_loop_entries"))
270 ProfileKind |= InstrProfKind::LoopEntriesInstrumentation;
271 else if (Str.equals_insensitive(RHS: "single_byte_coverage"))
272 ProfileKind |= InstrProfKind::SingleByteCoverage;
273 else if (Str.equals_insensitive(RHS: "temporal_prof_traces")) {
274 ProfileKind |= InstrProfKind::TemporalProfile;
275 if (auto Err = readTemporalProfTraceData())
276 return error(E: std::move(Err));
277 } else
278 return error(Err: instrprof_error::bad_header);
279 ++Line;
280 }
281 return success();
282}
283
284/// Temporal profile trace data is stored in the header immediately after
285/// ":temporal_prof_traces". The first integer is the number of traces, the
286/// second integer is the stream size, then the following lines are the actual
287/// traces which consist of a weight and a comma separated list of function
288/// names.
289Error TextInstrProfReader::readTemporalProfTraceData() {
290 if ((++Line).is_at_end())
291 return error(Err: instrprof_error::eof);
292
293 uint32_t NumTraces;
294 if (Line->getAsInteger(Radix: 0, Result&: NumTraces))
295 return error(Err: instrprof_error::malformed);
296
297 if ((++Line).is_at_end())
298 return error(Err: instrprof_error::eof);
299
300 if (Line->getAsInteger(Radix: 0, Result&: TemporalProfTraceStreamSize))
301 return error(Err: instrprof_error::malformed);
302
303 for (uint32_t i = 0; i < NumTraces; i++) {
304 if ((++Line).is_at_end())
305 return error(Err: instrprof_error::eof);
306
307 TemporalProfTraceTy Trace;
308 if (Line->getAsInteger(Radix: 0, Result&: Trace.Weight))
309 return error(Err: instrprof_error::malformed);
310
311 if ((++Line).is_at_end())
312 return error(Err: instrprof_error::eof);
313
314 SmallVector<StringRef> FuncNames;
315 Line->split(A&: FuncNames, Separator: ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
316 for (auto &FuncName : FuncNames)
317 Trace.FunctionNameRefs.push_back(
318 x: IndexedInstrProf::ComputeHash(K: FuncName.trim()));
319 TemporalProfTraces.push_back(Elt: std::move(Trace));
320 }
321 return success();
322}
323
324Error
325TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
326
327#define CHECK_LINE_END(Line) \
328 if (Line.is_at_end()) \
329 return error(instrprof_error::truncated);
330#define READ_NUM(Str, Dst) \
331 if ((Str).getAsInteger(10, (Dst))) \
332 return error(instrprof_error::malformed);
333#define VP_READ_ADVANCE(Val) \
334 CHECK_LINE_END(Line); \
335 uint32_t Val; \
336 READ_NUM((*Line), (Val)); \
337 Line++;
338
339 if (Line.is_at_end())
340 return success();
341
342 uint32_t NumValueKinds;
343 if (Line->getAsInteger(Radix: 10, Result&: NumValueKinds)) {
344 // No value profile data
345 return success();
346 }
347 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
348 return error(Err: instrprof_error::malformed,
349 ErrMsg: "number of value kinds is invalid");
350 Line++;
351
352 for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
353 VP_READ_ADVANCE(ValueKind);
354 if (ValueKind > IPVK_Last)
355 return error(Err: instrprof_error::malformed, ErrMsg: "value kind is invalid");
356 ;
357 VP_READ_ADVANCE(NumValueSites);
358 if (!NumValueSites)
359 continue;
360
361 Record.reserveSites(ValueKind: VK, NumValueSites);
362 for (uint32_t S = 0; S < NumValueSites; S++) {
363 VP_READ_ADVANCE(NumValueData);
364
365 std::vector<InstrProfValueData> CurrentValues;
366 for (uint32_t V = 0; V < NumValueData; V++) {
367 CHECK_LINE_END(Line);
368 std::pair<StringRef, StringRef> VD = Line->rsplit(Separator: ':');
369 uint64_t TakenCount, Value;
370 if (ValueKind == IPVK_IndirectCallTarget) {
371 if (InstrProfSymtab::isExternalSymbol(Symbol: VD.first)) {
372 Value = 0;
373 } else {
374 if (Error E = Symtab->addFuncName(FuncName: VD.first))
375 return E;
376 Value = IndexedInstrProf::ComputeHash(K: VD.first);
377 }
378 } else if (ValueKind == IPVK_VTableTarget) {
379 if (InstrProfSymtab::isExternalSymbol(Symbol: VD.first))
380 Value = 0;
381 else {
382 if (Error E = Symtab->addVTableName(VTableName: VD.first))
383 return E;
384 Value = IndexedInstrProf::ComputeHash(K: VD.first);
385 }
386 } else {
387 READ_NUM(VD.first, Value);
388 }
389 READ_NUM(VD.second, TakenCount);
390 CurrentValues.push_back(x: {.Value: Value, .Count: TakenCount});
391 Line++;
392 }
393 assert(CurrentValues.size() == NumValueData);
394 Record.addValueData(ValueKind, Site: S, VData: CurrentValues, SymTab: nullptr);
395 }
396 }
397 return success();
398
399#undef CHECK_LINE_END
400#undef READ_NUM
401#undef VP_READ_ADVANCE
402}
403
404Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
405 // Skip empty lines and comments.
406 while (!Line.is_at_end() && (Line->empty() || Line->starts_with(Prefix: "#")))
407 ++Line;
408 // If we hit EOF while looking for a name, we're done.
409 if (Line.is_at_end()) {
410 return error(Err: instrprof_error::eof);
411 }
412
413 // Read the function name.
414 Record.Name = *Line++;
415 if (Error E = Symtab->addFuncName(FuncName: Record.Name))
416 return error(E: std::move(E));
417
418 // Read the function hash.
419 if (Line.is_at_end())
420 return error(Err: instrprof_error::truncated);
421 if ((Line++)->getAsInteger(Radix: 0, Result&: Record.Hash))
422 return error(Err: instrprof_error::malformed,
423 ErrMsg: "function hash is not a valid integer");
424
425 // Read the number of counters.
426 uint64_t NumCounters;
427 if (Line.is_at_end())
428 return error(Err: instrprof_error::truncated);
429 if ((Line++)->getAsInteger(Radix: 10, Result&: NumCounters))
430 return error(Err: instrprof_error::malformed,
431 ErrMsg: "number of counters is not a valid integer");
432 if (NumCounters == 0)
433 return error(Err: instrprof_error::malformed, ErrMsg: "number of counters is zero");
434
435 // Read each counter and fill our internal storage with the values.
436 Record.Clear();
437 Record.Counts.reserve(n: NumCounters);
438 for (uint64_t I = 0; I < NumCounters; ++I) {
439 if (Line.is_at_end())
440 return error(Err: instrprof_error::truncated);
441 uint64_t Count;
442 if ((Line++)->getAsInteger(Radix: 10, Result&: Count))
443 return error(Err: instrprof_error::malformed, ErrMsg: "count is invalid");
444 Record.Counts.push_back(x: Count);
445 }
446
447 // Bitmap byte information is indicated with special character.
448 if (Line->starts_with(Prefix: "$")) {
449 Record.BitmapBytes.clear();
450 // Read the number of bitmap bytes.
451 uint64_t NumBitmapBytes;
452 if ((Line++)->drop_front(N: 1).trim().getAsInteger(Radix: 0, Result&: NumBitmapBytes))
453 return error(Err: instrprof_error::malformed,
454 ErrMsg: "number of bitmap bytes is not a valid integer");
455 if (NumBitmapBytes != 0) {
456 // Read each bitmap and fill our internal storage with the values.
457 Record.BitmapBytes.reserve(n: NumBitmapBytes);
458 for (uint8_t I = 0; I < NumBitmapBytes; ++I) {
459 if (Line.is_at_end())
460 return error(Err: instrprof_error::truncated);
461 uint8_t BitmapByte;
462 if ((Line++)->getAsInteger(Radix: 0, Result&: BitmapByte))
463 return error(Err: instrprof_error::malformed,
464 ErrMsg: "bitmap byte is not a valid integer");
465 Record.BitmapBytes.push_back(x: BitmapByte);
466 }
467 }
468 }
469
470 // Check if value profile data exists and read it if so.
471 if (Error E = readValueProfileData(Record))
472 return error(E: std::move(E));
473
474 return success();
475}
476
477template <class IntPtrT>
478InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
479 return getProfileKindFromVersion(Version);
480}
481
482template <class IntPtrT>
483SmallVector<TemporalProfTraceTy> &
484RawInstrProfReader<IntPtrT>::getTemporalProfTraces(
485 std::optional<uint64_t> Weight) {
486 if (TemporalProfTimestamps.empty()) {
487 assert(TemporalProfTraces.empty());
488 return TemporalProfTraces;
489 }
490 // Sort functions by their timestamps to build the trace.
491 std::sort(first: TemporalProfTimestamps.begin(), last: TemporalProfTimestamps.end());
492 TemporalProfTraceTy Trace;
493 if (Weight)
494 Trace.Weight = *Weight;
495 for (auto &[TimestampValue, NameRef] : TemporalProfTimestamps)
496 Trace.FunctionNameRefs.push_back(x: NameRef);
497 TemporalProfTraces = {std::move(Trace)};
498 return TemporalProfTraces;
499}
500
501template <class IntPtrT>
502bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
503 if (DataBuffer.getBufferSize() < sizeof(uint64_t))
504 return false;
505 uint64_t Magic =
506 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
507 return RawInstrProf::getMagic<IntPtrT>() == Magic ||
508 llvm::byteswap(RawInstrProf::getMagic<IntPtrT>()) == Magic;
509}
510
511template <class IntPtrT>
512Error RawInstrProfReader<IntPtrT>::readHeader() {
513 if (!hasFormat(DataBuffer: *DataBuffer))
514 return error(instrprof_error::bad_magic);
515 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
516 return error(instrprof_error::bad_header);
517 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
518 DataBuffer->getBufferStart());
519 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
520 return readHeader(*Header);
521}
522
523template <class IntPtrT>
524Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
525 const char *End = DataBuffer->getBufferEnd();
526 // Skip zero padding between profiles.
527 while (CurrentPos != End && *CurrentPos == 0)
528 ++CurrentPos;
529 // If there's nothing left, we're done.
530 if (CurrentPos == End)
531 return make_error<InstrProfError>(Args: instrprof_error::eof);
532 // If there isn't enough space for another header, this is probably just
533 // garbage at the end of the file.
534 if (CurrentPos + sizeof(RawInstrProf::Header) > End)
535 return make_error<InstrProfError>(Args: instrprof_error::malformed,
536 Args: "not enough space for another header");
537 // The writer ensures each profile is padded to start at an aligned address.
538 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
539 return make_error<InstrProfError>(Args: instrprof_error::malformed,
540 Args: "insufficient padding");
541 // The magic should have the same byte order as in the previous header.
542 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
543 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
544 return make_error<InstrProfError>(Args: instrprof_error::bad_magic);
545
546 // There's another profile to read, so we need to process the header.
547 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
548 return readHeader(*Header);
549}
550
551template <class IntPtrT>
552Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
553 if (Error E = Symtab.create(FuncNameStrings: StringRef(NamesStart, NamesEnd - NamesStart),
554 VTableNameStrings: StringRef(VNamesStart, VNamesEnd - VNamesStart)))
555 return error(std::move(E));
556 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
557 const IntPtrT FPtr = swap(I->FunctionPointer);
558 if (!FPtr)
559 continue;
560 Symtab.mapAddress(Addr: FPtr, MD5Val: swap(I->NameRef));
561 }
562
563 if (VTableBegin != nullptr && VTableEnd != nullptr) {
564 for (const RawInstrProf::VTableProfileData<IntPtrT> *I = VTableBegin;
565 I != VTableEnd; ++I) {
566 const IntPtrT VPtr = swap(I->VTablePointer);
567 if (!VPtr)
568 continue;
569 // Map both begin and end address to the name hash, since the instrumented
570 // address could be somewhere in the middle.
571 // VPtr is of type uint32_t or uint64_t so 'VPtr + I->VTableSize' marks
572 // the end of vtable address.
573 Symtab.mapVTableAddress(StartAddr: VPtr, EndAddr: VPtr + swap(I->VTableSize),
574 MD5Val: swap(I->VTableNameHash));
575 }
576 }
577 return success();
578}
579
580template <class IntPtrT>
581Error RawInstrProfReader<IntPtrT>::readHeader(
582 const RawInstrProf::Header &Header) {
583 Version = swap(Header.Version);
584 if (GET_VERSION(Version) != RawInstrProf::Version)
585 return error(instrprof_error::raw_profile_version_mismatch,
586 ("Profile uses raw profile format version = " +
587 Twine(GET_VERSION(Version)) +
588 "; expected version = " + Twine(RawInstrProf::Version) +
589 "\nPLEASE update this tool to version in the raw profile, or "
590 "regenerate raw profile with expected version.")
591 .str());
592
593 uint64_t BinaryIdSize = swap(Header.BinaryIdsSize);
594 // Binary id start just after the header if exists.
595 const uint8_t *BinaryIdStart =
596 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
597 const uint8_t *BinaryIdEnd = BinaryIdStart + BinaryIdSize;
598 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
599 if (BinaryIdSize % sizeof(uint64_t) || BinaryIdEnd > BufferEnd)
600 return error(instrprof_error::bad_header);
601 ArrayRef<uint8_t> BinaryIdsBuffer(BinaryIdStart, BinaryIdSize);
602 if (!BinaryIdsBuffer.empty()) {
603 if (Error Err = readBinaryIdsInternal(*DataBuffer, BinaryIdsBuffer,
604 BinaryIds, getDataEndianness()))
605 return Err;
606 }
607
608 CountersDelta = swap(Header.CountersDelta);
609 BitmapDelta = swap(Header.BitmapDelta);
610 UniformCountersDelta = swap(Header.UniformCountersDelta);
611 NamesDelta = swap(Header.NamesDelta);
612 auto NumData = swap(Header.NumData);
613 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
614 auto CountersSize = swap(Header.NumCounters) * getCounterTypeSize();
615 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
616 auto NumBitmapBytes = swap(Header.NumBitmapBytes);
617 auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes);
618 auto NumUniformCounters = swap(Header.NumUniformCounters);
619 auto PaddingBytesAfterUniformCounters =
620 swap(Header.PaddingBytesAfterUniformCounters);
621 auto NamesSize = swap(Header.NamesSize);
622 auto VTableNameSize = swap(Header.VNamesSize);
623 auto NumVTables = swap(Header.NumVTables);
624 ValueKindLast = swap(Header.ValueKindLast);
625
626 auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
627 auto PaddingBytesAfterNames = getNumPaddingBytes(SizeInBytes: NamesSize);
628 auto PaddingBytesAfterVTableNames = getNumPaddingBytes(SizeInBytes: VTableNameSize);
629
630 auto VTableSectionSize =
631 NumVTables * sizeof(RawInstrProf::VTableProfileData<IntPtrT>);
632 auto PaddingBytesAfterVTableProfData = getNumPaddingBytes(SizeInBytes: VTableSectionSize);
633 auto UniformCountersSectionSize = NumUniformCounters * sizeof(uint64_t);
634
635 // Profile data starts after profile header and binary ids if exist.
636 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdSize;
637 ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters;
638 ptrdiff_t BitmapOffset =
639 CountersOffset + CountersSize + PaddingBytesAfterCounters;
640 ptrdiff_t UniformCountersOffset =
641 BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes;
642 ptrdiff_t NamesOffset = UniformCountersOffset + UniformCountersSectionSize +
643 PaddingBytesAfterUniformCounters;
644 ptrdiff_t VTableProfDataOffset =
645 NamesOffset + NamesSize + PaddingBytesAfterNames;
646 ptrdiff_t VTableNameOffset = VTableProfDataOffset + VTableSectionSize +
647 PaddingBytesAfterVTableProfData;
648 ptrdiff_t ValueDataOffset =
649 VTableNameOffset + VTableNameSize + PaddingBytesAfterVTableNames;
650
651 auto *Start = reinterpret_cast<const char *>(&Header);
652 if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
653 return error(instrprof_error::bad_header);
654
655 if (BIDFetcher) {
656 std::vector<object::BuildID> BinaryIDs;
657 if (Error E = readBinaryIds(BinaryIds&: BinaryIDs))
658 return E;
659 if (auto E = InstrProfCorrelator::get(Filename: "", FileKind: BIDFetcherCorrelatorKind,
660 BIDFetcher, BIs: BinaryIDs)
661 .moveInto(Value&: BIDFetcherCorrelator)) {
662 return E;
663 }
664 if (auto Err = BIDFetcherCorrelator->correlateProfileData(MaxWarnings: 0))
665 return Err;
666 }
667
668 if (Correlator) {
669 // These sizes in the raw file are zero because we constructed them in the
670 // Correlator.
671 if (!(DataSize == 0 && NamesSize == 0 && CountersDelta == 0 &&
672 NamesDelta == 0))
673 return error(instrprof_error::unexpected_correlation_info);
674 Data = Correlator->getDataPointer();
675 DataEnd = Data + Correlator->getDataSize();
676 NamesStart = Correlator->getNamesPointer();
677 NamesEnd = NamesStart + Correlator->getNamesSize();
678 } else if (BIDFetcherCorrelator) {
679 InstrProfCorrelatorImpl<IntPtrT> *BIDFetcherCorrelatorImpl =
680 dyn_cast_or_null<InstrProfCorrelatorImpl<IntPtrT>>(
681 BIDFetcherCorrelator.get());
682 Data = BIDFetcherCorrelatorImpl->getDataPointer();
683 DataEnd = Data + BIDFetcherCorrelatorImpl->getDataSize();
684 NamesStart = BIDFetcherCorrelatorImpl->getNamesPointer();
685 NamesEnd = NamesStart + BIDFetcherCorrelatorImpl->getNamesSize();
686 } else {
687 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
688 Start + DataOffset);
689 DataEnd = Data + NumData;
690 VTableBegin =
691 reinterpret_cast<const RawInstrProf::VTableProfileData<IntPtrT> *>(
692 Start + VTableProfDataOffset);
693 VTableEnd = VTableBegin + NumVTables;
694 NamesStart = Start + NamesOffset;
695 NamesEnd = NamesStart + NamesSize;
696 VNamesStart = Start + VTableNameOffset;
697 VNamesEnd = VNamesStart + VTableNameSize;
698 }
699
700 CountersStart = Start + CountersOffset;
701 CountersEnd = CountersStart + CountersSize;
702 BitmapStart = Start + BitmapOffset;
703 BitmapEnd = BitmapStart + NumBitmapBytes;
704 UniformCountersStart = Start + UniformCountersOffset;
705 UniformCountersEnd = UniformCountersStart + UniformCountersSectionSize;
706 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
707
708 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
709 if (Error E = createSymtab(Symtab&: *NewSymtab))
710 return E;
711
712 Symtab = std::move(NewSymtab);
713 return success();
714}
715
716template <class IntPtrT>
717Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
718 Record.Name = getName(NameRef: Data->NameRef);
719 return success();
720}
721
722template <class IntPtrT>
723Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
724 Record.Hash = swap(Data->FuncHash);
725 return success();
726}
727
728template <class IntPtrT>
729Error RawInstrProfReader<IntPtrT>::readRawCounts(
730 InstrProfRecord &Record) {
731 uint32_t NumCounters = swap(Data->NumCounters);
732 if (NumCounters == 0)
733 return error(instrprof_error::malformed, "number of counters is zero");
734
735 ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta;
736 if (CounterBaseOffset < 0)
737 return error(
738 instrprof_error::malformed,
739 ("counter offset " + Twine(CounterBaseOffset) + " is negative").str());
740
741 if (CounterBaseOffset >= CountersEnd - CountersStart)
742 return error(instrprof_error::malformed,
743 ("counter offset " + Twine(CounterBaseOffset) +
744 " is greater than the maximum counter offset " +
745 Twine(CountersEnd - CountersStart - 1))
746 .str());
747
748 uint64_t MaxNumCounters =
749 (CountersEnd - (CountersStart + CounterBaseOffset)) /
750 getCounterTypeSize();
751 if (NumCounters > MaxNumCounters)
752 return error(instrprof_error::malformed,
753 ("number of counters " + Twine(NumCounters) +
754 " is greater than the maximum number of counters " +
755 Twine(MaxNumCounters))
756 .str());
757
758 Record.Counts.clear();
759 Record.Counts.reserve(n: NumCounters);
760 for (uint32_t I = 0; I < NumCounters; I++) {
761 const char *Ptr =
762 CountersStart + CounterBaseOffset + I * getCounterTypeSize();
763 if (I == 0 && hasTemporalProfile()) {
764 uint64_t TimestampValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
765 if (TimestampValue != 0 &&
766 TimestampValue != std::numeric_limits<uint64_t>::max()) {
767 TemporalProfTimestamps.emplace_back(TimestampValue,
768 swap(Data->NameRef));
769 TemporalProfTraceStreamSize = 1;
770 }
771 if (hasSingleByteCoverage()) {
772 // In coverage mode, getCounterTypeSize() returns 1 byte but our
773 // timestamp field has size uint64_t. Increment I so that the next
774 // iteration of this for loop points to the byte after the timestamp
775 // field, i.e., I += 8.
776 I += 7;
777 }
778 continue;
779 }
780 if (hasSingleByteCoverage()) {
781 // A value of zero signifies the block is covered.
782 Record.Counts.push_back(x: *Ptr == 0 ? 1 : 0);
783 } else {
784 uint64_t CounterValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
785 if (CounterValue > MaxCounterValue && Warn)
786 Warn(make_error<InstrProfError>(
787 Args: instrprof_error::counter_value_too_large, Args: Twine(CounterValue)));
788
789 Record.Counts.push_back(x: CounterValue);
790 }
791 }
792
793 return success();
794}
795
796template <class IntPtrT>
797Error RawInstrProfReader<IntPtrT>::readRawBitmapBytes(InstrProfRecord &Record) {
798 uint32_t NumBitmapBytes = swap(Data->NumBitmapBytes);
799
800 Record.BitmapBytes.clear();
801 Record.BitmapBytes.reserve(n: NumBitmapBytes);
802
803 // It's possible MCDC is either not enabled or only used for some functions
804 // and not others. So if we record 0 bytes, just move on.
805 if (NumBitmapBytes == 0)
806 return success();
807
808 // BitmapDelta decreases as we advance to the next data record.
809 ptrdiff_t BitmapOffset = swap(Data->BitmapPtr) - BitmapDelta;
810 if (BitmapOffset < 0)
811 return error(
812 instrprof_error::malformed,
813 ("bitmap offset " + Twine(BitmapOffset) + " is negative").str());
814
815 if (BitmapOffset >= BitmapEnd - BitmapStart)
816 return error(instrprof_error::malformed,
817 ("bitmap offset " + Twine(BitmapOffset) +
818 " is greater than the maximum bitmap offset " +
819 Twine(BitmapEnd - BitmapStart - 1))
820 .str());
821
822 uint64_t MaxNumBitmapBytes =
823 (BitmapEnd - (BitmapStart + BitmapOffset)) / sizeof(uint8_t);
824 if (NumBitmapBytes > MaxNumBitmapBytes)
825 return error(instrprof_error::malformed,
826 ("number of bitmap bytes " + Twine(NumBitmapBytes) +
827 " is greater than the maximum number of bitmap bytes " +
828 Twine(MaxNumBitmapBytes))
829 .str());
830
831 for (uint32_t I = 0; I < NumBitmapBytes; I++) {
832 const char *Ptr = BitmapStart + BitmapOffset + I;
833 Record.BitmapBytes.push_back(swap(*Ptr));
834 }
835
836 return success();
837}
838
839template <class IntPtrT>
840Error RawInstrProfReader<IntPtrT>::readRawUniformCounters(
841 InstrProfRecord &Record) {
842 Record.UniformCounts.clear();
843
844 if (UniformCountersStart == UniformCountersEnd)
845 return success();
846
847 uint32_t NumCounters = swap(Data->NumCounters);
848
849 ptrdiff_t UniformCounterOffset =
850 swap(Data->UniformCounterPtr) - UniformCountersDelta;
851 if (UniformCounterOffset < 0)
852 return error(instrprof_error::malformed,
853 ("uniform counter offset " + Twine(UniformCounterOffset) +
854 " is negative")
855 .str());
856
857 if (UniformCounterOffset >= UniformCountersEnd - UniformCountersStart)
858 return error(instrprof_error::malformed,
859 ("uniform counter offset " + Twine(UniformCounterOffset) +
860 " is greater than the maximum uniform counter offset " +
861 Twine(UniformCountersEnd - UniformCountersStart - 1))
862 .str());
863
864 uint64_t MaxNumCounters =
865 (UniformCountersEnd - (UniformCountersStart + UniformCounterOffset)) /
866 sizeof(uint64_t);
867 if (NumCounters > MaxNumCounters)
868 return error(instrprof_error::malformed,
869 ("number of uniform counters " + Twine(NumCounters) +
870 " is greater than the maximum number of uniform counters " +
871 Twine(MaxNumCounters))
872 .str());
873
874 Record.UniformCounts.reserve(n: NumCounters);
875 for (uint32_t I = 0; I < NumCounters; I++) {
876 const char *Ptr =
877 UniformCountersStart + UniformCounterOffset + I * sizeof(uint64_t);
878 uint64_t CounterValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
879 Record.UniformCounts.push_back(x: CounterValue);
880 }
881
882 return success();
883}
884
885template <class IntPtrT>
886Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
887 InstrProfRecord &Record) {
888 Record.clearValueData();
889 CurValueDataSize = 0;
890 // Need to match the logic in value profile dumper code in compiler-rt:
891 uint32_t NumValueKinds = 0;
892 for (uint32_t I = 0; I < IPVK_Last + 1; I++)
893 NumValueKinds += (Data->NumValueSites[I] != 0);
894
895 if (!NumValueKinds)
896 return success();
897
898 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
899 ValueProfData::getValueProfData(
900 SrcBuffer: ValueDataStart, SrcBufferEnd: (const unsigned char *)DataBuffer->getBufferEnd(),
901 SrcDataEndianness: getDataEndianness());
902
903 if (Error E = VDataPtrOrErr.takeError())
904 return E;
905
906 // Note that besides deserialization, this also performs the conversion for
907 // indirect call targets. The function pointers from the raw profile are
908 // remapped into function name hashes.
909 VDataPtrOrErr.get()->deserializeTo(Record, SymTab: Symtab.get());
910 CurValueDataSize = VDataPtrOrErr.get()->getSize();
911 return success();
912}
913
914template <class IntPtrT>
915Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
916 // Keep reading profiles that consist of only headers and no profile data and
917 // counters.
918 while (atEnd())
919 // At this point, ValueDataStart field points to the next header.
920 if (Error E = readNextHeader(CurrentPos: getNextHeaderPos()))
921 return error(std::move(E));
922
923 // Read name and set it in Record.
924 if (Error E = readName(Record))
925 return error(std::move(E));
926
927 // Read FuncHash and set it in Record.
928 if (Error E = readFuncHash(Record))
929 return error(std::move(E));
930
931 Record.OffloadDeviceWaveSize = swap(Data->OffloadDeviceWaveSize);
932
933 // Read raw counts and set Record.
934 if (Error E = readRawCounts(Record))
935 return error(std::move(E));
936
937 // Read raw bitmap bytes and set Record.
938 if (Error E = readRawBitmapBytes(Record))
939 return error(std::move(E));
940
941 // Read raw uniform counters and set Record.
942 if (Error E = readRawUniformCounters(Record))
943 return error(std::move(E));
944
945 // Read value data and set Record.
946 if (Error E = readValueProfilingData(Record))
947 return error(std::move(E));
948
949 // Iterate.
950 advanceData();
951 return success();
952}
953
954template <class IntPtrT>
955Error RawInstrProfReader<IntPtrT>::readBinaryIds(
956 std::vector<llvm::object::BuildID> &BinaryIds) {
957 BinaryIds.insert(position: BinaryIds.begin(), first: this->BinaryIds.begin(),
958 last: this->BinaryIds.end());
959 return Error::success();
960}
961
962template <class IntPtrT>
963Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
964 if (!BinaryIds.empty())
965 printBinaryIdsInternal(OS, BinaryIds);
966 return Error::success();
967}
968
969namespace llvm {
970
971template class RawInstrProfReader<uint32_t>;
972template class RawInstrProfReader<uint64_t>;
973
974} // end namespace llvm
975
976InstrProfLookupTrait::hash_value_type
977InstrProfLookupTrait::ComputeHash(StringRef K) {
978 return IndexedInstrProf::ComputeHash(Type: HashType, K);
979}
980
981using data_type = InstrProfLookupTrait::data_type;
982using offset_type = InstrProfLookupTrait::offset_type;
983
984bool InstrProfLookupTrait::readValueProfilingData(
985 const unsigned char *&D, const unsigned char *const End) {
986 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
987 ValueProfData::getValueProfData(SrcBuffer: D, SrcBufferEnd: End, SrcDataEndianness: ValueProfDataEndianness);
988
989 if (VDataPtrOrErr.takeError())
990 return false;
991
992 VDataPtrOrErr.get()->deserializeTo(Record&: DataBuffer.back(), SymTab: nullptr);
993 D += VDataPtrOrErr.get()->TotalSize;
994
995 return true;
996}
997
998data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
999 offset_type N) {
1000 using namespace support;
1001
1002 // Check if the data is corrupt. If so, don't try to read it.
1003 if (N % sizeof(uint64_t))
1004 return data_type();
1005
1006 DataBuffer.clear();
1007 std::vector<uint64_t> CounterBuffer;
1008 std::vector<uint8_t> BitmapByteBuffer;
1009 std::vector<uint8_t> UniformityBitsBuffer;
1010
1011 const unsigned char *End = D + N;
1012 while (D < End) {
1013 // Read hash.
1014 if (D + sizeof(uint64_t) > End)
1015 return data_type();
1016 uint64_t Hash = endian::readNext<uint64_t, llvm::endianness::little>(memory&: D);
1017
1018 // Initialize number of counters for GET_VERSION(FormatVersion) == 1.
1019 uint64_t CountsSize = N / sizeof(uint64_t) - 1;
1020 // If format version is different then read the number of counters.
1021 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
1022 if (D + sizeof(uint64_t) > End)
1023 return data_type();
1024 CountsSize = endian::readNext<uint64_t, llvm::endianness::little>(memory&: D);
1025 }
1026 // Read counter values.
1027 if (D + CountsSize * sizeof(uint64_t) > End)
1028 return data_type();
1029
1030 CounterBuffer.clear();
1031 CounterBuffer.reserve(n: CountsSize);
1032 for (uint64_t J = 0; J < CountsSize; ++J)
1033 CounterBuffer.push_back(
1034 x: endian::readNext<uint64_t, llvm::endianness::little>(memory&: D));
1035
1036 // Read bitmap bytes for GET_VERSION(FormatVersion) > 10.
1037 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) {
1038 uint64_t BitmapBytes = 0;
1039 if (D + sizeof(uint64_t) > End)
1040 return data_type();
1041 BitmapBytes = endian::readNext<uint64_t, llvm::endianness::little>(memory&: D);
1042 BitmapByteBuffer.clear();
1043 BitmapByteBuffer.reserve(n: BitmapBytes);
1044
1045 if (GET_VERSION(FormatVersion) >=
1046 IndexedInstrProf::ProfVersion::Version14) {
1047 // Version 14+: bitmap bytes stored as uint8_t with padding.
1048 uint64_t PaddedSize = alignTo(Value: BitmapBytes, Align: sizeof(uint64_t));
1049 if (D + PaddedSize > End)
1050 return data_type();
1051 for (uint64_t J = 0; J < BitmapBytes; ++J)
1052 BitmapByteBuffer.push_back(
1053 x: endian::readNext<uint8_t, llvm::endianness::little>(memory&: D));
1054 for (uint64_t J = BitmapBytes; J < PaddedSize; ++J)
1055 (void)endian::readNext<uint8_t, llvm::endianness::little>(memory&: D);
1056
1057 // Read uniformity bits (AMDGPU offload profiling).
1058 uint64_t UniformityBitsSize = 0;
1059 if (D + sizeof(uint64_t) > End)
1060 return data_type();
1061 UniformityBitsSize =
1062 endian::readNext<uint64_t, llvm::endianness::little>(memory&: D);
1063 uint64_t PaddedUniformitySize =
1064 alignTo(Value: UniformityBitsSize, Align: sizeof(uint64_t));
1065 if (D + PaddedUniformitySize > End)
1066 return data_type();
1067 UniformityBitsBuffer.clear();
1068 UniformityBitsBuffer.reserve(n: UniformityBitsSize);
1069 for (uint64_t J = 0; J < UniformityBitsSize; ++J)
1070 UniformityBitsBuffer.push_back(
1071 x: endian::readNext<uint8_t, llvm::endianness::little>(memory&: D));
1072 for (uint64_t J = UniformityBitsSize; J < PaddedUniformitySize; ++J)
1073 (void)endian::readNext<uint8_t, llvm::endianness::little>(memory&: D);
1074 } else {
1075 // Version 11-13: each bitmap byte stored as a uint64_t.
1076 if (D + BitmapBytes * sizeof(uint64_t) > End)
1077 return data_type();
1078 for (uint64_t J = 0; J < BitmapBytes; ++J)
1079 BitmapByteBuffer.push_back(x: static_cast<uint8_t>(
1080 endian::readNext<uint64_t, llvm::endianness::little>(memory&: D)));
1081 }
1082 }
1083
1084 DataBuffer.emplace_back(args&: K, args&: Hash, args: std::move(CounterBuffer),
1085 args: std::move(BitmapByteBuffer),
1086 args: std::move(UniformityBitsBuffer));
1087
1088 // Read value profiling data.
1089 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
1090 !readValueProfilingData(D, End)) {
1091 DataBuffer.clear();
1092 return data_type();
1093 }
1094 }
1095 return DataBuffer;
1096}
1097
1098template <typename HashTableImpl>
1099Error InstrProfReaderIndex<HashTableImpl>::getRecords(
1100 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
1101 auto Iter = HashTable->find(FuncName);
1102 if (Iter == HashTable->end())
1103 return make_error<InstrProfError>(Args: instrprof_error::unknown_function);
1104
1105 Data = (*Iter);
1106 if (Data.empty())
1107 return make_error<InstrProfError>(Args: instrprof_error::malformed,
1108 Args: "profile data is empty");
1109
1110 return Error::success();
1111}
1112
1113template <typename HashTableImpl>
1114Error InstrProfReaderIndex<HashTableImpl>::getRecords(
1115 ArrayRef<NamedInstrProfRecord> &Data) {
1116 if (atEnd())
1117 return make_error<InstrProfError>(Args: instrprof_error::eof);
1118
1119 Data = *RecordIterator;
1120
1121 if (Data.empty())
1122 return make_error<InstrProfError>(Args: instrprof_error::malformed,
1123 Args: "profile data is empty");
1124
1125 return Error::success();
1126}
1127
1128template <typename HashTableImpl>
1129InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
1130 const unsigned char *Buckets, const unsigned char *const Payload,
1131 const unsigned char *const Base, IndexedInstrProf::HashT HashType,
1132 uint64_t Version) {
1133 FormatVersion = Version;
1134 HashTable.reset(HashTableImpl::Create(
1135 Buckets, Payload, Base,
1136 typename HashTableImpl::InfoType(HashType, Version)));
1137 RecordIterator = HashTable->data_begin();
1138}
1139
1140template <typename HashTableImpl>
1141InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const {
1142 return getProfileKindFromVersion(Version: FormatVersion);
1143}
1144
1145namespace {
1146/// A remapper that does not apply any remappings.
1147class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
1148 InstrProfReaderIndexBase &Underlying;
1149
1150public:
1151 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
1152 : Underlying(Underlying) {}
1153
1154 Error getRecords(StringRef FuncName,
1155 ArrayRef<NamedInstrProfRecord> &Data) override {
1156 return Underlying.getRecords(FuncName, Data);
1157 }
1158};
1159} // namespace
1160
1161/// A remapper that applies remappings based on a symbol remapping file.
1162template <typename HashTableImpl>
1163class llvm::InstrProfReaderItaniumRemapper
1164 : public InstrProfReaderRemapper {
1165public:
1166 InstrProfReaderItaniumRemapper(
1167 std::unique_ptr<MemoryBuffer> RemapBuffer,
1168 InstrProfReaderIndex<HashTableImpl> &Underlying)
1169 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
1170 }
1171
1172 /// Extract the original function name from a PGO function name.
1173 static StringRef extractName(StringRef Name) {
1174 // We can have multiple pieces separated by kGlobalIdentifierDelimiter (
1175 // semicolon now and colon in older profiles); there can be pieces both
1176 // before and after the mangled name. Find the first part that starts with
1177 // '_Z'; we'll assume that's the mangled name we want.
1178 std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
1179 while (true) {
1180 Parts = Parts.second.split(Separator: GlobalIdentifierDelimiter);
1181 if (Parts.first.starts_with(Prefix: "_Z"))
1182 return Parts.first;
1183 if (Parts.second.empty())
1184 return Name;
1185 }
1186 }
1187
1188 /// Given a mangled name extracted from a PGO function name, and a new
1189 /// form for that mangled name, reconstitute the name.
1190 static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
1191 StringRef Replacement,
1192 SmallVectorImpl<char> &Out) {
1193 Out.reserve(N: OrigName.size() + Replacement.size() - ExtractedName.size());
1194 Out.insert(I: Out.end(), From: OrigName.begin(), To: ExtractedName.begin());
1195 llvm::append_range(C&: Out, R&: Replacement);
1196 Out.insert(I: Out.end(), From: ExtractedName.end(), To: OrigName.end());
1197 }
1198
1199 Error populateRemappings() override {
1200 if (Error E = Remappings.read(B&: *RemapBuffer))
1201 return E;
1202 for (StringRef Name : Underlying.HashTable->keys()) {
1203 StringRef RealName = extractName(Name);
1204 if (auto Key = Remappings.insert(FunctionName: RealName)) {
1205 // FIXME: We could theoretically map the same equivalence class to
1206 // multiple names in the profile data. If that happens, we should
1207 // return NamedInstrProfRecords from all of them.
1208 MappedNames.insert(KV: {Key, RealName});
1209 }
1210 }
1211 return Error::success();
1212 }
1213
1214 Error getRecords(StringRef FuncName,
1215 ArrayRef<NamedInstrProfRecord> &Data) override {
1216 StringRef RealName = extractName(Name: FuncName);
1217 if (auto Key = Remappings.lookup(FunctionName: RealName)) {
1218 StringRef Remapped = MappedNames.lookup(Val: Key);
1219 if (!Remapped.empty()) {
1220 if (RealName.begin() == FuncName.begin() &&
1221 RealName.end() == FuncName.end())
1222 FuncName = Remapped;
1223 else {
1224 // Try rebuilding the name from the given remapping.
1225 SmallString<256> Reconstituted;
1226 reconstituteName(OrigName: FuncName, ExtractedName: RealName, Replacement: Remapped, Out&: Reconstituted);
1227 Error E = Underlying.getRecords(Reconstituted, Data);
1228 if (!E)
1229 return E;
1230
1231 // If we failed because the name doesn't exist, fall back to asking
1232 // about the original name.
1233 if (Error Unhandled = handleErrors(
1234 std::move(E), [](std::unique_ptr<InstrProfError> Err) {
1235 return Err->get() == instrprof_error::unknown_function
1236 ? Error::success()
1237 : Error(std::move(Err));
1238 }))
1239 return Unhandled;
1240 }
1241 }
1242 }
1243 return Underlying.getRecords(FuncName, Data);
1244 }
1245
1246private:
1247 /// The memory buffer containing the remapping configuration. Remappings
1248 /// holds pointers into this buffer.
1249 std::unique_ptr<MemoryBuffer> RemapBuffer;
1250
1251 /// The mangling remapper.
1252 SymbolRemappingReader Remappings;
1253
1254 /// Mapping from mangled name keys to the name used for the key in the
1255 /// profile data.
1256 /// FIXME: Can we store a location within the on-disk hash table instead of
1257 /// redoing lookup?
1258 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
1259
1260 /// The real profile data reader.
1261 InstrProfReaderIndex<HashTableImpl> &Underlying;
1262};
1263
1264bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
1265 using namespace support;
1266
1267 if (DataBuffer.getBufferSize() < 8)
1268 return false;
1269 uint64_t Magic = endian::read<uint64_t, aligned>(memory: DataBuffer.getBufferStart(),
1270 endian: llvm::endianness::little);
1271 // Verify that it's magical.
1272 return Magic == IndexedInstrProf::Magic;
1273}
1274
1275const unsigned char *
1276IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
1277 const unsigned char *Cur, bool UseCS) {
1278 using namespace IndexedInstrProf;
1279 using namespace support;
1280
1281 if (Version >= IndexedInstrProf::Version4) {
1282 const IndexedInstrProf::Summary *SummaryInLE =
1283 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
1284 uint64_t NFields = endian::byte_swap<uint64_t>(
1285 value: SummaryInLE->NumSummaryFields, endian: llvm::endianness::little);
1286 uint64_t NEntries = endian::byte_swap<uint64_t>(
1287 value: SummaryInLE->NumCutoffEntries, endian: llvm::endianness::little);
1288 uint32_t SummarySize =
1289 IndexedInstrProf::Summary::getSize(NumSumFields: NFields, NumCutoffEntries: NEntries);
1290 std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
1291 IndexedInstrProf::allocSummary(TotalSize: SummarySize);
1292
1293 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
1294 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
1295 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
1296 Dst[I] = endian::byte_swap<uint64_t>(value: Src[I], endian: llvm::endianness::little);
1297
1298 SummaryEntryVector DetailedSummary;
1299 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
1300 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
1301 DetailedSummary.emplace_back(args: (uint32_t)Ent.Cutoff, args: Ent.MinBlockCount,
1302 args: Ent.NumBlocks);
1303 }
1304 std::unique_ptr<llvm::ProfileSummary> &Summary =
1305 UseCS ? this->CS_Summary : this->Summary;
1306
1307 // initialize InstrProfSummary using the SummaryData from disk.
1308 Summary = std::make_unique<ProfileSummary>(
1309 args: UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
1310 args&: DetailedSummary, args: SummaryData->get(K: Summary::TotalBlockCount),
1311 args: SummaryData->get(K: Summary::MaxBlockCount),
1312 args: SummaryData->get(K: Summary::MaxInternalBlockCount),
1313 args: SummaryData->get(K: Summary::MaxFunctionCount),
1314 args: SummaryData->get(K: Summary::TotalNumBlocks),
1315 args: SummaryData->get(K: Summary::TotalNumFunctions));
1316 return Cur + SummarySize;
1317 } else {
1318 // The older versions do not support a profile summary. This just computes
1319 // an empty summary, which will not result in accurate hot/cold detection.
1320 // We would need to call addRecord for all NamedInstrProfRecords to get the
1321 // correct summary. However, this version is old (prior to early 2016) and
1322 // has not been supporting an accurate summary for several years.
1323 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1324 Summary = Builder.getSummary();
1325 return Cur;
1326 }
1327}
1328
1329Error IndexedInstrProfReader::readHeader() {
1330 using namespace support;
1331
1332 const unsigned char *Start =
1333 (const unsigned char *)DataBuffer->getBufferStart();
1334 const unsigned char *Cur = Start;
1335 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
1336 return error(Err: instrprof_error::truncated);
1337
1338 auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Buffer: Start);
1339 if (!HeaderOr)
1340 return HeaderOr.takeError();
1341
1342 const IndexedInstrProf::Header *Header = &HeaderOr.get();
1343 Cur += Header->size();
1344
1345 Cur = readSummary(Version: (IndexedInstrProf::ProfVersion)Header->Version, Cur,
1346 /* UseCS */ false);
1347 if (Header->Version & VARIANT_MASK_CSIR_PROF)
1348 Cur = readSummary(Version: (IndexedInstrProf::ProfVersion)Header->Version, Cur,
1349 /* UseCS */ true);
1350 // Read the hash type and start offset.
1351 IndexedInstrProf::HashT HashType =
1352 static_cast<IndexedInstrProf::HashT>(Header->HashType);
1353 if (HashType > IndexedInstrProf::HashT::Last)
1354 return error(Err: instrprof_error::unsupported_hash_type);
1355
1356 // The hash table with profile counts comes next.
1357 auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
1358 args: Start + Header->HashOffset, args&: Cur, args&: Start, args&: HashType, args: Header->Version);
1359
1360 // The MemProfOffset field in the header is only valid when the format
1361 // version is higher than 8 (when it was introduced).
1362 if (Header->getIndexedProfileVersion() >= 8 &&
1363 Header->Version & VARIANT_MASK_MEMPROF) {
1364 if (Error E = MemProfReader.deserialize(Start, MemProfOffset: Header->MemProfOffset))
1365 return E;
1366 }
1367
1368 // BinaryIdOffset field in the header is only valid when the format version
1369 // is higher than 9 (when it was introduced).
1370 if (Header->getIndexedProfileVersion() >= 9) {
1371 const unsigned char *Ptr = Start + Header->BinaryIdOffset;
1372 // Read binary ids size.
1373 uint64_t BinaryIdsSize =
1374 support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr);
1375 if (BinaryIdsSize % sizeof(uint64_t))
1376 return error(Err: instrprof_error::bad_header);
1377 // Set the binary ids start.
1378 BinaryIdsBuffer = ArrayRef<uint8_t>(Ptr, BinaryIdsSize);
1379 if (Ptr > (const unsigned char *)DataBuffer->getBufferEnd())
1380 return make_error<InstrProfError>(Args: instrprof_error::malformed,
1381 Args: "corrupted binary ids");
1382 }
1383
1384 if (Header->getIndexedProfileVersion() >= 12) {
1385 const unsigned char *Ptr = Start + Header->VTableNamesOffset;
1386
1387 uint64_t CompressedVTableNamesLen =
1388 support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr);
1389
1390 // Writer first writes the length of compressed string, and then the actual
1391 // content.
1392 const char *VTableNamePtr = (const char *)Ptr;
1393 if (VTableNamePtr > DataBuffer->getBufferEnd())
1394 return make_error<InstrProfError>(Args: instrprof_error::truncated);
1395
1396 VTableName = StringRef(VTableNamePtr, CompressedVTableNamesLen);
1397 }
1398
1399 if (Header->getIndexedProfileVersion() >= 10 &&
1400 Header->Version & VARIANT_MASK_TEMPORAL_PROF) {
1401 const unsigned char *Ptr = Start + Header->TemporalProfTracesOffset;
1402 const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
1403 // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
1404 if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
1405 return error(Err: instrprof_error::truncated);
1406 const uint64_t NumTraces =
1407 support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr);
1408 TemporalProfTraceStreamSize =
1409 support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr);
1410 for (unsigned i = 0; i < NumTraces; i++) {
1411 // Expect at least two 64 bit fields: Weight and NumFunctions
1412 if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
1413 return error(Err: instrprof_error::truncated);
1414 TemporalProfTraceTy Trace;
1415 Trace.Weight =
1416 support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr);
1417 const uint64_t NumFunctions =
1418 support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr);
1419 // Expect at least NumFunctions 64 bit fields
1420 if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd)
1421 return error(Err: instrprof_error::truncated);
1422 for (unsigned j = 0; j < NumFunctions; j++) {
1423 const uint64_t NameRef =
1424 support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr);
1425 Trace.FunctionNameRefs.push_back(x: NameRef);
1426 }
1427 TemporalProfTraces.push_back(Elt: std::move(Trace));
1428 }
1429 }
1430
1431 // Load the remapping table now if requested.
1432 if (RemappingBuffer) {
1433 Remapper =
1434 std::make_unique<InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
1435 args: std::move(RemappingBuffer), args&: *IndexPtr);
1436 if (Error E = Remapper->populateRemappings())
1437 return E;
1438 } else {
1439 Remapper = std::make_unique<InstrProfReaderNullRemapper>(args&: *IndexPtr);
1440 }
1441 Index = std::move(IndexPtr);
1442
1443 return success();
1444}
1445
1446InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
1447 if (Symtab)
1448 return *Symtab;
1449
1450 auto NewSymtab = std::make_unique<InstrProfSymtab>();
1451
1452 if (Error E = NewSymtab->initVTableNamesFromCompressedStrings(CompressedVTableNames: VTableName)) {
1453 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
1454 consumeError(Err: error(Err: ErrCode, ErrMsg: Msg));
1455 }
1456
1457 // finalizeSymtab is called inside populateSymtab.
1458 if (Error E = Index->populateSymtab(*NewSymtab)) {
1459 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
1460 consumeError(Err: error(Err: ErrCode, ErrMsg: Msg));
1461 }
1462
1463 Symtab = std::move(NewSymtab);
1464 return *Symtab;
1465}
1466
1467Expected<NamedInstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
1468 StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName,
1469 uint64_t *MismatchedFuncSum) {
1470 ArrayRef<NamedInstrProfRecord> Data;
1471 uint64_t FuncSum = 0;
1472 auto Err = Remapper->getRecords(FuncName, Data);
1473 if (Err) {
1474 // If we don't find FuncName, try DeprecatedFuncName to handle profiles
1475 // built by older compilers.
1476 auto Err2 =
1477 handleErrors(E: std::move(Err), Hs: [&](const InstrProfError &IE) -> Error {
1478 if (IE.get() != instrprof_error::unknown_function)
1479 return make_error<InstrProfError>(Args: IE);
1480 if (auto Err = Remapper->getRecords(FuncName: DeprecatedFuncName, Data))
1481 return Err;
1482 return Error::success();
1483 });
1484 if (Err2)
1485 return std::move(Err2);
1486 }
1487 // Found it. Look for counters with the right hash.
1488
1489 // A flag to indicate if the records are from the same type
1490 // of profile (i.e cs vs nocs).
1491 bool CSBitMatch = false;
1492 auto getFuncSum = [](ArrayRef<uint64_t> Counts) {
1493 uint64_t ValueSum = 0;
1494 for (uint64_t CountValue : Counts) {
1495 if (CountValue == (uint64_t)-1)
1496 continue;
1497 // Handle overflow -- if that happens, return max.
1498 if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum)
1499 return std::numeric_limits<uint64_t>::max();
1500 ValueSum += CountValue;
1501 }
1502 return ValueSum;
1503 };
1504
1505 for (const NamedInstrProfRecord &I : Data) {
1506 // Check for a match and fill the vector if there is one.
1507 if (I.Hash == FuncHash)
1508 return std::move(I);
1509 if (NamedInstrProfRecord::hasCSFlagInHash(FuncHash: I.Hash) ==
1510 NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) {
1511 CSBitMatch = true;
1512 if (MismatchedFuncSum == nullptr)
1513 continue;
1514 FuncSum = std::max(a: FuncSum, b: getFuncSum(I.Counts));
1515 }
1516 }
1517 if (CSBitMatch) {
1518 if (MismatchedFuncSum != nullptr)
1519 *MismatchedFuncSum = FuncSum;
1520 return error(Err: instrprof_error::hash_mismatch);
1521 }
1522 return error(Err: instrprof_error::unknown_function);
1523}
1524
1525static Expected<memprof::MemProfRecord>
1526getMemProfRecordV2(const memprof::IndexedMemProfRecord &IndexedRecord,
1527 MemProfFrameHashTable &MemProfFrameTable,
1528 MemProfCallStackHashTable &MemProfCallStackTable) {
1529 memprof::FrameIdConverter<MemProfFrameHashTable> FrameIdConv(
1530 MemProfFrameTable);
1531
1532 memprof::CallStackIdConverter<MemProfCallStackHashTable> CSIdConv(
1533 MemProfCallStackTable, FrameIdConv);
1534
1535 memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(Callback: CSIdConv);
1536
1537 // Check that all call stack ids were successfully converted to call stacks.
1538 if (CSIdConv.LastUnmappedId) {
1539 return make_error<InstrProfError>(
1540 Args: instrprof_error::hash_mismatch,
1541 Args: "memprof call stack not found for call stack id " +
1542 Twine(*CSIdConv.LastUnmappedId));
1543 }
1544
1545 // Check that all frame ids were successfully converted to frames.
1546 if (FrameIdConv.LastUnmappedId) {
1547 return make_error<InstrProfError>(Args: instrprof_error::hash_mismatch,
1548 Args: "memprof frame not found for frame id " +
1549 Twine(*FrameIdConv.LastUnmappedId));
1550 }
1551
1552 return Record;
1553}
1554
1555Expected<memprof::MemProfRecord>
1556IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
1557 // TODO: Add memprof specific errors.
1558 if (MemProfRecordTable == nullptr)
1559 return make_error<InstrProfError>(Args: instrprof_error::invalid_prof,
1560 Args: "no memprof data available in profile");
1561 auto Iter = MemProfRecordTable->find(EKey: FuncNameHash);
1562 if (Iter == MemProfRecordTable->end())
1563 return make_error<InstrProfError>(
1564 Args: instrprof_error::unknown_function,
1565 Args: "memprof record not found for function hash " + Twine(FuncNameHash));
1566
1567 const memprof::IndexedMemProfRecord &IndexedRecord = *Iter;
1568 switch (Version) {
1569 case memprof::Version2:
1570 assert(MemProfFrameTable && "MemProfFrameTable must be available");
1571 assert(MemProfCallStackTable && "MemProfCallStackTable must be available");
1572 return getMemProfRecordV2(IndexedRecord, MemProfFrameTable&: *MemProfFrameTable,
1573 MemProfCallStackTable&: *MemProfCallStackTable);
1574 // Combine V3 and V4 cases as the record conversion logic is the same.
1575 case memprof::Version3:
1576 case memprof::Version4:
1577 assert(!MemProfFrameTable && "MemProfFrameTable must not be available");
1578 assert(!MemProfCallStackTable &&
1579 "MemProfCallStackTable must not be available");
1580 assert(FrameBase && "FrameBase must be available");
1581 assert(CallStackBase && "CallStackBase must be available");
1582 {
1583 memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
1584 memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
1585 memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(Callback: CSIdConv);
1586 return Record;
1587 }
1588 }
1589
1590 return make_error<InstrProfError>(
1591 Args: instrprof_error::unsupported_version,
1592 Args: formatv(Fmt: "MemProf version {} not supported; "
1593 "requires version between {} and {}, inclusive",
1594 Vals: Version, Vals: memprof::MinimumSupportedVersion,
1595 Vals: memprof::MaximumSupportedVersion));
1596}
1597
1598DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
1599IndexedMemProfReader::getMemProfCallerCalleePairs() const {
1600 assert(MemProfRecordTable);
1601 assert(Version == memprof::Version3 || Version == memprof::Version4);
1602
1603 memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
1604 memprof::CallerCalleePairExtractor Extractor(CallStackBase, FrameIdConv,
1605 RadixTreeSize);
1606
1607 // The set of linear call stack IDs that we need to traverse from. We expect
1608 // the set to be dense, so we use a BitVector.
1609 BitVector Worklist(RadixTreeSize);
1610
1611 // Collect the set of linear call stack IDs. Since we expect a lot of
1612 // duplicates, we first collect them in the form of a bit vector before
1613 // processing them.
1614 for (const memprof::IndexedMemProfRecord &IndexedRecord :
1615 MemProfRecordTable->data()) {
1616 for (const memprof::IndexedAllocationInfo &IndexedAI :
1617 IndexedRecord.AllocSites)
1618 Worklist.set(IndexedAI.CSId);
1619 }
1620
1621 // Collect caller-callee pairs for each linear call stack ID in Worklist.
1622 for (unsigned CS : Worklist.set_bits())
1623 Extractor(CS);
1624
1625 DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> Pairs =
1626 std::move(Extractor.CallerCalleePairs);
1627
1628 // Sort each call list by the source location.
1629 for (auto &[CallerGUID, CallList] : Pairs) {
1630 llvm::sort(C&: CallList);
1631 CallList.erase(CS: llvm::unique(R&: CallList), CE: CallList.end());
1632 }
1633
1634 return Pairs;
1635}
1636
1637memprof::AllMemProfData IndexedMemProfReader::getAllMemProfData() const {
1638 memprof::AllMemProfData AllMemProfData;
1639 AllMemProfData.HeapProfileRecords.reserve(
1640 n: MemProfRecordTable->getNumEntries());
1641 for (uint64_t Key : MemProfRecordTable->keys()) {
1642 auto Record = getMemProfRecord(FuncNameHash: Key);
1643 if (Record.takeError())
1644 continue;
1645 memprof::GUIDMemProfRecordPair Pair;
1646 Pair.GUID = Key;
1647 Pair.Record = std::move(*Record);
1648 AllMemProfData.HeapProfileRecords.push_back(x: std::move(Pair));
1649 }
1650 // Populate the data access profiles for yaml output.
1651 if (DataAccessProfileData != nullptr) {
1652 AllMemProfData.YamlifiedDataAccessProfiles.Records.reserve(
1653 n: DataAccessProfileData->getRecords().size());
1654 AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.reserve(
1655 n: DataAccessProfileData->getKnownColdSymbols().size());
1656 AllMemProfData.YamlifiedDataAccessProfiles.KnownColdStrHashes.reserve(
1657 n: DataAccessProfileData->getKnownColdHashes().size());
1658 for (const auto &[SymHandleRef, RecordRef] :
1659 DataAccessProfileData->getRecords())
1660 AllMemProfData.YamlifiedDataAccessProfiles.Records.push_back(
1661 x: memprof::DataAccessProfRecord(SymHandleRef, RecordRef.AccessCount,
1662 RecordRef.Locations));
1663 for (StringRef ColdSymbol : DataAccessProfileData->getKnownColdSymbols())
1664 AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.push_back(
1665 x: ColdSymbol.str());
1666 for (uint64_t Hash : DataAccessProfileData->getKnownColdHashes())
1667 AllMemProfData.YamlifiedDataAccessProfiles.KnownColdStrHashes.push_back(
1668 x: Hash);
1669 llvm::stable_sort(Range&: AllMemProfData.YamlifiedDataAccessProfiles.Records,
1670 C: [](const llvm::memprof::DataAccessProfRecord &lhs,
1671 const llvm::memprof::DataAccessProfRecord &rhs) {
1672 return lhs.AccessCount > rhs.AccessCount;
1673 });
1674 llvm::stable_sort(
1675 Range&: AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols,
1676 C: [](const std::string &lhs, const std::string &rhs) {
1677 return lhs < rhs;
1678 });
1679 llvm::stable_sort(
1680 Range&: AllMemProfData.YamlifiedDataAccessProfiles.KnownColdStrHashes,
1681 C: [](const uint64_t &lhs, const uint64_t &rhs) { return lhs < rhs; });
1682 }
1683 return AllMemProfData;
1684}
1685
1686Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
1687 uint64_t FuncHash,
1688 std::vector<uint64_t> &Counts) {
1689 auto Record = getInstrProfRecord(FuncName, FuncHash);
1690 if (Error E = Record.takeError())
1691 return error(E: std::move(E));
1692
1693 Counts = Record.get().Counts;
1694 return success();
1695}
1696
1697Error IndexedInstrProfReader::getFunctionBitmap(StringRef FuncName,
1698 uint64_t FuncHash,
1699 BitVector &Bitmap) {
1700 auto Record = getInstrProfRecord(FuncName, FuncHash);
1701 if (Error E = Record.takeError())
1702 return error(E: std::move(E));
1703
1704 const auto &BitmapBytes = Record.get().BitmapBytes;
1705 size_t I = 0, E = BitmapBytes.size();
1706 Bitmap.resize(N: E * CHAR_BIT);
1707 BitVector::apply(
1708 f: [&](auto X) {
1709 using XTy = decltype(X);
1710 alignas(XTy) uint8_t W[sizeof(X)];
1711 size_t N = std::min(a: E - I, b: sizeof(W));
1712 std::memset(s: W, c: 0, n: sizeof(W));
1713 std::memcpy(dest: W, src: &BitmapBytes[I], n: N);
1714 I += N;
1715 return support::endian::read<XTy, support::aligned>(
1716 W, llvm::endianness::little);
1717 },
1718 Out&: Bitmap, Arg: Bitmap);
1719 assert(I == E);
1720
1721 return success();
1722}
1723
1724Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
1725 ArrayRef<NamedInstrProfRecord> Data;
1726
1727 Error E = Index->getRecords(Data);
1728 if (E)
1729 return error(E: std::move(E));
1730
1731 Record = Data[RecordIndex++];
1732 if (RecordIndex >= Data.size()) {
1733 Index->advanceToNextKey();
1734 RecordIndex = 0;
1735 }
1736 return success();
1737}
1738
1739Error IndexedInstrProfReader::readBinaryIds(
1740 std::vector<llvm::object::BuildID> &BinaryIds) {
1741 return readBinaryIdsInternal(DataBuffer: *DataBuffer, BinaryIdsBuffer, BinaryIds,
1742 Endian: llvm::endianness::little);
1743}
1744
1745Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) {
1746 std::vector<llvm::object::BuildID> BinaryIds;
1747 if (Error E = readBinaryIds(BinaryIds))
1748 return E;
1749 printBinaryIdsInternal(OS, BinaryIds);
1750 return Error::success();
1751}
1752
1753void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
1754 uint64_t NumFuncs = 0;
1755 for (const auto &Func : *this) {
1756 if (isIRLevelProfile()) {
1757 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(FuncHash: Func.Hash);
1758 if (FuncIsCS != IsCS)
1759 continue;
1760 }
1761 Func.accumulateCounts(Sum);
1762 ++NumFuncs;
1763 }
1764 Sum.NumEntries = NumFuncs;
1765}
1766