1//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading profiling data for instrumentation
10// based PGO and coverage.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15#define LLVM_PROFILEDATA_INSTRPROFREADER_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/IR/ProfileSummary.h"
20#include "llvm/Object/BuildID.h"
21#include "llvm/ProfileData/DataAccessProf.h"
22#include "llvm/ProfileData/InstrProf.h"
23#include "llvm/ProfileData/InstrProfCorrelator.h"
24#include "llvm/ProfileData/MemProf.h"
25#include "llvm/ProfileData/MemProfSummary.h"
26#include "llvm/ProfileData/MemProfYAML.h"
27#include "llvm/Support/Compiler.h"
28#include "llvm/Support/Endian.h"
29#include "llvm/Support/Error.h"
30#include "llvm/Support/LineIterator.h"
31#include "llvm/Support/MathExtras.h"
32#include "llvm/Support/MemoryBuffer.h"
33#include "llvm/Support/OnDiskHashTable.h"
34#include "llvm/Support/SwapByteOrder.h"
35#include <algorithm>
36#include <cassert>
37#include <cstddef>
38#include <cstdint>
39#include <iterator>
40#include <memory>
41#include <utility>
42#include <vector>
43
44namespace llvm {
45
// Forward declaration: InstrProfIterator (below) names InstrProfReader as its
// default reader type before the class itself is defined.
class InstrProfReader;

namespace vfs {
// Forward declaration; this header only refers to FileSystem by reference in
// the reader factory signatures.
class FileSystem;
} // namespace vfs
51
52/// A file format agnostic iterator over profiling data.
53template <class record_type = NamedInstrProfRecord,
54 class reader_type = InstrProfReader>
55class InstrProfIterator {
56public:
57 using iterator_category = std::input_iterator_tag;
58 using value_type = record_type;
59 using difference_type = std::ptrdiff_t;
60 using pointer = value_type *;
61 using reference = value_type &;
62
63private:
64 reader_type *Reader = nullptr;
65 value_type Record;
66
67 void increment() {
68 if (Error E = Reader->readNextRecord(Record)) {
69 // Handle errors in the reader.
70 InstrProfError::take(E: std::move(E));
71 *this = InstrProfIterator();
72 }
73 }
74
75public:
76 InstrProfIterator() = default;
77 InstrProfIterator(reader_type *Reader) : Reader(Reader) { increment(); }
78
79 InstrProfIterator &operator++() {
80 increment();
81 return *this;
82 }
83 bool operator==(const InstrProfIterator &RHS) const {
84 return Reader == RHS.Reader;
85 }
86 bool operator!=(const InstrProfIterator &RHS) const {
87 return Reader != RHS.Reader;
88 }
89 value_type &operator*() { return Record; }
90 value_type *operator->() { return &Record; }
91};
92
93/// Base class and interface for reading profiling data of any known instrprof
94/// format. Provides an iterator over NamedInstrProfRecords.
95class InstrProfReader {
96 instrprof_error LastError = instrprof_error::success;
97 std::string LastErrorMsg;
98
99public:
100 InstrProfReader() = default;
101 virtual ~InstrProfReader() = default;
102
103 /// Read the header. Required before reading first record.
104 virtual Error readHeader() = 0;
105
106 /// Read a single record.
107 virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
108
109 /// Read a list of binary ids.
110 virtual Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) {
111 return success();
112 }
113
114 /// Print binary ids.
115 virtual Error printBinaryIds(raw_ostream &OS) { return success(); };
116
117 /// Iterator over profile data.
118 InstrProfIterator<> begin() { return InstrProfIterator<>(this); }
119 InstrProfIterator<> end() { return InstrProfIterator<>(); }
120
121 /// Return the profile version.
122 virtual uint64_t getVersion() const = 0;
123
124 virtual bool isIRLevelProfile() const = 0;
125
126 virtual bool hasCSIRLevelProfile() const = 0;
127
128 virtual bool instrEntryBBEnabled() const = 0;
129
130 /// Return true if the profile instruments all loop entries.
131 virtual bool instrLoopEntriesEnabled() const = 0;
132
133 /// Return true if the profile has single byte counters representing coverage.
134 virtual bool hasSingleByteCoverage() const = 0;
135
136 /// Return true if the profile only instruments function entries.
137 virtual bool functionEntryOnly() const = 0;
138
139 /// Return true if profile includes a memory profile.
140 virtual bool hasMemoryProfile() const = 0;
141
142 /// Return true if this has a temporal profile.
143 virtual bool hasTemporalProfile() const = 0;
144
145 /// Returns a BitsetEnum describing the attributes of the profile. To check
146 /// individual attributes prefer using the helpers above.
147 virtual InstrProfKind getProfileKind() const = 0;
148
149 /// Return the PGO symtab. There are three different readers:
150 /// Raw, Text, and Indexed profile readers. The first two types
151 /// of readers are used only by llvm-profdata tool, while the indexed
152 /// profile reader is also used by llvm-cov tool and the compiler (
153 /// backend or frontend). Since creating PGO symtab can create
154 /// significant runtime and memory overhead (as it touches data
155 /// for the whole program), InstrProfSymtab for the indexed profile
156 /// reader should be created on demand and it is recommended to be
157 /// only used for dumping purpose with llvm-proftool, not with the
158 /// compiler.
159 virtual InstrProfSymtab &getSymtab() = 0;
160
161 /// Compute the sum of counts and return in Sum.
162 LLVM_ABI void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
163
164protected:
165 std::unique_ptr<InstrProfSymtab> Symtab;
166 /// A list of temporal profile traces.
167 SmallVector<TemporalProfTraceTy> TemporalProfTraces;
168 /// The total number of temporal profile traces seen.
169 uint64_t TemporalProfTraceStreamSize = 0;
170
171 /// Set the current error and return same.
172 Error error(instrprof_error Err, const std::string &ErrMsg = "") {
173 LastError = Err;
174 LastErrorMsg = ErrMsg;
175 if (Err == instrprof_error::success)
176 return Error::success();
177 return make_error<InstrProfError>(Args&: Err, Args: ErrMsg);
178 }
179
180 Error error(Error &&E) {
181 handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
182 LastError = IPE.get();
183 LastErrorMsg = IPE.getMessage();
184 });
185 return make_error<InstrProfError>(Args&: LastError, Args&: LastErrorMsg);
186 }
187
188 /// Clear the current error and return a successful one.
189 Error success() { return error(Err: instrprof_error::success); }
190
191public:
192 /// Return true if the reader has finished reading the profile data.
193 bool isEOF() { return LastError == instrprof_error::eof; }
194
195 /// Return true if the reader encountered an error reading profiling data.
196 bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
197
198 /// Get the current error.
199 Error getError() {
200 if (hasError())
201 return make_error<InstrProfError>(Args&: LastError, Args&: LastErrorMsg);
202 return Error::success();
203 }
204
205 /// Factory method to create an appropriately typed reader for the given
206 /// instrprof file.
207 LLVM_ABI static Expected<std::unique_ptr<InstrProfReader>> create(
208 const Twine &Path, vfs::FileSystem &FS,
209 const InstrProfCorrelator *Correlator = nullptr,
210 const object::BuildIDFetcher *BIDFetcher = nullptr,
211 const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind =
212 InstrProfCorrelator::ProfCorrelatorKind::NONE,
213 std::function<void(Error)> Warn = nullptr);
214
215 LLVM_ABI static Expected<std::unique_ptr<InstrProfReader>> create(
216 std::unique_ptr<MemoryBuffer> Buffer,
217 const InstrProfCorrelator *Correlator = nullptr,
218 const object::BuildIDFetcher *BIDFetcher = nullptr,
219 const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind =
220 InstrProfCorrelator::ProfCorrelatorKind::NONE,
221 std::function<void(Error)> Warn = nullptr);
222
223 /// \param Weight for raw profiles use this as the temporal profile trace
224 /// weight
225 /// \returns a list of temporal profile traces.
226 virtual SmallVector<TemporalProfTraceTy> &
227 getTemporalProfTraces(std::optional<uint64_t> Weight = {}) {
228 // For non-raw profiles we ignore the input weight and instead use the
229 // weights already in the traces.
230 return TemporalProfTraces;
231 }
232 /// \returns the total number of temporal profile traces seen.
233 uint64_t getTemporalProfTraceStreamSize() {
234 return TemporalProfTraceStreamSize;
235 }
236};
237
238/// Reader for the simple text based instrprof format.
239///
240/// This format is a simple text format that's suitable for test data. Records
241/// are separated by one or more blank lines, and record fields are separated by
242/// new lines.
243///
244/// Each record consists of a function name, a function hash, a number of
245/// counters, and then each counter value, in that order.
246class LLVM_ABI TextInstrProfReader : public InstrProfReader {
247private:
248 /// The profile data file contents.
249 std::unique_ptr<MemoryBuffer> DataBuffer;
250 /// Iterator over the profile data.
251 line_iterator Line;
252 /// The attributes of the current profile.
253 InstrProfKind ProfileKind = InstrProfKind::Unknown;
254
255 Error readValueProfileData(InstrProfRecord &Record);
256
257 Error readTemporalProfTraceData();
258
259public:
260 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
261 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
262 TextInstrProfReader(const TextInstrProfReader &) = delete;
263 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
264
265 /// Return true if the given buffer is in text instrprof format.
266 static bool hasFormat(const MemoryBuffer &Buffer);
267
268 // Text format does not have version, so return 0.
269 uint64_t getVersion() const override { return 0; }
270
271 bool isIRLevelProfile() const override {
272 return static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation);
273 }
274
275 bool hasCSIRLevelProfile() const override {
276 return static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive);
277 }
278
279 bool instrEntryBBEnabled() const override {
280 return static_cast<bool>(ProfileKind &
281 InstrProfKind::FunctionEntryInstrumentation);
282 }
283
284 bool instrLoopEntriesEnabled() const override {
285 return static_cast<bool>(ProfileKind &
286 InstrProfKind::LoopEntriesInstrumentation);
287 }
288
289 bool hasSingleByteCoverage() const override {
290 return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
291 }
292
293 bool functionEntryOnly() const override {
294 return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly);
295 }
296
297 bool hasMemoryProfile() const override {
298 // TODO: Add support for text format memory profiles.
299 return false;
300 }
301
302 bool hasTemporalProfile() const override {
303 return static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile);
304 }
305
306 InstrProfKind getProfileKind() const override { return ProfileKind; }
307
308 /// Read the header.
309 Error readHeader() override;
310
311 /// Read a single record.
312 Error readNextRecord(NamedInstrProfRecord &Record) override;
313
314 InstrProfSymtab &getSymtab() override {
315 assert(Symtab);
316 return *Symtab;
317 }
318};
319
320/// Reader for the raw instrprof binary format from runtime.
321///
322/// This format is a raw memory dump of the instrumentation-based profiling data
323/// from the runtime. It has no index.
324///
325/// Templated on the unsigned type whose size matches pointers on the platform
326/// that wrote the profile.
327template <class IntPtrT>
328class RawInstrProfReader : public InstrProfReader {
329private:
330 /// The profile data file contents.
331 std::unique_ptr<MemoryBuffer> DataBuffer;
332 /// If available, this hold the ProfileData array used to correlate raw
333 /// instrumentation data to their functions.
334 const InstrProfCorrelatorImpl<IntPtrT> *Correlator;
335 /// Fetches debuginfo by build id to correlate profiles.
336 const object::BuildIDFetcher *BIDFetcher;
337 /// Correlates profiles with build id fetcher by fetching debuginfo with build
338 /// ID.
339 std::unique_ptr<InstrProfCorrelator> BIDFetcherCorrelator;
340 /// Indicates if should use debuginfo or binary to correlate with build id
341 /// fetcher.
342 InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind;
343 /// A list of timestamps paired with a function name reference.
344 std::vector<std::pair<uint64_t, uint64_t>> TemporalProfTimestamps;
345 bool ShouldSwapBytes;
346 // The value of the version field of the raw profile data header. The lower 32
347 // bits specifies the format version and the most significant 32 bits specify
348 // the variant types of the profile.
349 uint64_t Version;
350 uint64_t CountersDelta;
351 uint64_t BitmapDelta;
352 uint64_t NamesDelta;
353 const RawInstrProf::ProfileData<IntPtrT> *Data;
354 const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
355 const RawInstrProf::VTableProfileData<IntPtrT> *VTableBegin = nullptr;
356 const RawInstrProf::VTableProfileData<IntPtrT> *VTableEnd = nullptr;
357 const char *CountersStart;
358 const char *CountersEnd;
359 const char *BitmapStart;
360 const char *BitmapEnd;
361 const char *NamesStart;
362 const char *NamesEnd;
363 const char *VNamesStart = nullptr;
364 const char *VNamesEnd = nullptr;
365 // After value profile is all read, this pointer points to
366 // the header of next profile data (if exists)
367 const uint8_t *ValueDataStart;
368 uint32_t ValueKindLast;
369 uint32_t CurValueDataSize;
370 std::vector<llvm::object::BuildID> BinaryIds;
371
372 std::function<void(Error)> Warn;
373
374 /// Maxium counter value 2^56.
375 static const uint64_t MaxCounterValue = (1ULL << 56);
376
377public:
378 RawInstrProfReader(
379 std::unique_ptr<MemoryBuffer> DataBuffer,
380 const InstrProfCorrelator *Correlator,
381 const object::BuildIDFetcher *BIDFetcher,
382 const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind,
383 std::function<void(Error)> Warn)
384 : DataBuffer(std::move(DataBuffer)),
385 Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>(
386 Correlator)),
387 BIDFetcher(BIDFetcher),
388 BIDFetcherCorrelatorKind(BIDFetcherCorrelatorKind), Warn(Warn) {}
389
390 RawInstrProfReader(const RawInstrProfReader &) = delete;
391 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
392
393 static bool hasFormat(const MemoryBuffer &DataBuffer);
394 Error readHeader() override;
395 Error readNextRecord(NamedInstrProfRecord &Record) override;
396 Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
397 Error printBinaryIds(raw_ostream &OS) override;
398
399 uint64_t getVersion() const override { return Version; }
400
401 bool isIRLevelProfile() const override {
402 return (Version & VARIANT_MASK_IR_PROF) != 0;
403 }
404
405 bool hasCSIRLevelProfile() const override {
406 return (Version & VARIANT_MASK_CSIR_PROF) != 0;
407 }
408
409 bool instrEntryBBEnabled() const override {
410 return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
411 }
412
413 bool instrLoopEntriesEnabled() const override {
414 return (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
415 }
416
417 bool hasSingleByteCoverage() const override {
418 return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0;
419 }
420
421 bool functionEntryOnly() const override {
422 return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
423 }
424
425 bool hasMemoryProfile() const override {
426 // Memory profiles have a separate raw format, so this should never be set.
427 assert(!(Version & VARIANT_MASK_MEMPROF));
428 return false;
429 }
430
431 bool hasTemporalProfile() const override {
432 return (Version & VARIANT_MASK_TEMPORAL_PROF) != 0;
433 }
434
435 /// Returns a BitsetEnum describing the attributes of the raw instr profile.
436 InstrProfKind getProfileKind() const override;
437
438 InstrProfSymtab &getSymtab() override {
439 assert(Symtab.get());
440 return *Symtab.get();
441 }
442
443 SmallVector<TemporalProfTraceTy> &
444 getTemporalProfTraces(std::optional<uint64_t> Weight = {}) override;
445
446private:
447 Error createSymtab(InstrProfSymtab &Symtab);
448 Error readNextHeader(const char *CurrentPos);
449 Error readHeader(const RawInstrProf::Header &Header);
450
451 template <class IntT> IntT swap(IntT Int) const {
452 return ShouldSwapBytes ? llvm::byteswap(Int) : Int;
453 }
454
455 llvm::endianness getDataEndianness() const {
456 if (!ShouldSwapBytes)
457 return llvm::endianness::native;
458 if (llvm::endianness::native == llvm::endianness::little)
459 return llvm::endianness::big;
460 else
461 return llvm::endianness::little;
462 }
463
464 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
465 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
466 }
467
468 Error readName(NamedInstrProfRecord &Record);
469 Error readFuncHash(NamedInstrProfRecord &Record);
470 Error readRawCounts(InstrProfRecord &Record);
471 Error readRawBitmapBytes(InstrProfRecord &Record);
472 Error readValueProfilingData(InstrProfRecord &Record);
473 bool atEnd() const { return Data == DataEnd; }
474
475 void advanceData() {
476 // `CountersDelta` is a constant zero when using debug info correlation.
477 if (!Correlator && !BIDFetcherCorrelator) {
478 // The initial CountersDelta is the in-memory address difference between
479 // the data and counts sections:
480 // start(__llvm_prf_cnts) - start(__llvm_prf_data)
481 // As we advance to the next record, we maintain the correct CountersDelta
482 // with respect to the next record.
483 CountersDelta -= sizeof(*Data);
484 BitmapDelta -= sizeof(*Data);
485 }
486 Data++;
487 ValueDataStart += CurValueDataSize;
488 }
489
490 const char *getNextHeaderPos() const {
491 assert(atEnd());
492 return (const char *)ValueDataStart;
493 }
494
495 StringRef getName(uint64_t NameRef) const {
496 return Symtab->getFuncOrVarName(MD5Hash: swap(NameRef));
497 }
498
499 int getCounterTypeSize() const {
500 return hasSingleByteCoverage() ? sizeof(uint8_t) : sizeof(uint64_t);
501 }
502};
503
504using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
505using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
506
507namespace IndexedInstrProf {
508
509enum class HashT : uint32_t;
510
511} // end namespace IndexedInstrProf
512
513/// Trait for lookups into the on-disk hash table for the binary instrprof
514/// format.
515class InstrProfLookupTrait {
516 std::vector<NamedInstrProfRecord> DataBuffer;
517 IndexedInstrProf::HashT HashType;
518 unsigned FormatVersion;
519 // Endianness of the input value profile data.
520 // It should be LE by default, but can be changed
521 // for testing purpose.
522 llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
523
524public:
525 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
526 : HashType(HashType), FormatVersion(FormatVersion) {}
527
528 using data_type = ArrayRef<NamedInstrProfRecord>;
529
530 using internal_key_type = StringRef;
531 using external_key_type = StringRef;
532 using hash_value_type = uint64_t;
533 using offset_type = uint64_t;
534
535 static bool EqualKey(StringRef A, StringRef B) { return A == B; }
536 static StringRef GetInternalKey(StringRef K) { return K; }
537 static StringRef GetExternalKey(StringRef K) { return K; }
538
539 LLVM_ABI hash_value_type ComputeHash(StringRef K);
540
541 static std::pair<offset_type, offset_type>
542 ReadKeyDataLength(const unsigned char *&D) {
543 using namespace support;
544
545 offset_type KeyLen =
546 endian::readNext<offset_type, llvm::endianness::little>(memory&: D);
547 offset_type DataLen =
548 endian::readNext<offset_type, llvm::endianness::little>(memory&: D);
549 return std::make_pair(x&: KeyLen, y&: DataLen);
550 }
551
552 StringRef ReadKey(const unsigned char *D, offset_type N) {
553 return StringRef((const char *)D, N);
554 }
555
556 LLVM_ABI bool readValueProfilingData(const unsigned char *&D,
557 const unsigned char *const End);
558 LLVM_ABI data_type ReadData(StringRef K, const unsigned char *D,
559 offset_type N);
560
561 // Used for testing purpose only.
562 void setValueProfDataEndianness(llvm::endianness Endianness) {
563 ValueProfDataEndianness = Endianness;
564 }
565};
566
567struct InstrProfReaderIndexBase {
568 virtual ~InstrProfReaderIndexBase() = default;
569
570 // Read all the profile records with the same key pointed to the current
571 // iterator.
572 virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
573
574 // Read all the profile records with the key equal to FuncName
575 virtual Error getRecords(StringRef FuncName,
576 ArrayRef<NamedInstrProfRecord> &Data) = 0;
577 virtual void advanceToNextKey() = 0;
578 virtual bool atEnd() const = 0;
579 virtual void setValueProfDataEndianness(llvm::endianness Endianness) = 0;
580 virtual uint64_t getVersion() const = 0;
581 virtual bool isIRLevelProfile() const = 0;
582 virtual bool hasCSIRLevelProfile() const = 0;
583 virtual bool instrEntryBBEnabled() const = 0;
584 virtual bool instrLoopEntriesEnabled() const = 0;
585 virtual bool hasSingleByteCoverage() const = 0;
586 virtual bool functionEntryOnly() const = 0;
587 virtual bool hasMemoryProfile() const = 0;
588 virtual bool hasTemporalProfile() const = 0;
589 virtual InstrProfKind getProfileKind() const = 0;
590 virtual Error populateSymtab(InstrProfSymtab &) = 0;
591};
592
593using OnDiskHashTableImplV3 =
594 OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
595
596using MemProfRecordHashTable =
597 OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
598using MemProfFrameHashTable =
599 OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
600using MemProfCallStackHashTable =
601 OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>;
602
603template <typename HashTableImpl>
604class InstrProfReaderItaniumRemapper;
605
606template <typename HashTableImpl>
607class InstrProfReaderIndex : public InstrProfReaderIndexBase {
608private:
609 std::unique_ptr<HashTableImpl> HashTable;
610 typename HashTableImpl::data_iterator RecordIterator;
611 uint64_t FormatVersion;
612
613 friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
614
615public:
616 InstrProfReaderIndex(const unsigned char *Buckets,
617 const unsigned char *const Payload,
618 const unsigned char *const Base,
619 IndexedInstrProf::HashT HashType, uint64_t Version);
620 ~InstrProfReaderIndex() override = default;
621
622 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
623 Error getRecords(StringRef FuncName,
624 ArrayRef<NamedInstrProfRecord> &Data) override;
625 void advanceToNextKey() override { RecordIterator++; }
626
627 bool atEnd() const override {
628 return RecordIterator == HashTable->data_end();
629 }
630
631 void setValueProfDataEndianness(llvm::endianness Endianness) override {
632 HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
633 }
634
635 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
636
637 bool isIRLevelProfile() const override {
638 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
639 }
640
641 bool hasCSIRLevelProfile() const override {
642 return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
643 }
644
645 bool instrEntryBBEnabled() const override {
646 return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
647 }
648
649 bool instrLoopEntriesEnabled() const override {
650 return (FormatVersion & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
651 }
652
653 bool hasSingleByteCoverage() const override {
654 return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0;
655 }
656
657 bool functionEntryOnly() const override {
658 return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
659 }
660
661 bool hasMemoryProfile() const override {
662 return (FormatVersion & VARIANT_MASK_MEMPROF) != 0;
663 }
664
665 bool hasTemporalProfile() const override {
666 return (FormatVersion & VARIANT_MASK_TEMPORAL_PROF) != 0;
667 }
668
669 InstrProfKind getProfileKind() const override;
670
671 Error populateSymtab(InstrProfSymtab &Symtab) override {
672 // FIXME: the create method calls 'finalizeSymtab' and sorts a bunch of
673 // arrays/maps. Since there are other data sources other than 'HashTable' to
674 // populate a symtab, it might make sense to have something like this
675 // 1. Let each data source populate Symtab and init the arrays/maps without
676 // calling 'finalizeSymtab'
677 // 2. Call 'finalizeSymtab' once to get all arrays/maps sorted if needed.
678 return Symtab.create(HashTable->keys());
679 }
680};
681
682/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
683class InstrProfReaderRemapper {
684public:
685 virtual ~InstrProfReaderRemapper() = default;
686 virtual Error populateRemappings() { return Error::success(); }
687 virtual Error getRecords(StringRef FuncName,
688 ArrayRef<NamedInstrProfRecord> &Data) = 0;
689};
690
691class IndexedMemProfReader {
692private:
693 /// The MemProf version.
694 memprof::IndexedVersion Version =
695 static_cast<memprof::IndexedVersion>(memprof::MinimumSupportedVersion);
696 /// MemProf summary (if available, version >= 4).
697 std::unique_ptr<memprof::MemProfSummary> MemProfSum;
698 /// MemProf profile schema (if available).
699 memprof::MemProfSchema Schema;
700 /// MemProf record profile data on-disk indexed via llvm::md5(FunctionName).
701 std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
702 /// MemProf frame profile data on-disk indexed via frame id.
703 std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
704 /// MemProf call stack data on-disk indexed via call stack id.
705 std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
706 /// The starting address of the frame array.
707 const unsigned char *FrameBase = nullptr;
708 /// The starting address of the call stack array.
709 const unsigned char *CallStackBase = nullptr;
710 // The number of elements in the radix tree array.
711 unsigned RadixTreeSize = 0;
712 /// The data access profiles, deserialized from binary data.
713 std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
714
715 Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
716 Error deserializeRadixTreeBased(const unsigned char *Start,
717 const unsigned char *Ptr,
718 memprof::IndexedVersion Version);
719
720public:
721 IndexedMemProfReader() = default;
722
723 LLVM_ABI Error deserialize(const unsigned char *Start,
724 uint64_t MemProfOffset);
725
726 LLVM_ABI Expected<memprof::MemProfRecord>
727 getMemProfRecord(const uint64_t FuncNameHash) const;
728
729 LLVM_ABI DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
730 getMemProfCallerCalleePairs() const;
731
732 // Return the entire MemProf profile.
733 LLVM_ABI memprof::AllMemProfData getAllMemProfData() const;
734
735 memprof::MemProfSummary *getSummary() const { return MemProfSum.get(); }
736};
737
738/// Reader for the indexed binary instrprof format.
739class LLVM_ABI IndexedInstrProfReader : public InstrProfReader {
740private:
741 /// The profile data file contents.
742 std::unique_ptr<MemoryBuffer> DataBuffer;
743 /// The profile remapping file contents.
744 std::unique_ptr<MemoryBuffer> RemappingBuffer;
745 /// The index into the profile data.
746 std::unique_ptr<InstrProfReaderIndexBase> Index;
747 /// The profile remapping file contents.
748 std::unique_ptr<InstrProfReaderRemapper> Remapper;
749 /// Profile summary data.
750 std::unique_ptr<ProfileSummary> Summary;
751 /// Context sensitive profile summary data.
752 std::unique_ptr<ProfileSummary> CS_Summary;
753 IndexedMemProfReader MemProfReader;
754 /// The compressed vtable names, to be used for symtab construction.
755 /// A compiler that reads indexed profiles could construct symtab from module
756 /// IR so it doesn't need the decompressed names.
757 StringRef VTableName;
758 /// A memory buffer holding binary ids.
759 ArrayRef<uint8_t> BinaryIdsBuffer;
760
761 // Index to the current record in the record array.
762 unsigned RecordIndex = 0;
763
764 // Read the profile summary. Return a pointer pointing to one byte past the
765 // end of the summary data if it exists or the input \c Cur.
766 // \c UseCS indicates whether to use the context-sensitive profile summary.
767 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
768 const unsigned char *Cur, bool UseCS);
769
770public:
771 IndexedInstrProfReader(
772 std::unique_ptr<MemoryBuffer> DataBuffer,
773 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
774 : DataBuffer(std::move(DataBuffer)),
775 RemappingBuffer(std::move(RemappingBuffer)) {}
776 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
777 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
778
779 /// Return the profile version.
780 uint64_t getVersion() const override { return Index->getVersion(); }
781 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
782 bool hasCSIRLevelProfile() const override {
783 return Index->hasCSIRLevelProfile();
784 }
785
786 bool instrEntryBBEnabled() const override {
787 return Index->instrEntryBBEnabled();
788 }
789
790 bool instrLoopEntriesEnabled() const override {
791 return Index->instrLoopEntriesEnabled();
792 }
793
794 bool hasSingleByteCoverage() const override {
795 return Index->hasSingleByteCoverage();
796 }
797
798 bool functionEntryOnly() const override { return Index->functionEntryOnly(); }
799
800 bool hasMemoryProfile() const override { return Index->hasMemoryProfile(); }
801
802 bool hasTemporalProfile() const override {
803 return Index->hasTemporalProfile();
804 }
805
806 /// Returns a BitsetEnum describing the attributes of the indexed instr
807 /// profile.
808 InstrProfKind getProfileKind() const override {
809 return Index->getProfileKind();
810 }
811
812 /// Return true if the given buffer is in an indexed instrprof format.
813 static bool hasFormat(const MemoryBuffer &DataBuffer);
814
815 /// Read the file header.
816 Error readHeader() override;
817 /// Read a single record.
818 Error readNextRecord(NamedInstrProfRecord &Record) override;
819
820 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash.
821 /// When return a hash_mismatch error and MismatchedFuncSum is not nullptr,
822 /// the sum of all counters in the mismatched function will be set to
823 /// MismatchedFuncSum. If there are multiple instances of mismatched
824 /// functions, MismatchedFuncSum returns the maximum. If \c FuncName is not
825 /// found, try to lookup \c DeprecatedFuncName to handle profiles built by
826 /// older compilers.
827 Expected<NamedInstrProfRecord>
828 getInstrProfRecord(StringRef FuncName, uint64_t FuncHash,
829 StringRef DeprecatedFuncName = "",
830 uint64_t *MismatchedFuncSum = nullptr);
831
832 /// Return the memprof record for the function identified by
833 /// llvm::md5(Name).
834 Expected<memprof::MemProfRecord> getMemProfRecord(uint64_t FuncNameHash) {
835 return MemProfReader.getMemProfRecord(FuncNameHash);
836 }
837
838 DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
839 getMemProfCallerCalleePairs() {
840 return MemProfReader.getMemProfCallerCalleePairs();
841 }
842
843 memprof::AllMemProfData getAllMemProfData() const {
844 return MemProfReader.getAllMemProfData();
845 }
846
847 /// Fill Counts with the profile data for the given function name.
848 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
849 std::vector<uint64_t> &Counts);
850
851 /// Fill Bitmap with the profile data for the given function name.
852 Error getFunctionBitmap(StringRef FuncName, uint64_t FuncHash,
853 BitVector &Bitmap);
854
855 /// Return the maximum of all known function counts.
856 /// \c UseCS indicates whether to use the context-sensitive count.
857 uint64_t getMaximumFunctionCount(bool UseCS) {
858 if (UseCS) {
859 assert(CS_Summary && "No context sensitive profile summary");
860 return CS_Summary->getMaxFunctionCount();
861 } else {
862 assert(Summary && "No profile summary");
863 return Summary->getMaxFunctionCount();
864 }
865 }
866
867 /// Factory method to create an indexed reader.
868 static Expected<std::unique_ptr<IndexedInstrProfReader>>
869 create(const Twine &Path, vfs::FileSystem &FS,
870 const Twine &RemappingPath = "");
871
872 static Expected<std::unique_ptr<IndexedInstrProfReader>>
873 create(std::unique_ptr<MemoryBuffer> Buffer,
874 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
875
876 // Used for testing purpose only.
877 void setValueProfDataEndianness(llvm::endianness Endianness) {
878 Index->setValueProfDataEndianness(Endianness);
879 }
880
881 // See description in the base class. This interface is designed
882 // to be used by llvm-profdata (for dumping). Avoid using this when
883 // the client is the compiler.
884 InstrProfSymtab &getSymtab() override;
885
886 /// Return the profile summary.
887 /// \c UseCS indicates whether to use the context-sensitive summary.
888 ProfileSummary &getSummary(bool UseCS) {
889 if (UseCS) {
890 assert(CS_Summary && "No context sensitive summary");
891 return *CS_Summary;
892 } else {
893 assert(Summary && "No profile summary");
894 return *Summary;
895 }
896 }
897
898 /// Return the MemProf summary. Will be null if unavailable (version < 4).
899 memprof::MemProfSummary *getMemProfSummary() const {
900 return MemProfReader.getSummary();
901 }
902
903 Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
904 Error printBinaryIds(raw_ostream &OS) override;
905};
906
907} // end namespace llvm
908
909#endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
910