1//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
#include "ErrorHandling.h"
#include "ProfiledBinary.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"
#include <cinttypes>
#include <cstdint>
#include <fstream>
#include <map>
20
21namespace llvm {
22
23class CleanupInstaller;
24
25namespace sampleprof {
26
27// Stream based trace line iterator
28class TraceStream {
29 std::string CurrentLine;
30 std::ifstream Fin;
31 bool IsAtEoF = false;
32 uint64_t LineNumber = 0;
33
34public:
35 TraceStream(StringRef Filename) : Fin(Filename.str()) {
36 if (!Fin.good())
37 exitWithError(Message: "Error read input perf script file", Whence: Filename);
38 advance();
39 }
40
41 StringRef getCurrentLine() {
42 assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
43 return CurrentLine;
44 }
45
46 uint64_t getLineNumber() { return LineNumber; }
47
48 bool isAtEoF() { return IsAtEoF; }
49
50 // Read the next line
51 void advance() {
52 if (!std::getline(is&: Fin, str&: CurrentLine)) {
53 IsAtEoF = true;
54 return;
55 }
56 LineNumber++;
57 }
58};
59
// The type of input format.
enum PerfFormat {
  // No format assigned; this is the default held by PerfInputFile.
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData = 1,
  // Perf script created by the `perf script` command.
  PerfScript = 2,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile = 3,
};
68
// The type of perfscript content.
enum PerfContent {
  // No content kind assigned; this is the default held by PerfInputFile.
  UnknownContent = 0,
  // Only LBR sample.
  LBR = 1,
  // Hybrid sample including call stack and LBR stack.
  LBRStack = 2,
};
75
76struct PerfInputFile {
77 std::string InputFile;
78 PerfFormat Format = PerfFormat::UnknownFormat;
79 PerfContent Content = PerfContent::UnknownContent;
80};
81
82// The parsed LBR sample entry.
83struct LBREntry {
84 uint64_t Source = 0;
85 uint64_t Target = 0;
86 LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
87
88#ifndef NDEBUG
89 void print() const {
90 dbgs() << "from " << format("%#010x", Source) << " to "
91 << format("%#010x", Target);
92 }
93#endif
94};
95
96#ifndef NDEBUG
97static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
98 for (size_t I = 0; I < LBRStack.size(); I++) {
99 dbgs() << "[" << I << "] ";
100 LBRStack[I].print();
101 dbgs() << "\n";
102 }
103}
104
105static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
106 for (size_t I = 0; I < CallStack.size(); I++) {
107 dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
108 }
109}
110#endif
111
// Hash interface for generic data of type T.
// T must provide a \fn getHashCode and a \fn isEqual.
// getHashCode is deliberately reached without virtual dispatch here: the
// derived class computes its own hash and stores it in the base class's
// HashCode. That also leaves room for computing the hash incrementally (like a
// rolling hash) during frame stack unwinding, since unwinding only changes the
// leaf of the frame stack. \fn isEqual is a virtual function and therefore has
// some perf overhead. If a better hash function is designed in the future,
// that comparison could be skipped or made non-virtual (e.g. ignore the
// comparison when the probability of hash conflicts is low).
template <class T> class Hashable {
public:
  // The wrapped data; ownership is shared with whoever created it.
  std::shared_ptr<T> Data;
  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hasher functor for use with unordered containers.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      // Forward to the wrapped object; a zero hash is treated as "never
      // generated" and trips the assertion.
      const uint64_t Code = Key.Data->getHashCode();
      assert(Code && "Should generate HashCode for it!");
      return Code;
    }
  };

  // Equality functor for use with unordered containers.
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      // Exact comparison delegated to the wrapped object.
      T *Other = RHS.Data.get();
      return LHS.Data->isEqual(Other);
    }
  };

  // Raw, non-owning pointer to the wrapped data.
  T *getPtr() const { return Data.get(); }
};
147
148struct PerfSample {
149 // LBR stack recorded in FIFO order.
150 SmallVector<LBREntry, 16> LBRStack;
151 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
152 // generation
153 SmallVector<uint64_t, 16> CallStack;
154
155 virtual ~PerfSample() = default;
156 uint64_t getHashCode() const {
157 // Use simple DJB2 hash
158 auto HashCombine = [](uint64_t H, uint64_t V) {
159 return ((H << 5) + H) + V;
160 };
161 uint64_t Hash = 5381;
162 for (const auto &Value : CallStack) {
163 Hash = HashCombine(Hash, Value);
164 }
165 for (const auto &Entry : LBRStack) {
166 Hash = HashCombine(Hash, Entry.Source);
167 Hash = HashCombine(Hash, Entry.Target);
168 }
169 return Hash;
170 }
171
172 bool isEqual(const PerfSample *Other) const {
173 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
174 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
175
176 if (CallStack.size() != OtherCallStack.size() ||
177 LBRStack.size() != OtherLBRStack.size())
178 return false;
179
180 if (!std::equal(first1: CallStack.begin(), last1: CallStack.end(), first2: OtherCallStack.begin()))
181 return false;
182
183 for (size_t I = 0; I < OtherLBRStack.size(); I++) {
184 if (LBRStack[I].Source != OtherLBRStack[I].Source ||
185 LBRStack[I].Target != OtherLBRStack[I].Target)
186 return false;
187 }
188 return true;
189 }
190
191#ifndef NDEBUG
192 uint64_t Linenum = 0;
193
194 void print() const {
195 dbgs() << "Line " << Linenum << "\n";
196 dbgs() << "LBR stack\n";
197 printLBRStack(LBRStack);
198 dbgs() << "Call stack\n";
199 printCallStack(CallStack);
200 }
201#endif
202};
203// After parsing the sample, we record the samples by aggregating them
204// into this counter. The key stores the sample data and the value is
205// the sample repeat times.
206using AggregatedCounter =
207 std::unordered_map<Hashable<PerfSample>, uint64_t,
208 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
209
210using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
211
212inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
213 ProfiledBinary *Binary) {
214 // Start bigger than End is considered invalid.
215 // LBR ranges cross the unconditional jmp are also assumed invalid.
216 // It's found that perf data may contain duplicate LBR entries that could form
217 // a range that does not reflect real execution flow on some Intel targets,
218 // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
219 // cannot be a linear execution range that spans over unconditional jmp.
220 return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
221}
222
223// The state for the unwinder, it doesn't hold the data but only keep the
224// pointer/index of the data, While unwinding, the CallStack is changed
225// dynamicially and will be recorded as the context of the sample
226struct UnwindState {
227 // Profiled binary that current frame address belongs to
228 const ProfiledBinary *Binary;
229 // Call stack trie node
230 struct ProfiledFrame {
231 const uint64_t Address = DummyRoot;
232 ProfiledFrame *Parent;
233 SampleVector RangeSamples;
234 SampleVector BranchSamples;
235 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
236
237 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
238 : Address(Addr), Parent(P) {}
239 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
240 assert(Address && "Address can't be zero!");
241 auto Ret = Children.emplace(
242 args&: Address, args: std::make_unique<ProfiledFrame>(args&: Address, args: this));
243 return Ret.first->second.get();
244 }
245 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
246 RangeSamples.emplace_back(Args: std::make_tuple(args&: Start, args&: End, args&: Count));
247 }
248 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
249 BranchSamples.emplace_back(Args: std::make_tuple(args&: Source, args&: Target, args&: Count));
250 }
251 bool isDummyRoot() { return Address == DummyRoot; }
252 bool isExternalFrame() { return Address == ExternalAddr; }
253 bool isLeafFrame() { return Children.empty(); }
254 };
255
256 ProfiledFrame DummyTrieRoot;
257 ProfiledFrame *CurrentLeafFrame;
258 // Used to fall through the LBR stack
259 uint32_t LBRIndex = 0;
260 // Reference to PerfSample.LBRStack
261 const SmallVector<LBREntry, 16> &LBRStack;
262 // Used to iterate the address range
263 InstructionPointer InstPtr;
264 // Indicate whether unwinding is currently in a bad state which requires to
265 // skip all subsequent unwinding.
266 bool Invalid = false;
267 UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
268 : Binary(Binary), LBRStack(Sample->LBRStack),
269 InstPtr(Binary, Sample->CallStack.front()) {
270 initFrameTrie(CallStack: Sample->CallStack);
271 }
272
273 bool validateInitialState() {
274 uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
275 uint64_t LeafAddr = CurrentLeafFrame->Address;
276 assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
277 "External leading LBR should match the leaf frame.");
278
279 // When we take a stack sample, ideally the sampling distance between the
280 // leaf IP of stack and the last LBR target shouldn't be very large.
281 // Use a heuristic size (0x100) to filter out broken records.
282 if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
283 WithColor::warning() << "Bogus trace: stack tip = "
284 << format(Fmt: "%#010x", Vals: LeafAddr)
285 << ", LBR tip = " << format(Fmt: "%#010x\n", Vals: LBRLeaf);
286 return false;
287 }
288 return true;
289 }
290
291 void checkStateConsistency() {
292 assert(InstPtr.Address == CurrentLeafFrame->Address &&
293 "IP should align with context leaf");
294 }
295
296 void setInvalid() { Invalid = true; }
297 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
298 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
299 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
300 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
301 bool IsLastLBR() const { return LBRIndex == 0; }
302 bool getLBRStackSize() const { return LBRStack.size(); }
303 void advanceLBR() { LBRIndex++; }
304 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
305
306 void pushFrame(uint64_t Address) {
307 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
308 }
309
310 void switchToFrame(uint64_t Address) {
311 if (CurrentLeafFrame->Address == Address)
312 return;
313 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
314 }
315
316 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
317
318 void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
319
320 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
321 ProfiledFrame *Cur = &DummyTrieRoot;
322 for (auto Address : reverse(C: CallStack)) {
323 Cur = Cur->getOrCreateChildFrame(Address);
324 }
325 CurrentLeafFrame = Cur;
326 }
327
328 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
329};
330
// Base class for sample counter key with context
struct ContextKey {
  // Cached hash; zero means it has not been generated yet.
  uint64_t HashCode = 0;
  virtual ~ContextKey() = default;

  // Returns the cached hash, asking the derived class to generate it first
  // while the cache is still zero.
  uint64_t getHashCode() {
    if (!HashCode)
      genHashCode();
    return HashCode;
  }

  // Implemented by derived keys to fill in HashCode.
  virtual void genHashCode() = 0;

  // Default equality only compares the stored hash codes; derived keys
  // override this with a comparison of their context.
  virtual bool isEqual(const ContextKey *K) const {
    return K->HashCode == HashCode;
  }

  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_AddrBased };
  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }
  ContextKey(ContextKind K) : Kind(K) {}
};
351
352// String based context id
353struct StringBasedCtxKey : public ContextKey {
354 SampleContextFrameVector Context;
355
356 bool WasLeafInlined;
357 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
358 static bool classof(const ContextKey *K) {
359 return K->getKind() == CK_StringBased;
360 }
361
362 bool isEqual(const ContextKey *K) const override {
363 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(Val: K);
364 return Context == Other->Context;
365 }
366
367 void genHashCode() override {
368 HashCode = hash_value(S: SampleContextFrames(Context));
369 }
370};
371
372// Address-based context id
373struct AddrBasedCtxKey : public ContextKey {
374 SmallVector<uint64_t, 16> Context;
375
376 bool WasLeafInlined;
377 AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
378 static bool classof(const ContextKey *K) {
379 return K->getKind() == CK_AddrBased;
380 }
381
382 bool isEqual(const ContextKey *K) const override {
383 const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(Val: K);
384 return Context == Other->Context;
385 }
386
387 void genHashCode() override { HashCode = hash_combine_range(R&: Context); }
388};
389
390// The counter of branch samples for one function indexed by the branch,
391// which is represented as the source and target offset pair.
392using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
393// The counter of range samples for one function indexed by the range,
394// which is represented as the start and end offset pair.
395using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
396// <<inst-addr, vtable-data-symbol>, count> map for data access samples.
397// The instruction address is the virtual address in the binary.
398using DataAccessSample = std::map<std::pair<uint64_t, StringRef>, uint64_t>;
399// Wrapper for sample counters including range counter and branch counter
400struct SampleCounter {
401 RangeSample RangeCounter;
402 BranchSample BranchCounter;
403 DataAccessSample DataAccessCounter;
404
405 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
406 assert(Start <= End && "Invalid instruction range");
407 RangeCounter[{Start, End}] += Repeat;
408 }
409 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
410 BranchCounter[{Source, Target}] += Repeat;
411 }
412 void recordDataAccessCount(uint64_t InstAddr, StringRef DataSymbol,
413 uint64_t Repeat) {
414 DataAccessCounter[{InstAddr, DataSymbol}] += Repeat;
415 }
416};
417
418// Sample counter with context to support context-sensitive profile
419using ContextSampleCounterMap =
420 std::unordered_map<Hashable<ContextKey>, SampleCounter,
421 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
422
423struct FrameStack {
424 SmallVector<uint64_t, 16> Stack;
425 ProfiledBinary *Binary;
426 FrameStack(ProfiledBinary *B) : Binary(B) {}
427 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
428 assert(!Cur->isExternalFrame() &&
429 "External frame's not expected for context stack.");
430 Stack.push_back(Elt: Cur->Address);
431 return true;
432 }
433
434 void popFrame() {
435 if (!Stack.empty())
436 Stack.pop_back();
437 }
438 std::shared_ptr<StringBasedCtxKey> getContextKey();
439};
440
441struct AddressStack {
442 SmallVector<uint64_t, 16> Stack;
443 ProfiledBinary *Binary;
444 AddressStack(ProfiledBinary *B) : Binary(B) {}
445 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
446 assert(!Cur->isExternalFrame() &&
447 "External frame's not expected for context stack.");
448 Stack.push_back(Elt: Cur->Address);
449 return true;
450 }
451
452 void popFrame() {
453 if (!Stack.empty())
454 Stack.pop_back();
455 }
456 std::shared_ptr<AddrBasedCtxKey> getContextKey();
457};
458
459/*
460As in hybrid sample we have a group of LBRs and the most recent sampling call
461stack, we can walk through those LBRs to infer more call stacks which would be
462used as context for profile. VirtualUnwinder is the class to do the call stack
463unwinding based on LBR state. Two types of unwinding are processd here:
4641) LBR unwinding and 2) linear range unwinding.
465Specifically, for each LBR entry(can be classified into call, return, regular
466branch), LBR unwinding will replay the operation by pushing, popping or
467switching leaf frame towards the call stack and since the initial call stack
468is most recently sampled, the replay should be in anti-execution order, i.e. for
469the regular case, pop the call stack when LBR is call, push frame on call stack
470when LBR is return. After each LBR processed, it also needs to align with the
471next LBR by going through instructions from previous LBR's target to current
472LBR's source, which is the linear unwinding. As instruction from linear range
473can come from different function by inlining, linear unwinding will do the range
474splitting and record counters by the range with same inline context. Over those
475unwinding process we will record each call stack as context id and LBR/linear
476range as sample counter for further CS profile generation.
477*/
478class VirtualUnwinder {
479public:
480 VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
481 : CtxCounterMap(Counter), Binary(B) {}
482 bool unwind(const PerfSample *Sample, uint64_t Repeat);
483 std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
484
485 uint64_t NumTotalBranches = 0;
486 uint64_t NumExtCallBranch = 0;
487 uint64_t NumMissingExternalFrame = 0;
488 uint64_t NumMismatchedProEpiBranch = 0;
489 uint64_t NumMismatchedExtCallBranch = 0;
490 uint64_t NumUnpairedExtAddr = 0;
491 uint64_t NumPairedExtAddr = 0;
492
493private:
494 bool isSourceExternal(UnwindState &State) const {
495 return State.getCurrentLBRSource() == ExternalAddr;
496 }
497
498 bool isTargetExternal(UnwindState &State) const {
499 return State.getCurrentLBRTarget() == ExternalAddr;
500 }
501
502 // Determine whether the return source is from external code by checking if
503 // the target's the next inst is a call inst.
504 bool isReturnFromExternal(UnwindState &State) const {
505 return isSourceExternal(State) &&
506 (Binary->getCallAddrFromFrameAddr(FrameAddr: State.getCurrentLBRTarget()) != 0);
507 }
508
509 // If the source is external address but it's not the `return` case, treat it
510 // as a call from external.
511 bool isCallFromExternal(UnwindState &State) const {
512 return isSourceExternal(State) &&
513 Binary->getCallAddrFromFrameAddr(FrameAddr: State.getCurrentLBRTarget()) == 0;
514 }
515
516 bool isCallState(UnwindState &State) const {
517 // The tail call frame is always missing here in stack sample, we will
518 // use a specific tail call tracker to infer it.
519 if (!isValidState(State))
520 return false;
521
522 if (Binary->addressIsCall(Address: State.getCurrentLBRSource()))
523 return true;
524
525 return isCallFromExternal(State);
526 }
527
528 bool isReturnState(UnwindState &State) const {
529 if (!isValidState(State))
530 return false;
531
532 // Simply check addressIsReturn, as ret is always reliable, both for
533 // regular call and tail call.
534 if (Binary->addressIsReturn(Address: State.getCurrentLBRSource()))
535 return true;
536
537 return isReturnFromExternal(State);
538 }
539
540 bool isValidState(UnwindState &State) const { return !State.Invalid; }
541
542 void unwindCall(UnwindState &State);
543 void unwindLinear(UnwindState &State, uint64_t Repeat);
544 void unwindReturn(UnwindState &State);
545 void unwindBranch(UnwindState &State);
546
547 template <typename T>
548 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
549 // Collect each samples on trie node by DFS traversal
550 template <typename T>
551 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
552 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
553
554 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
555 uint64_t Repeat);
556 void recordBranchCount(const LBREntry &Branch, UnwindState &State,
557 uint64_t Repeat);
558
559 ContextSampleCounterMap *CtxCounterMap;
560 // Profiled binary that current frame address belongs to
561 ProfiledBinary *Binary;
562 // Keep track of all untracked callsites
563 std::set<uint64_t> UntrackedCallsites;
564};
565
566// Read perf trace to parse the events and samples.
567class PerfReaderBase {
568public:
569 PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
570 : Binary(B), PerfTraceFile(PerfTrace) {
571 // Initialize the base address to preferred address.
572 Binary->setBaseAddress(Binary->getPreferredBaseAddress());
573 };
574 virtual ~PerfReaderBase() = default;
575 static std::unique_ptr<PerfReaderBase>
576 create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
577 std::optional<int32_t> PIDFilter);
578
579 // Entry of the reader to parse multiple perf traces
580 virtual void parsePerfTraces() = 0;
581
582 // Parse the <ip, vtable-data-symbol> from the data access perf trace file,
583 // and accumulate the data access count for each <ip, data-symbol> pair.
584 Error
585 parseDataAccessPerfTraces(StringRef DataAccessPerfFile,
586 std::optional<int32_t> PIDFilter = std::nullopt);
587
588 const ContextSampleCounterMap &getSampleCounters() const {
589 return SampleCounters;
590 }
591 bool profileIsCS() { return ProfileIsCS; }
592
593protected:
594 ProfiledBinary *Binary = nullptr;
595 StringRef PerfTraceFile;
596
597 ContextSampleCounterMap SampleCounters;
598 bool ProfileIsCS = false;
599
600 uint64_t NumTotalSample = 0;
601 uint64_t NumLeafExternalFrame = 0;
602 uint64_t NumLeadingOutgoingLBR = 0;
603};
604
605// Read perf script to parse the events and samples.
606class PerfScriptReader : public PerfReaderBase {
607public:
608 PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
609 std::optional<int32_t> PID)
610 : PerfReaderBase(B, PerfTrace), PIDFilter(PID) {};
611
612 // Entry of the reader to parse multiple perf traces
613 void parsePerfTraces() override;
614
615 // Parse a single line of a PERF_RECORD_MMAP event looking for a
616 // mapping between the binary name and its memory layout.
617 // TODO: Move this static method from PerScriptReader (subclass) to
618 // PerfReaderBase (superclass).
619 static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
620 MMapEvent &MMap);
621
622 // Generate perf script from perf data
623 static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
624 bool SkipPID, PerfInputFile &File,
625 std::optional<int32_t> PIDFilter);
626 // Extract perf script type by peaking at the input
627 static PerfContent checkPerfScriptType(StringRef FileName);
628
629 // Cleanup installers for temporary files created by perf script command.
630 // Those files will be automatically removed when running destructor or
631 // receiving signals.
632 static SmallVector<CleanupInstaller, 2> TempFileCleanups;
633
634protected:
635 // Check whether a given line is LBR sample
636 static bool isLBRSample(StringRef Line);
637 // Check whether a given line is MMAP event
638 static bool isMMapEvent(StringRef Line);
639 // Update base address based on mmap events
640 void updateBinaryAddress(const MMapEvent &Event);
641 // Parse mmap event and update binary address
642 void parseMMapEvent(TraceStream &TraceIt);
643 // Parse perf events/samples and do aggregation
644 void parseAndAggregateTrace();
645 // Parse either an MMAP event or a perf sample
646 void parseEventOrSample(TraceStream &TraceIt);
647 // Warn if the relevant mmap event is missing.
648 void warnIfMissingMMap();
649 // Emit accumulate warnings.
650 void warnTruncatedStack();
651 // Warn if range is invalid.
652 void warnInvalidRange();
653 // Extract call stack from the perf trace lines
654 bool extractCallstack(TraceStream &TraceIt,
655 SmallVectorImpl<uint64_t> &CallStack);
656 // Extract LBR stack from one perf trace line
657 bool extractLBRStack(TraceStream &TraceIt,
658 SmallVectorImpl<LBREntry> &LBRStack);
659 uint64_t parseAggregatedCount(TraceStream &TraceIt);
660 // Parse one sample from multiple perf lines, override this for different
661 // sample type
662 void parseSample(TraceStream &TraceIt);
663 // An aggregated count is given to indicate how many times the sample is
664 // repeated.
665 virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
666 void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
667 // Post process the profile after trace aggregation, we will do simple range
668 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
669 virtual void generateUnsymbolizedProfile();
670 void writeUnsymbolizedProfile(StringRef Filename);
671 void writeUnsymbolizedProfile(raw_fd_ostream &OS);
672
673 // Samples with the repeating time generated by the perf reader
674 AggregatedCounter AggregatedSamples;
675 // Keep track of all invalid return addresses
676 std::set<uint64_t> InvalidReturnAddresses;
677 // PID for the process of interest
678 std::optional<int32_t> PIDFilter;
679};
680
681/*
682 The reader of LBR only perf script.
683 A typical LBR sample is like:
684 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
685 ... 0x4005c8/0x4005dc/P/-/-/0
686*/
687class LBRPerfReader : public PerfScriptReader {
688public:
689 LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
690 std::optional<int32_t> PID)
691 : PerfScriptReader(Binary, PerfTrace, PID) {};
692 // Parse the LBR only sample.
693 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
694};
695
696/*
697 Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
698 which is used to generate CS profile. An example of hybrid sample:
699 4005dc # call stack leaf
700 400634
701 400684 # call stack root
702 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
703 ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
704*/
705class HybridPerfReader : public PerfScriptReader {
706public:
707 HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
708 std::optional<int32_t> PID)
709 : PerfScriptReader(Binary, PerfTrace, PID) {};
710 // Parse the hybrid sample including the call and LBR line
711 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
712 void generateUnsymbolizedProfile() override;
713
714private:
715 // Unwind the hybrid samples after aggregration
716 void unwindSamples();
717};
718
719/*
720 Format of unsymbolized profile:
721
722 [frame1 @ frame2 @ ...] # If it's a CS profile
723 number of entries in RangeCounter
724 from_1-to_1:count_1
725 from_2-to_2:count_2
726 ......
727 from_n-to_n:count_n
728 number of entries in BranchCounter
729 src_1->dst_1:count_1
730 src_2->dst_2:count_2
731 ......
732 src_n->dst_n:count_n
733 [frame1 @ frame2 @ ...] # Next context
734 ......
735
736Note that non-CS profile doesn't have the empty `[]` context.
737*/
738class UnsymbolizedProfileReader : public PerfReaderBase {
739public:
740 UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
741 : PerfReaderBase(Binary, PerfTrace){};
742 void parsePerfTraces() override;
743
744private:
745 void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
746 void readUnsymbolizedProfile(StringRef Filename);
747
748 std::unordered_set<std::string> ContextStrSet;
749};
750
751} // end namespace sampleprof
752} // end namespace llvm
753
754#endif
755