1//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11#include "ErrorHandling.h"
12#include "ProfiledBinary.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/StringSet.h"
15#include "llvm/Support/Casting.h"
16#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/Error.h"
18#include "llvm/Support/Regex.h"
19#include <cstdint>
20#include <fstream>
21#include <map>
22
23namespace llvm {
24
25class CleanupInstaller;
26
27namespace sampleprof {
28
29// Stream based trace line iterator
30class TraceStream {
31 std::string CurrentLine;
32 std::ifstream Fin;
33 bool IsAtEoF = false;
34 uint64_t LineNumber = 0;
35
36public:
37 TraceStream(StringRef Filename) : Fin(Filename.str()) {
38 if (!Fin.good())
39 exitWithError(Message: "Error read input perf script file", Whence: Filename);
40 advance();
41 }
42
43 StringRef getCurrentLine() {
44 assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
45 return CurrentLine;
46 }
47
48 uint64_t getLineNumber() { return LineNumber; }
49
50 bool isAtEoF() { return IsAtEoF; }
51
52 // Read the next line
53 void advance() {
54 if (!std::getline(is&: Fin, str&: CurrentLine)) {
55 IsAtEoF = true;
56 return;
57 }
58 LineNumber++;
59 }
60};
61
// The type of input format. Enumerators are numbered consecutively from 0.
enum InputFormat {
  // Format not (yet) determined.
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData,
  // Perf script created by `perf script` command.
  PerfScript,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile,
  // Raw ETM format.
  ETMFormat,
};
70
// The type of perfscript content. Enumerators are numbered consecutively
// from 0.
enum PerfContent {
  // Content not (yet) determined.
  UnknownContent = 0,
  // Only LBR sample.
  LBR,
  // Hybrid sample including call stack and LBR stack.
  LBRStack,
};
77
78struct InputFile {
79 std::string InputFilePath;
80 InputFormat Format = InputFormat::UnknownFormat;
81 PerfContent Content = PerfContent::UnknownContent;
82};
83
84// The parsed LBR sample entry.
85struct LBREntry {
86 uint64_t Source = 0;
87 uint64_t Target = 0;
88 LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
89
90#ifndef NDEBUG
91 void print() const {
92 dbgs() << "from " << format("%#010x", Source) << " to "
93 << format("%#010x", Target);
94 }
95#endif
96};
97
98#ifndef NDEBUG
99static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
100 for (size_t I = 0; I < LBRStack.size(); I++) {
101 dbgs() << "[" << I << "] ";
102 LBRStack[I].print();
103 dbgs() << "\n";
104 }
105}
106
107static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
108 for (size_t I = 0; I < CallStack.size(); I++) {
109 dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
110 }
111}
112#endif
113
// Hash interface for generic data of type T.
// T is expected to implement a \fn getHashCode and a \fn isEqual.
// getHashCode is currently non-virtual to avoid the cost of a vtable call:
// the hash of the derived class is computed explicitly and stored in the base
// class's HashCode. That also leaves room for computing the hash
// incrementally (like a rolling hash) during frame stack unwinding, since
// unwinding only changes the leaf of the frame stack. \fn isEqual is virtual
// and therefore has some perf overhead; if a better hash function is designed
// in the future, the comparison could be skipped or made non-virtual (e.g.
// ignored altogether when the hash conflict probability is low).
template <class T> class Hashable {
public:
  // Shared handle to the wrapped data.
  std::shared_ptr<T> Data;

  Hashable(const std::shared_ptr<T> &Ptr) : Data(Ptr) {}

  // Raw, non-owning pointer to the wrapped data.
  T *getPtr() const {
    T *Raw = Data.get();
    return Raw;
  }
};
131
132} // end namespace sampleprof
133
134template <typename T> struct DenseMapInfo<sampleprof::Hashable<T>> {
135 static unsigned getHashValue(const sampleprof::Hashable<T> &Key) {
136 // Don't make it virtual for getHashCode
137 uint64_t Hash = Key.Data->getHashCode();
138 assert(Hash && "Should generate HashCode for it!");
139 return DenseMapInfo<uint64_t>::getHashValue(Val: Hash);
140 }
141
142 static bool isEqual(const sampleprof::Hashable<T> &LHS,
143 const sampleprof::Hashable<T> &RHS) {
144 // Precisely compare the data, vtable will have overhead.
145 return LHS.Data->isEqual(RHS.Data.get());
146 }
147};
148
149namespace sampleprof {
150
151struct PerfSample {
152 // LBR stack recorded in FIFO order.
153 SmallVector<LBREntry, 16> LBRStack;
154 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
155 // generation
156 SmallVector<uint64_t, 16> CallStack;
157
158 virtual ~PerfSample() = default;
159 uint64_t getHashCode() const {
160 // Use simple DJB2 hash
161 auto HashCombine = [](uint64_t H, uint64_t V) {
162 return ((H << 5) + H) + V;
163 };
164 uint64_t Hash = 5381;
165 for (const auto &Value : CallStack) {
166 Hash = HashCombine(Hash, Value);
167 }
168 for (const auto &Entry : LBRStack) {
169 Hash = HashCombine(Hash, Entry.Source);
170 Hash = HashCombine(Hash, Entry.Target);
171 }
172 return Hash;
173 }
174
175 bool isEqual(const PerfSample *Other) const {
176 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
177 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
178
179 if (CallStack.size() != OtherCallStack.size() ||
180 LBRStack.size() != OtherLBRStack.size())
181 return false;
182
183 if (!std::equal(first1: CallStack.begin(), last1: CallStack.end(), first2: OtherCallStack.begin()))
184 return false;
185
186 for (size_t I = 0; I < OtherLBRStack.size(); I++) {
187 if (LBRStack[I].Source != OtherLBRStack[I].Source ||
188 LBRStack[I].Target != OtherLBRStack[I].Target)
189 return false;
190 }
191 return true;
192 }
193
194#ifndef NDEBUG
195 uint64_t Linenum = 0;
196
197 void print() const {
198 dbgs() << "Line " << Linenum << "\n";
199 dbgs() << "LBR stack\n";
200 printLBRStack(LBRStack);
201 dbgs() << "Call stack\n";
202 printCallStack(CallStack);
203 }
204#endif
205};
206// After parsing the sample, we record the samples by aggregating them
207// into this counter. The key stores the sample data and the value is
208// the sample repeat times.
209using AggregatedCounter = DenseMap<Hashable<PerfSample>, uint64_t>;
210
211using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
212
213inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
214 ProfiledBinary *Binary) {
215 // Start bigger than End is considered invalid.
216 // LBR ranges cross the unconditional jmp are also assumed invalid.
217 // It's found that perf data may contain duplicate LBR entries that could form
218 // a range that does not reflect real execution flow on some Intel targets,
219 // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
220 // cannot be a linear execution range that spans over unconditional jmp.
221 return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
222}
223
224// The state for the unwinder, it doesn't hold the data but only keep the
225// pointer/index of the data, While unwinding, the CallStack is changed
226// dynamicially and will be recorded as the context of the sample
227struct UnwindState {
228 // Profiled binary that current frame address belongs to
229 const ProfiledBinary *Binary;
230 // Call stack trie node
231 struct ProfiledFrame {
232 const uint64_t Address = DummyRoot;
233 ProfiledFrame *Parent;
234 SampleVector RangeSamples;
235 SampleVector BranchSamples;
236 DenseMap<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
237
238 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
239 : Address(Addr), Parent(P) {}
240 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
241 assert(Address && "Address can't be zero!");
242 auto [It, Inserted] = Children.try_emplace(Key: Address);
243 if (Inserted)
244 It->second = std::make_unique<ProfiledFrame>(args&: Address, args: this);
245 return It->second.get();
246 }
247 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
248 RangeSamples.emplace_back(Args: std::make_tuple(args&: Start, args&: End, args&: Count));
249 }
250 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
251 BranchSamples.emplace_back(Args: std::make_tuple(args&: Source, args&: Target, args&: Count));
252 }
253 bool isDummyRoot() { return Address == DummyRoot; }
254 bool isExternalFrame() { return Address == ExternalAddr; }
255 bool isLeafFrame() { return Children.empty(); }
256 };
257
258 ProfiledFrame DummyTrieRoot;
259 ProfiledFrame *CurrentLeafFrame;
260 // Used to fall through the LBR stack
261 uint32_t LBRIndex = 0;
262 // Reference to PerfSample.LBRStack
263 const SmallVector<LBREntry, 16> &LBRStack;
264 // Used to iterate the address range
265 InstructionPointer InstPtr;
266 // Indicate whether unwinding is currently in a bad state which requires to
267 // skip all subsequent unwinding.
268 bool Invalid = false;
269 UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
270 : Binary(Binary), LBRStack(Sample->LBRStack),
271 InstPtr(Binary, Sample->CallStack.front()) {
272 initFrameTrie(CallStack: Sample->CallStack);
273 }
274
275 bool validateInitialState() {
276 uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
277 uint64_t LeafAddr = CurrentLeafFrame->Address;
278 assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
279 "External leading LBR should match the leaf frame.");
280
281 // When we take a stack sample, ideally the sampling distance between the
282 // leaf IP of stack and the last LBR target shouldn't be very large.
283 // Use a heuristic size (0x100) to filter out broken records.
284 if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
285 WithColor::warning() << "Bogus trace: stack tip = "
286 << format(Fmt: "%#010x", Vals: LeafAddr)
287 << ", LBR tip = " << format(Fmt: "%#010x\n", Vals: LBRLeaf);
288 return false;
289 }
290 return true;
291 }
292
293 void checkStateConsistency() {
294 assert(InstPtr.Address == CurrentLeafFrame->Address &&
295 "IP should align with context leaf");
296 }
297
298 void setInvalid() { Invalid = true; }
299 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
300 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
301 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
302 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
303 bool IsLastLBR() const { return LBRIndex == 0; }
304 size_t getLBRStackSize() const { return LBRStack.size(); }
305 void advanceLBR() { LBRIndex++; }
306 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
307
308 void pushFrame(uint64_t Address) {
309 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
310 }
311
312 void switchToFrame(uint64_t Address) {
313 if (CurrentLeafFrame->Address == Address)
314 return;
315 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
316 }
317
318 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
319
320 void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
321
322 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
323 ProfiledFrame *Cur = &DummyTrieRoot;
324 for (auto Address : reverse(C: CallStack)) {
325 Cur = Cur->getOrCreateChildFrame(Address);
326 }
327 CurrentLeafFrame = Cur;
328 }
329
330 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
331};
332
// Base class for sample counter key with context.
//
// The hash is computed lazily: getHashCode() asks the derived class to fill
// in HashCode the first time it is requested (and again whenever the stored
// value is still 0).
struct ContextKey {
  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_AddrBased };

  uint64_t HashCode = 0;

  virtual ~ContextKey() = default;

  uint64_t getHashCode() {
    if (!HashCode)
      genHashCode();
    return HashCode;
  }

  // Implemented by derived keys to store their hash into HashCode.
  virtual void genHashCode() = 0;

  // Default comparison: two keys are equal when their stored hash codes
  // match. Neither hash is (re)generated here.
  virtual bool isEqual(const ContextKey *K) const {
    const uint64_t OtherHash = K->HashCode;
    return HashCode == OtherHash;
  }

  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }

  ContextKey(ContextKind K) : Kind(K) {}
};
353
354// String based context id
355struct StringBasedCtxKey : public ContextKey {
356 SampleContextFrameVector Context;
357
358 bool WasLeafInlined;
359 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
360 static bool classof(const ContextKey *K) {
361 return K->getKind() == CK_StringBased;
362 }
363
364 bool isEqual(const ContextKey *K) const override {
365 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(Val: K);
366 return Context == Other->Context;
367 }
368
369 void genHashCode() override {
370 HashCode = hash_value(S: SampleContextFrames(Context));
371 }
372};
373
374// Address-based context id
375struct AddrBasedCtxKey : public ContextKey {
376 SmallVector<uint64_t, 16> Context;
377
378 bool WasLeafInlined;
379 AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
380 static bool classof(const ContextKey *K) {
381 return K->getKind() == CK_AddrBased;
382 }
383
384 bool isEqual(const ContextKey *K) const override {
385 const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(Val: K);
386 return Context == Other->Context;
387 }
388
389 void genHashCode() override { HashCode = hash_combine_range(R&: Context); }
390};
391
392// The counter of branch samples for one function indexed by the branch,
393// which is represented as the source and target offset pair.
394using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
395// The counter of range samples for one function indexed by the range,
396// which is represented as the start and end offset pair.
397using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
398// <<inst-addr, vtable-data-symbol>, count> map for data access samples.
399// The instruction address is the virtual address in the binary.
400using DataAccessSample = std::map<std::pair<uint64_t, StringRef>, uint64_t>;
401// Wrapper for sample counters including range counter and branch counter
402struct SampleCounter {
403 RangeSample RangeCounter;
404 BranchSample BranchCounter;
405 DataAccessSample DataAccessCounter;
406
407 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
408 assert(Start <= End && "Invalid instruction range");
409 RangeCounter[{Start, End}] += Repeat;
410 }
411 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
412 BranchCounter[{Source, Target}] += Repeat;
413 }
414 void recordDataAccessCount(uint64_t InstAddr, StringRef DataSymbol,
415 uint64_t Repeat) {
416 DataAccessCounter[{InstAddr, DataSymbol}] += Repeat;
417 }
418};
419
420// Sample counter with context to support context-sensitive profile
421using ContextSampleCounterMap = DenseMap<Hashable<ContextKey>, SampleCounter>;
422
423struct FrameStack {
424 SmallVector<uint64_t, 16> Stack;
425 ProfiledBinary *Binary;
426 FrameStack(ProfiledBinary *B) : Binary(B) {}
427 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
428 assert(!Cur->isExternalFrame() &&
429 "External frame's not expected for context stack.");
430 Stack.push_back(Elt: Cur->Address);
431 return true;
432 }
433
434 void popFrame() {
435 if (!Stack.empty())
436 Stack.pop_back();
437 }
438 std::shared_ptr<StringBasedCtxKey> getContextKey();
439};
440
441struct AddressStack {
442 SmallVector<uint64_t, 16> Stack;
443 ProfiledBinary *Binary;
444 AddressStack(ProfiledBinary *B) : Binary(B) {}
445 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
446 assert(!Cur->isExternalFrame() &&
447 "External frame's not expected for context stack.");
448 Stack.push_back(Elt: Cur->Address);
449 return true;
450 }
451
452 void popFrame() {
453 if (!Stack.empty())
454 Stack.pop_back();
455 }
456 std::shared_ptr<AddrBasedCtxKey> getContextKey();
457};
458
459/*
460As in hybrid sample we have a group of LBRs and the most recent sampling call
461stack, we can walk through those LBRs to infer more call stacks which would be
462used as context for profile. VirtualUnwinder is the class to do the call stack
unwinding based on LBR state. Two types of unwinding are processed here:
4641) LBR unwinding and 2) linear range unwinding.
465Specifically, for each LBR entry(can be classified into call, return, regular
466branch), LBR unwinding will replay the operation by pushing, popping or
467switching leaf frame towards the call stack and since the initial call stack
468is most recently sampled, the replay should be in anti-execution order, i.e. for
469the regular case, pop the call stack when LBR is call, push frame on call stack
470when LBR is return. After each LBR processed, it also needs to align with the
471next LBR by going through instructions from previous LBR's target to current
472LBR's source, which is the linear unwinding. As instruction from linear range
473can come from different function by inlining, linear unwinding will do the range
474splitting and record counters by the range with same inline context. Over those
475unwinding process we will record each call stack as context id and LBR/linear
476range as sample counter for further CS profile generation.
477*/
478class VirtualUnwinder {
479public:
480 VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
481 : CtxCounterMap(Counter), Binary(B) {}
482 bool unwind(const PerfSample *Sample, uint64_t Repeat);
483 std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
484
485 uint64_t NumTotalBranches = 0;
486 uint64_t NumExtCallBranch = 0;
487 uint64_t NumMissingExternalFrame = 0;
488 uint64_t NumMismatchedProEpiBranch = 0;
489 uint64_t NumMismatchedExtCallBranch = 0;
490 uint64_t NumUnpairedExtAddr = 0;
491 uint64_t NumPairedExtAddr = 0;
492
493private:
494 bool isSourceExternal(UnwindState &State) const {
495 return State.getCurrentLBRSource() == ExternalAddr;
496 }
497
498 bool isTargetExternal(UnwindState &State) const {
499 return State.getCurrentLBRTarget() == ExternalAddr;
500 }
501
502 // Determine whether the return source is from external code by checking if
503 // the target's the next inst is a call inst.
504 bool isReturnFromExternal(UnwindState &State) const {
505 return isSourceExternal(State) &&
506 (Binary->getCallAddrFromFrameAddr(FrameAddr: State.getCurrentLBRTarget()) != 0);
507 }
508
509 // If the source is external address but it's not the `return` case, treat it
510 // as a call from external.
511 bool isCallFromExternal(UnwindState &State) const {
512 return isSourceExternal(State) &&
513 Binary->getCallAddrFromFrameAddr(FrameAddr: State.getCurrentLBRTarget()) == 0;
514 }
515
516 bool isCallState(UnwindState &State) const {
517 // The tail call frame is always missing here in stack sample, we will
518 // use a specific tail call tracker to infer it.
519 if (!isValidState(State))
520 return false;
521
522 if (Binary->addressIsCall(Address: State.getCurrentLBRSource()))
523 return true;
524
525 return isCallFromExternal(State);
526 }
527
528 bool isReturnState(UnwindState &State) const {
529 if (!isValidState(State))
530 return false;
531
532 // Simply check addressIsReturn, as ret is always reliable, both for
533 // regular call and tail call.
534 if (Binary->addressIsReturn(Address: State.getCurrentLBRSource()))
535 return true;
536
537 return isReturnFromExternal(State);
538 }
539
540 bool isValidState(UnwindState &State) const { return !State.Invalid; }
541
542 void unwindCall(UnwindState &State);
543 void unwindLinear(UnwindState &State, uint64_t Repeat);
544 void unwindReturn(UnwindState &State);
545 void unwindBranch(UnwindState &State);
546
547 template <typename T>
548 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
549 // Collect each samples on trie node by DFS traversal
550 template <typename T>
551 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
552 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
553
554 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
555 uint64_t Repeat);
556 void recordBranchCount(const LBREntry &Branch, UnwindState &State,
557 uint64_t Repeat);
558
559 ContextSampleCounterMap *CtxCounterMap;
560 // Profiled binary that current frame address belongs to
561 ProfiledBinary *Binary;
562 // Keep track of all untracked callsites
563 std::set<uint64_t> UntrackedCallsites;
564};
565
566// Read perf trace to parse the events and samples.
567class PerfReaderBase {
568public:
569 PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
570 : Binary(B), PerfTraceFile(PerfTrace) {
571 // Initialize the base address to preferred address.
572 Binary->setBaseAddress(Binary->getPreferredBaseAddress());
573 };
574 virtual ~PerfReaderBase() = default;
575 static std::unique_ptr<PerfReaderBase>
576 create(ProfiledBinary *Binary, InputFile &Input,
577 std::optional<int32_t> PIDFilter);
578
579 // Entry of the reader to parse multiple perf traces
580 virtual void parsePerfTraces() = 0;
581
582 // Parse the <ip, vtable-data-symbol> from the data access perf trace file,
583 // and accumulate the data access count for each <ip, data-symbol> pair.
584 Error
585 parseDataAccessPerfTraces(StringRef DataAccessPerfFile,
586 std::optional<int32_t> PIDFilter = std::nullopt);
587
588 const ContextSampleCounterMap &getSampleCounters() const {
589 return SampleCounters;
590 }
591 bool profileIsCS() { return ProfileIsCS; }
592
593protected:
594 ProfiledBinary *Binary = nullptr;
595 StringRef PerfTraceFile;
596
597 ContextSampleCounterMap SampleCounters;
598 bool ProfileIsCS = false;
599
600 uint64_t NumTotalSample = 0;
601 uint64_t NumLeafExternalFrame = 0;
602 uint64_t NumLeadingOutgoingLBR = 0;
603};
604
605// Read perf script to parse the events and samples.
606class PerfScriptReader : public PerfReaderBase {
607public:
608 PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
609 std::optional<int32_t> PID)
610 : PerfReaderBase(B, PerfTrace), PIDFilter(PID) {};
611
612 // Entry of the reader to parse multiple perf traces
613 void parsePerfTraces() override;
614
615 // Parse a single line of a PERF_RECORD_MMAP event looking for a
616 // mapping between the binary name and its memory layout.
617 // TODO: Move this static method from PerScriptReader (subclass) to
618 // PerfReaderBase (superclass).
619 static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
620 MMapEvent &MMap);
621
622 // Generate perf script from perf data
623 static InputFile convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID,
624 InputFile &File,
625 std::optional<int32_t> PIDFilter);
626 // Extract perf script type by peaking at the input
627 static PerfContent checkPerfScriptType(StringRef FileName);
628
629 // Cleanup installers for temporary files created by perf script command.
630 // Those files will be automatically removed when running destructor or
631 // receiving signals.
632 static SmallVector<CleanupInstaller, 2> TempFileCleanups;
633
634protected:
635 // Check whether a given line is LBR sample
636 static bool isLBRSample(StringRef Line, bool CheckLineStart);
637 // Check whether a given line is MMAP event
638 static bool isMMapEvent(StringRef Line);
639 // Update base address based on mmap events
640 void updateBinaryAddress(const MMapEvent &Event);
641 // Parse mmap event and update binary address
642 void parseMMapEvent(TraceStream &TraceIt);
643 // Parse perf events/samples and do aggregation
644 void parseAndAggregateTrace();
645 // Parse either an MMAP event or a perf sample
646 void parseEventOrSample(TraceStream &TraceIt);
647 // Warn if the relevant mmap event is missing.
648 void warnIfMissingMMap();
649 // Emit accumulate warnings.
650 void warnTruncatedStack();
651 // Warn if range is invalid.
652 void warnInvalidRange();
653 // Warn if sampled branch/target addresses don't match the binary.
654 void warnIfBranchTargetMismatch();
655 // Extract call stack from the perf trace lines
656 bool extractCallstack(TraceStream &TraceIt,
657 SmallVectorImpl<uint64_t> &CallStack);
658 // Extract LBR stack from one perf trace line
659 bool extractLBRStack(TraceStream &TraceIt,
660 SmallVectorImpl<LBREntry> &LBRStack);
661 uint64_t parseAggregatedCount(TraceStream &TraceIt);
662 // Parse one sample from multiple perf lines, override this for different
663 // sample type
664 void parseSample(TraceStream &TraceIt);
665 // An aggregated count is given to indicate how many times the sample is
666 // repeated.
667 virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
668 void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
669 // Post process the profile after trace aggregation, we will do simple range
670 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
671 virtual void generateUnsymbolizedProfile();
672 void writeUnsymbolizedProfile(StringRef Filename);
673 void writeUnsymbolizedProfile(raw_fd_ostream &OS);
674
675 // Samples with the repeating time generated by the perf reader
676 AggregatedCounter AggregatedSamples;
677 // Keep track of all invalid return addresses
678 std::set<uint64_t> InvalidReturnAddresses;
679 // PID for the process of interest
680 std::optional<int32_t> PIDFilter;
681};
682
683/*
684 The reader of LBR only perf script.
685 A typical LBR sample is like:
686 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
687 ... 0x4005c8/0x4005dc/P/-/-/0
688*/
689class LBRPerfReader : public PerfScriptReader {
690public:
691 LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
692 std::optional<int32_t> PID)
693 : PerfScriptReader(Binary, PerfTrace, PID) {};
694 // Parse the LBR only sample.
695 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
696};
697
698/*
699 Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
700 which is used to generate CS profile. An example of hybrid sample:
701 4005dc # call stack leaf
702 400634
703 400684 # call stack root
704 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
705 ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
706*/
707class HybridPerfReader : public PerfScriptReader {
708public:
709 HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
710 std::optional<int32_t> PID)
711 : PerfScriptReader(Binary, PerfTrace, PID) {};
712 // Parse the hybrid sample including the call and LBR line
713 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
714 void generateUnsymbolizedProfile() override;
715
716private:
717 // Unwind the hybrid samples after aggregration
718 void unwindSamples();
719};
720
721/*
722 Format of unsymbolized profile:
723
724 [frame1 @ frame2 @ ...] # If it's a CS profile
725 number of entries in RangeCounter
726 from_1-to_1:count_1
727 from_2-to_2:count_2
728 ......
729 from_n-to_n:count_n
730 number of entries in BranchCounter
731 src_1->dst_1:count_1
732 src_2->dst_2:count_2
733 ......
734 src_n->dst_n:count_n
735 [frame1 @ frame2 @ ...] # Next context
736 ......
737
738Note that non-CS profile doesn't have the empty `[]` context.
739*/
740class UnsymbolizedProfileReader : public PerfReaderBase {
741public:
742 UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
743 : PerfReaderBase(Binary, PerfTrace){};
744 void parsePerfTraces() override;
745
746private:
747 void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
748 void readUnsymbolizedProfile(StringRef Filename);
749
750 StringSet<> ContextStrSet;
751};
752
753class ETMReader {
754public:
755 ETMReader(ProfiledBinary *Binary, StringRef TraceFile, uint8_t TraceID)
756 : Binary(Binary), TraceFile(TraceFile), TraceID(TraceID) {}
757 void parseETMTraces();
758 void recordProcessedRange(uint64_t Start, uint64_t End, uint64_t Count);
759 const ContextSampleCounterMap &getSampleCounters() const { return Counters; }
760
761private:
762 ProfiledBinary *Binary = nullptr;
763 StringRef TraceFile;
764 uint8_t TraceID;
765 ContextSampleCounterMap Counters;
766};
767
768} // end namespace sampleprof
769} // end namespace llvm
770
771#endif
772