1//===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "PerfReader.h"
9#include "ErrorHandling.h"
10#include "Options.h"
11#include "ProfileGenerator.h"
12#include "llvm/ADT/SmallString.h"
13#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14#include "llvm/ProfileData/ETMTraceDecoder.h"
15#include "llvm/Support/FileSystem.h"
16#include "llvm/Support/LineIterator.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/Support/Process.h"
19#include "llvm/Support/Timer.h"
20#include "llvm/Support/ToolOutputFile.h"
21#include "llvm/TargetParser/Triple.h"
22
23#define DEBUG_TYPE "perf-reader"
24
25namespace llvm {
26
27cl::opt<bool> SkipSymbolization("skip-symbolization",
28 cl::desc("Dump the unsymbolized profile to the "
29 "output file. It will show unwinder "
30 "output for CS profile generation."),
31 cl::cat(ProfGenCategory));
32
33static cl::opt<bool> ShowMmapEvents("show-mmap-events",
34 cl::desc("Print binary load events."),
35 cl::cat(ProfGenCategory));
36
37static cl::opt<bool>
38 UseOffset("use-offset", cl::init(Val: true),
39 cl::desc("Work with `--skip-symbolization` or "
40 "`--unsymbolized-profile` to write/read the "
41 "offset instead of virtual address."),
42 cl::cat(ProfGenCategory));
43
44static cl::opt<bool> UseLoadableSegmentAsBase(
45 "use-first-loadable-segment-as-base",
46 cl::desc("Use first loadable segment address as base address "
47 "for offsets in unsymbolized profile. By default "
48 "first executable segment address is used"),
49 cl::cat(ProfGenCategory));
50
51static cl::opt<bool>
52 IgnoreStackSamples("ignore-stack-samples",
53 cl::desc("Ignore call stack samples for hybrid samples "
54 "and produce context-insensitive profile."),
55 cl::cat(ProfGenCategory));
56cl::opt<bool> ShowDetailedWarning("show-detailed-warning",
57 cl::desc("Show detailed warning message."),
58 cl::cat(ProfGenCategory));
59
60static cl::opt<int> CSProfMaxUnsymbolizedCtxDepth(
61 "csprof-max-unsymbolized-context-depth", cl::init(Val: -1),
62 cl::desc("Keep the last K contexts while merging unsymbolized profile. -1 "
63 "means no depth limit."),
64 cl::cat(ProfGenCategory));
65
66cl::opt<bool> TimeProfGen("time-profgen", cl::desc("Time llvm-profgen phases"),
67 cl::init(Val: false), cl::cat(ProfGenCategory));
68
69static const char *TimerGroupName = "profgen";
70static const char *TimerGroupDesc = "llvm-profgen";
71
72namespace sampleprof {
73
74void VirtualUnwinder::unwindCall(UnwindState &State) {
75 uint64_t Source = State.getCurrentLBRSource();
76 auto *ParentFrame = State.getParentFrame();
77 // The 2nd frame after leaf could be missing if stack sample is
78 // taken when IP is within prolog/epilog, as frame chain isn't
79 // setup yet. Fill in the missing frame in that case.
80 // TODO: Currently we just assume all the addr that can't match the
81 // 2nd frame is in prolog/epilog. In the future, we will switch to
82 // pro/epi tracker(Dwarf CFI) for the precise check.
83 if (ParentFrame == State.getDummyRootPtr() ||
84 ParentFrame->Address != Source) {
85 State.switchToFrame(Address: Source);
86 if (ParentFrame != State.getDummyRootPtr()) {
87 if (Source == ExternalAddr)
88 NumMismatchedExtCallBranch++;
89 else
90 NumMismatchedProEpiBranch++;
91 }
92 } else {
93 State.popFrame();
94 }
95 State.InstPtr.update(Addr: Source);
96}
97
98void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
99 InstructionPointer &IP = State.InstPtr;
100 uint64_t Target = State.getCurrentLBRTarget();
101 uint64_t End = IP.Address;
102
103 if (End == ExternalAddr && Target == ExternalAddr) {
104 // Filter out the case when leaf external frame matches the external LBR
105 // target, this is a valid state, it happens that the code run into external
106 // address then return back. The call frame under the external frame
107 // remains valid and can be unwound later, just skip recording this range.
108 NumPairedExtAddr++;
109 return;
110 }
111
112 if (End == ExternalAddr || Target == ExternalAddr) {
113 // Range is invalid if only one point is external address. This means LBR
114 // traces contains a standalone external address failing to pair another
115 // one, likely due to interrupt jmp or broken perf script. Set the
116 // state to invalid.
117 NumUnpairedExtAddr++;
118 State.setInvalid();
119 return;
120 }
121
122 if (!isValidFallThroughRange(Start: Target, End, Binary)) {
123 // Skip unwinding the rest of LBR trace when a bogus range is seen.
124 State.setInvalid();
125 return;
126 }
127
128 if (Binary->usePseudoProbes()) {
129 // We don't need to top frame probe since it should be extracted
130 // from the range.
131 // The outcome of the virtual unwinding with pseudo probes is a
132 // map from a context key to the address range being unwound.
133 // This means basically linear unwinding is not needed for pseudo
134 // probes. The range will be simply recorded here and will be
135 // converted to a list of pseudo probes to report in ProfileGenerator.
136 State.getParentFrame()->recordRangeCount(Start: Target, End, Count: Repeat);
137 } else {
138 // Unwind linear execution part.
139 // Split and record the range by different inline context. For example:
140 // [0x01] ... main:1 # Target
141 // [0x02] ... main:2
142 // [0x03] ... main:3 @ foo:1
143 // [0x04] ... main:3 @ foo:2
144 // [0x05] ... main:3 @ foo:3
145 // [0x06] ... main:4
146 // [0x07] ... main:5 # End
147 // It will be recorded:
148 // [main:*] : [0x06, 0x07], [0x01, 0x02]
149 // [main:3 @ foo:*] : [0x03, 0x05]
150 while (IP.Address > Target) {
151 uint64_t PrevIP = IP.Address;
152 IP.backward();
153 // Break into segments for implicit call/return due to inlining
154 bool SameInlinee = Binary->inlineContextEqual(Add1: PrevIP, Add2: IP.Address);
155 if (!SameInlinee) {
156 State.switchToFrame(Address: PrevIP);
157 State.CurrentLeafFrame->recordRangeCount(Start: PrevIP, End, Count: Repeat);
158 End = IP.Address;
159 }
160 }
161 assert(IP.Address == Target && "The last one must be the target address.");
162 // Record the remaining range, [0x01, 0x02] in the example
163 State.switchToFrame(Address: IP.Address);
164 State.CurrentLeafFrame->recordRangeCount(Start: IP.Address, End, Count: Repeat);
165 }
166}
167
168void VirtualUnwinder::unwindReturn(UnwindState &State) {
169 // Add extra frame as we unwind through the return
170 const LBREntry &LBR = State.getCurrentLBR();
171 uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr: LBR.Target);
172 State.switchToFrame(Address: CallAddr);
173 State.pushFrame(Address: LBR.Source);
174 State.InstPtr.update(Addr: LBR.Source);
175}
176
177void VirtualUnwinder::unwindBranch(UnwindState &State) {
178 // TODO: Tolerate tail call for now, as we may see tail call from libraries.
179 // This is only for intra function branches, excluding tail calls.
180 uint64_t Source = State.getCurrentLBRSource();
181 State.switchToFrame(Address: Source);
182 State.InstPtr.update(Addr: Source);
183}
184
185std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
186 std::shared_ptr<StringBasedCtxKey> KeyStr =
187 std::make_shared<StringBasedCtxKey>();
188 KeyStr->Context = Binary->getExpandedContext(Stack, WasLeafInlined&: KeyStr->WasLeafInlined);
189 return KeyStr;
190}
191
192std::shared_ptr<AddrBasedCtxKey> AddressStack::getContextKey() {
193 std::shared_ptr<AddrBasedCtxKey> KeyStr = std::make_shared<AddrBasedCtxKey>();
194 KeyStr->Context = Stack;
195 CSProfileGenerator::compressRecursionContext<uint64_t>(Context&: KeyStr->Context);
196 // MaxContextDepth(--csprof-max-context-depth) is used to trim both symbolized
197 // and unsymbolized profile context. Sometimes we want to at least preserve
198 // the inlinings for the leaf frame(the profiled binary inlining),
199 // --csprof-max-context-depth may not be flexible enough, in this case,
200 // --csprof-max-unsymbolized-context-depth is used to limit the context for
201 // unsymbolized profile. If both are set, use the minimum of them.
202 int Depth = CSProfileGenerator::MaxContextDepth != -1
203 ? CSProfileGenerator::MaxContextDepth
204 : KeyStr->Context.size();
205 Depth = CSProfMaxUnsymbolizedCtxDepth != -1
206 ? std::min(a: static_cast<int>(CSProfMaxUnsymbolizedCtxDepth), b: Depth)
207 : Depth;
208 CSProfileGenerator::trimContext<uint64_t>(S&: KeyStr->Context, Depth);
209 return KeyStr;
210}
211
212template <typename T>
213void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
214 T &Stack) {
215 if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
216 return;
217
218 std::shared_ptr<ContextKey> Key = Stack.getContextKey();
219 if (Key == nullptr)
220 return;
221 auto Ret = CtxCounterMap->try_emplace(Key: Hashable<ContextKey>(Key));
222 SampleCounter &SCounter = Ret.first->second;
223 for (auto &I : Cur->RangeSamples)
224 SCounter.recordRangeCount(Start: std::get<0>(t&: I), End: std::get<1>(t&: I), Repeat: std::get<2>(t&: I));
225
226 for (auto &I : Cur->BranchSamples)
227 SCounter.recordBranchCount(Source: std::get<0>(t&: I), Target: std::get<1>(t&: I), Repeat: std::get<2>(t&: I));
228}
229
230template <typename T>
231void VirtualUnwinder::collectSamplesFromFrameTrie(
232 UnwindState::ProfiledFrame *Cur, T &Stack) {
233 if (!Cur->isDummyRoot()) {
234 // Truncate the context for external frame since this isn't a real call
235 // context the compiler will see.
236 if (Cur->isExternalFrame() || !Stack.pushFrame(Cur)) {
237 // Process truncated context
238 // Start a new traversal ignoring its bottom context
239 T EmptyStack(Binary);
240 collectSamplesFromFrame(Cur, EmptyStack);
241 for (const auto &Item : Cur->Children) {
242 collectSamplesFromFrameTrie(Item.second.get(), EmptyStack);
243 }
244
245 // Keep note of untracked call site and deduplicate them
246 // for warning later.
247 if (!Cur->isLeafFrame())
248 UntrackedCallsites.insert(x: Cur->Address);
249
250 return;
251 }
252 }
253
254 collectSamplesFromFrame(Cur, Stack);
255 // Process children frame
256 for (const auto &Item : Cur->Children) {
257 collectSamplesFromFrameTrie(Item.second.get(), Stack);
258 }
259 // Recover the call stack
260 Stack.popFrame();
261}
262
263void VirtualUnwinder::collectSamplesFromFrameTrie(
264 UnwindState::ProfiledFrame *Cur) {
265 if (Binary->usePseudoProbes()) {
266 AddressStack Stack(Binary);
267 collectSamplesFromFrameTrie<AddressStack>(Cur, Stack);
268 } else {
269 FrameStack Stack(Binary);
270 collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
271 }
272}
273
274void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
275 UnwindState &State, uint64_t Repeat) {
276 if (Branch.Target == ExternalAddr)
277 return;
278
279 // Record external-to-internal pattern on the trie root, it later can be
280 // used for generating head samples.
281 if (Branch.Source == ExternalAddr) {
282 State.getDummyRootPtr()->recordBranchCount(Source: Branch.Source, Target: Branch.Target,
283 Count: Repeat);
284 return;
285 }
286
287 if (Binary->usePseudoProbes()) {
288 // Same as recordRangeCount, We don't need to top frame probe since we will
289 // extract it from branch's source address
290 State.getParentFrame()->recordBranchCount(Source: Branch.Source, Target: Branch.Target,
291 Count: Repeat);
292 } else {
293 State.CurrentLeafFrame->recordBranchCount(Source: Branch.Source, Target: Branch.Target,
294 Count: Repeat);
295 }
296}
297
298bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
299 // Capture initial state as starting point for unwinding.
300 UnwindState State(Sample, Binary);
301
302 // Sanity check - making sure leaf of LBR aligns with leaf of stack sample
303 // Stack sample sometimes can be unreliable, so filter out bogus ones.
304 if (!State.validateInitialState())
305 return false;
306
307 NumTotalBranches += State.LBRStack.size();
308 // Now process the LBR samples in parrallel with stack sample
309 // Note that we do not reverse the LBR entry order so we can
310 // unwind the sample stack as we walk through LBR entries.
311 while (State.hasNextLBR()) {
312 State.checkStateConsistency();
313
314 // Do not attempt linear unwind for the leaf range as it's incomplete.
315 if (!State.IsLastLBR()) {
316 // Unwind implicit calls/returns from inlining, along the linear path,
317 // break into smaller sub section each with its own calling context.
318 unwindLinear(State, Repeat);
319 }
320
321 // Save the LBR branch before it gets unwound.
322 const LBREntry &Branch = State.getCurrentLBR();
323 if (isCallState(State)) {
324 // Unwind calls - we know we encountered call if LBR overlaps with
325 // transition between leaf the 2nd frame. Note that for calls that
326 // were not in the original stack sample, we should have added the
327 // extra frame when processing the return paired with this call.
328 unwindCall(State);
329 } else if (isReturnState(State)) {
330 // Unwind returns - check whether the IP is indeed at a return
331 // instruction
332 unwindReturn(State);
333 } else if (isValidState(State)) {
334 // Unwind branches
335 unwindBranch(State);
336 } else {
337 // Skip unwinding the rest of LBR trace. Reset the stack and update the
338 // state so that the rest of the trace can still be processed as if they
339 // do not have stack samples.
340 State.clearCallStack();
341 State.InstPtr.update(Addr: State.getCurrentLBRSource());
342 State.pushFrame(Address: State.InstPtr.Address);
343 }
344
345 State.advanceLBR();
346 // Record `branch` with calling context after unwinding.
347 recordBranchCount(Branch, State, Repeat);
348 }
349 // As samples are aggregated on trie, record them into counter map
350 collectSamplesFromFrameTrie(Cur: State.getDummyRootPtr());
351
352 return true;
353}
354
355std::unique_ptr<PerfReaderBase>
356PerfReaderBase::create(ProfiledBinary *Binary, InputFile &Input,
357 std::optional<int32_t> PIDFilter) {
358 std::unique_ptr<PerfReaderBase> PerfReader;
359
360 if (Input.Format == InputFormat::UnsymbolizedProfile) {
361 PerfReader.reset(
362 p: new UnsymbolizedProfileReader(Binary, Input.InputFilePath));
363 return PerfReader;
364 }
365
366 // For perf data input, we need to convert them into perf script first.
367 // If this is a kernel perf file, there is no need for retrieving PIDs.
368 if (Input.Format == InputFormat::PerfData)
369 Input = PerfScriptReader::convertPerfDataToTrace(Binary, SkipPID: Binary->isKernel(),
370 File&: Input, PIDFilter);
371
372 assert((Input.Format == InputFormat::PerfScript) &&
373 "Should be a perfscript!");
374
375 Input.Content = PerfScriptReader::checkPerfScriptType(FileName: Input.InputFilePath);
376 if (Input.Content == PerfContent::LBRStack) {
377 PerfReader.reset(
378 p: new HybridPerfReader(Binary, Input.InputFilePath, PIDFilter));
379 } else if (Input.Content == PerfContent::LBR) {
380 PerfReader.reset(p: new LBRPerfReader(Binary, Input.InputFilePath, PIDFilter));
381 } else {
382 exitWithError(Message: "Unsupported perfscript!");
383 }
384
385 return PerfReader;
386}
387
388Error PerfReaderBase::parseDataAccessPerfTraces(
389 StringRef DataAccessPerfTraceFile, std::optional<int32_t> PIDFilter) {
390 // A perf_record_sample line is like
391 // . 1282514022939813 0x87b0 [0x60]: PERF_RECORD_SAMPLE(IP, 0x4002):
392 // 3446532/3446532: 0x2608a2 period: 233 addr: 0x3b3fb0
393 constexpr static StringRef DataAccessSamplePattern =
394 "PERF_RECORD_SAMPLE\\([A-Za-z]+, 0x[0-9a-fA-F]+\\): "
395 "([0-9]+)\\/[0-9]+: 0x([0-9a-fA-F]+) period: [0-9]+ addr: "
396 "0x([0-9a-fA-F]+)";
397
398 llvm::Regex LogRegex(DataAccessSamplePattern);
399
400 auto BufferOrErr = MemoryBuffer::getFile(Filename: DataAccessPerfTraceFile);
401 std::error_code EC = BufferOrErr.getError();
402 if (EC)
403 return make_error<StringError>(Args: "Failed to open perf trace file: " +
404 DataAccessPerfTraceFile,
405 Args: inconvertibleErrorCode());
406
407 assert(!SampleCounters.empty() && "Sample counters should not be empty!");
408 SampleCounter &Counter = SampleCounters.begin()->second;
409 line_iterator LineIt(*BufferOrErr.get(), true);
410
411 for (; !LineIt.is_at_eof(); ++LineIt) {
412 StringRef Line = *LineIt;
413
414 MMapEvent MMap;
415 if (Line.contains(Other: "PERF_RECORD_MMAP2")) {
416 if (PerfScriptReader::extractMMapEventForBinary(Binary, Line, MMap)) {
417 if (!MMap.MemProtectionFlag.contains(Other: "x")) {
418 if (Error E = Binary->addMMapNonTextEvent(Event: MMap)) {
419 return E;
420 }
421 }
422 }
423 continue;
424 }
425
426 SmallVector<StringRef> Fields;
427 if (LogRegex.match(String: Line, Matches: &Fields)) {
428 int32_t PID = 0;
429 if (Fields[1].getAsInteger(Radix: 10, Result&: PID))
430 return make_error<StringError>(
431 Args: "Failed to parse PID from perf trace line: " + Line,
432 Args: inconvertibleErrorCode());
433
434 if (PIDFilter.has_value() && *PIDFilter != PID) {
435 continue;
436 }
437
438 uint64_t DataAddress = 0;
439 if (Fields[3].getAsInteger(Radix: 16, Result&: DataAddress))
440 return make_error<StringError>(
441 Args: "Failed to parse data address from perf trace line: " + Line,
442 Args: inconvertibleErrorCode());
443 // Out of all the memory access events, the vtable accesses are used to
444 // construct type profiles. We assume that this is under the Itanium
445 // C++ ABI so we can use `_ZTV` prefix to identify vtable.
446 StringRef DataSymbol = Binary->symbolizeDataAddress(
447 Address: Binary->CanonicalizeNonTextAddress(Address: DataAddress));
448 if (DataSymbol.starts_with(Prefix: "_ZTV")) {
449 uint64_t IP = 0;
450 Fields[2].getAsInteger(Radix: 16, Result&: IP);
451 Counter.recordDataAccessCount(InstAddr: Binary->canonicalizeVirtualAddress(Address: IP),
452 DataSymbol, Repeat: 1);
453 }
454 }
455 }
456 return Error::success();
457}
458
459InputFile
460PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID,
461 InputFile &File,
462 std::optional<int32_t> PIDFilter) {
463 StringRef PerfData = File.InputFilePath;
464 // Run perf script to retrieve PIDs matching binary we're interested in.
465 auto PerfExecutable = sys::Process::FindInEnvPath(EnvName: "PATH", FileName: "perf");
466 if (!PerfExecutable) {
467 exitWithError(Message: "Perf not found.");
468 }
469 std::string PerfPath = *PerfExecutable;
470 SmallString<128> PerfTraceFile;
471 sys::fs::createUniquePath(Model: "perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp",
472 ResultPath&: PerfTraceFile, /*MakeAbsolute=*/true);
473 std::string ErrorFile = std::string(PerfTraceFile) + ".err";
474 std::optional<StringRef> Redirects[] = {std::nullopt, // Stdin
475 StringRef(PerfTraceFile), // Stdout
476 StringRef(ErrorFile)}; // Stderr
477 PerfScriptReader::TempFileCleanups.emplace_back(Args&: PerfTraceFile);
478 PerfScriptReader::TempFileCleanups.emplace_back(Args&: ErrorFile);
479
480 std::string PIDs;
481 if (!SkipPID) {
482 StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events",
483 "-F", "comm,pid", "-i",
484 PerfData};
485 sys::ExecuteAndWait(Program: PerfPath, Args: ScriptMMapArgs, Env: std::nullopt, Redirects);
486
487 // Collect the PIDs
488 TraceStream TraceIt(PerfTraceFile);
489 DenseSet<int32_t> PIDSet;
490 while (!TraceIt.isAtEoF()) {
491 MMapEvent MMap;
492 if (isMMapEvent(Line: TraceIt.getCurrentLine()) &&
493 extractMMapEventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap)) {
494 auto It = PIDSet.insert(V: MMap.PID);
495 if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) {
496 if (!PIDs.empty()) {
497 PIDs.append(s: ",");
498 }
499 PIDs.append(str: utostr(X: MMap.PID));
500 }
501 }
502 TraceIt.advance();
503 }
504
505 if (PIDs.empty()) {
506 exitWithError(Message: "No relevant mmap event is found in perf data.");
507 }
508 }
509
510 // Run perf script again to retrieve events for PIDs collected above
511 SmallVector<StringRef, 8> ScriptSampleArgs;
512 ScriptSampleArgs.push_back(Elt: PerfPath);
513 ScriptSampleArgs.push_back(Elt: "script");
514 ScriptSampleArgs.push_back(Elt: "--show-mmap-events");
515 ScriptSampleArgs.push_back(Elt: "-F");
516 ScriptSampleArgs.push_back(Elt: "ip,brstack");
517 ScriptSampleArgs.push_back(Elt: "-i");
518 ScriptSampleArgs.push_back(Elt: PerfData);
519 if (!PIDs.empty()) {
520 ScriptSampleArgs.push_back(Elt: "--pid");
521 ScriptSampleArgs.push_back(Elt: PIDs);
522 }
523 sys::ExecuteAndWait(Program: PerfPath, Args: ScriptSampleArgs, Env: std::nullopt, Redirects);
524
525 return {.InputFilePath: std::string(PerfTraceFile), .Format: InputFormat::PerfScript,
526 .Content: PerfContent::UnknownContent};
527}
528
529static StringRef filename(StringRef Path, bool UseBackSlash) {
530 llvm::sys::path::Style PathStyle =
531 UseBackSlash ? llvm::sys::path::Style::windows_backslash
532 : llvm::sys::path::Style::native;
533 StringRef FileName = llvm::sys::path::filename(path: Path, style: PathStyle);
534
535 // In case this file use \r\n as newline.
536 if (UseBackSlash && FileName.back() == '\r')
537 return FileName.drop_back();
538
539 return FileName;
540}
541
542void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
543 // Drop the event which doesn't belong to user-provided binary
544 StringRef BinaryName = filename(Path: Event.BinaryPath, UseBackSlash: Binary->isCOFF());
545 bool IsKernel = Binary->isKernel();
546 if (!IsKernel && Binary->getName() != BinaryName)
547 return;
548 if (IsKernel && !Binary->isKernelImageName(BinaryName))
549 return;
550
551 // Drop the event if process does not match pid filter
552 if (PIDFilter && Event.PID != *PIDFilter)
553 return;
554
555 // Drop the event if its image is loaded at the same address
556 if (Event.Address == Binary->getBaseAddress()) {
557 Binary->setIsLoadedByMMap(true);
558 return;
559 }
560
561 if (IsKernel || Event.Offset == Binary->getTextSegmentOffset()) {
562 // A binary image could be unloaded and then reloaded at different
563 // place, so update binary load address.
564 // Only update for the first executable segment and assume all other
565 // segments are loaded at consecutive memory addresses, which is the case on
566 // X64.
567 Binary->setBaseAddress(Event.Address);
568 Binary->setIsLoadedByMMap(true);
569 } else {
570 // Verify segments are loaded consecutively.
571 const auto &Offsets = Binary->getTextSegmentOffsets();
572 auto It = llvm::lower_bound(Range: Offsets, Value: Event.Offset);
573 if (It != Offsets.end() && *It == Event.Offset) {
574 // The event is for loading a separate executable segment.
575 auto I = std::distance(first: Offsets.begin(), last: It);
576 const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses();
577 if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() !=
578 Event.Address - Binary->getBaseAddress())
579 exitWithError(Message: "Executable segments not loaded consecutively");
580 } else {
581 if (It == Offsets.begin())
582 exitWithError(Message: "File offset not found");
583 else {
584 // Find the segment the event falls in. A large segment could be loaded
585 // via multiple mmap calls with consecutive memory addresses.
586 --It;
587 assert(*It < Event.Offset);
588 if (Event.Offset - *It != Event.Address - Binary->getBaseAddress())
589 exitWithError(Message: "Segment not loaded by consecutive mmaps");
590 }
591 }
592 }
593}
594
595static std::string getContextKeyStr(ContextKey *K,
596 const ProfiledBinary *Binary) {
597 if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(Val: K)) {
598 return SampleContext::getContextString(Context: CtxKey->Context);
599 } else if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(Val: K)) {
600 std::ostringstream OContextStr;
601 for (uint32_t I = 0; I < CtxKey->Context.size(); I++) {
602 if (OContextStr.str().size())
603 OContextStr << " @ ";
604 uint64_t Address = CtxKey->Context[I];
605 if (UseOffset) {
606 if (UseLoadableSegmentAsBase)
607 Address -= Binary->getFirstLoadableAddress();
608 else
609 Address -= Binary->getPreferredBaseAddress();
610 }
611 OContextStr << "0x"
612 << utohexstr(X: Address,
613 /*LowerCase=*/true);
614 }
615 return OContextStr.str();
616 } else {
617 llvm_unreachable("unexpected key type");
618 }
619}
620
621void HybridPerfReader::unwindSamples() {
622 NamedRegionTimer T("unwind", "Unwind samples", TimerGroupName, TimerGroupDesc,
623 TimeProfGen);
624 VirtualUnwinder Unwinder(&SampleCounters, Binary);
625 for (const auto &Item : AggregatedSamples) {
626 const PerfSample *Sample = Item.first.getPtr();
627 Unwinder.unwind(Sample, Repeat: Item.second);
628 }
629
630 // Warn about untracked frames due to missing probes.
631 if (ShowDetailedWarning) {
632 for (auto Address : Unwinder.getUntrackedCallsites())
633 WithColor::warning() << "Profile context truncated due to missing probe "
634 << "for call instruction at "
635 << format(Fmt: "0x%" PRIx64, Vals: Address) << "\n";
636 }
637
638 emitWarningSummary(Num: Unwinder.getUntrackedCallsites().size(),
639 Total: SampleCounters.size(),
640 Msg: "of profiled contexts are truncated due to missing probe "
641 "for call instruction.");
642
643 emitWarningSummary(
644 Num: Unwinder.NumMismatchedExtCallBranch, Total: Unwinder.NumTotalBranches,
645 Msg: "of branches'source is a call instruction but doesn't match call frame "
646 "stack, likely due to unwinding error of external frame.");
647
648 emitWarningSummary(Num: Unwinder.NumPairedExtAddr * 2, Total: Unwinder.NumTotalBranches,
649 Msg: "of branches containing paired external address.");
650
651 emitWarningSummary(Num: Unwinder.NumUnpairedExtAddr, Total: Unwinder.NumTotalBranches,
652 Msg: "of branches containing external address but doesn't have "
653 "another external address to pair, likely due to "
654 "interrupt jmp or broken perf script.");
655
656 emitWarningSummary(
657 Num: Unwinder.NumMismatchedProEpiBranch, Total: Unwinder.NumTotalBranches,
658 Msg: "of branches'source is a call instruction but doesn't match call frame "
659 "stack, likely due to frame in prolog/epilog.");
660
661 emitWarningSummary(Num: Unwinder.NumMissingExternalFrame,
662 Total: Unwinder.NumExtCallBranch,
663 Msg: "of artificial call branches but doesn't have an external "
664 "frame to match.");
665}
666
667/// Parse a hex address from \p Str.
668static bool parseAddress(StringRef Str, uint64_t &Addr, bool HasPrefix) {
669 if (Str.consume_front(Prefix: "0x") != HasPrefix)
670 return true;
671 return Str.getAsInteger(Radix: 16, Result&: Addr);
672}
673
674bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
675 SmallVectorImpl<LBREntry> &LBRStack) {
676 // The raw format of LBR stack is like:
677 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
678 // ... 0x4005c8/0x4005dc/P/-/-/0
679 // It's in FIFO order and separated by whitespace.
680 SmallVector<StringRef, 32> Records;
681 TraceIt.getCurrentLine().rtrim().split(A&: Records, Separator: " ", MaxSplit: -1, KeepEmpty: false);
682 auto WarnInvalidLBR = [](TraceStream &TraceIt) {
683 WithColor::warning() << "Invalid address in LBR record at line "
684 << TraceIt.getLineNumber() << ": "
685 << TraceIt.getCurrentLine() << "\n";
686 };
687
688 // Skip the leading instruction pointer.
689 size_t Index = 0;
690 uint64_t LeadingAddr;
691 if (!Records.empty() && !Records[0].contains(C: '/')) {
692 if (parseAddress(Str: Records[0], Addr&: LeadingAddr, HasPrefix: false)) {
693 WarnInvalidLBR(TraceIt);
694 TraceIt.advance();
695 return false;
696 }
697 Index = 1;
698 }
699
700 // Now extract LBR samples - note that we do not reverse the
701 // LBR entry order so we can unwind the sample stack as we walk
702 // through LBR entries.
703 while (Index < Records.size()) {
704 auto &Token = Records[Index++];
705 if (Token.size() == 0)
706 continue;
707
708 SmallVector<StringRef, 8> Addresses;
709 Token.split(A&: Addresses, Separator: "/");
710 uint64_t Src;
711 uint64_t Dst;
712
713 // Stop at broken LBR records.
714 if (Addresses.size() < 2 || parseAddress(Str: Addresses[0], Addr&: Src, HasPrefix: true) ||
715 parseAddress(Str: Addresses[1], Addr&: Dst, HasPrefix: true)) {
716 WarnInvalidLBR(TraceIt);
717 break;
718 }
719
720 // Canonicalize to use preferred load address as base address.
721 Src = Binary->canonicalizeVirtualAddress(Address: Src);
722 Dst = Binary->canonicalizeVirtualAddress(Address: Dst);
723 bool SrcIsInternal = Binary->addressIsCode(Address: Src);
724 bool DstIsInternal = Binary->addressIsCode(Address: Dst);
725 if (!SrcIsInternal)
726 Src = ExternalAddr;
727 if (!DstIsInternal)
728 Dst = ExternalAddr;
729 // Filter external-to-external case to reduce LBR trace size.
730 if (!SrcIsInternal && !DstIsInternal)
731 continue;
732
733 LBRStack.emplace_back(Args: LBREntry(Src, Dst));
734 }
735 TraceIt.advance();
736 return !LBRStack.empty();
737}
738
739bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
740 SmallVectorImpl<uint64_t> &CallStack) {
741 // The raw format of call stack is like:
742 // 4005dc # leaf frame
743 // 400634
744 // 400684 # root frame
745 // It's in bottom-up order with each frame in one line.
746
747 // Extract stack frames from sample
748 while (!TraceIt.isAtEoF() && !isLBRSample(Line: TraceIt.getCurrentLine(), CheckLineStart: true)) {
749 StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
750 uint64_t FrameAddr = 0;
751 if (parseAddress(Str: FrameStr, Addr&: FrameAddr, HasPrefix: false)) {
752 // We might parse a non-perf sample line like empty line and comments,
753 // skip it
754 TraceIt.advance();
755 return false;
756 }
757 TraceIt.advance();
758
759 FrameAddr = Binary->canonicalizeVirtualAddress(Address: FrameAddr);
760 // Currently intermixed frame from different binaries is not supported.
761 if (!Binary->addressIsCode(Address: FrameAddr)) {
762 if (CallStack.empty())
763 NumLeafExternalFrame++;
764 // Push a special value(ExternalAddr) for the external frames so that
765 // unwinder can still work on this with artificial Call/Return branch.
766 // After unwinding, the context will be truncated for external frame.
767 // Also deduplicate the consecutive external addresses.
768 if (CallStack.empty() || CallStack.back() != ExternalAddr)
769 CallStack.emplace_back(Args: ExternalAddr);
770 continue;
771 }
772
773 // We need to translate return address to call address for non-leaf frames.
774 if (!CallStack.empty()) {
775 auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr);
776 if (!CallAddr) {
777 // Stop at an invalid return address caused by bad unwinding. This could
778 // happen to frame-pointer-based unwinding and the callee functions that
779 // do not have the frame pointer chain set up.
780 InvalidReturnAddresses.insert(x: FrameAddr);
781 break;
782 }
783 FrameAddr = CallAddr;
784 }
785
786 CallStack.emplace_back(Args&: FrameAddr);
787 }
788
789 // Strip out the bottom external addr.
790 if (CallStack.size() > 1 && CallStack.back() == ExternalAddr)
791 CallStack.pop_back();
792
793 // Skip other unrelated line, find the next valid LBR line
794 // Note that even for empty call stack, we should skip the address at the
795 // bottom, otherwise the following pass may generate a truncated callstack
796 while (!TraceIt.isAtEoF() && !isLBRSample(Line: TraceIt.getCurrentLine(), CheckLineStart: true)) {
797 TraceIt.advance();
798 }
799 // Filter out broken stack sample. We may not have complete frame info
800 // if sample end up in prolog/epilog, the result is dangling context not
801 // connected to entry point. This should be relatively rare thus not much
802 // impact on overall profile quality. However we do want to filter them
803 // out to reduce the number of different calling contexts. One instance
804 // of such case - when sample landed in prolog/epilog, somehow stack
805 // walking will be broken in an unexpected way that higher frames will be
806 // missing.
807 return !CallStack.empty() &&
808 !Binary->addressInPrologEpilog(Address: CallStack.front());
809}
810
811void PerfScriptReader::warnIfMissingMMap() {
812 if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) {
813 WithColor::warning() << "No relevant mmap event is matched for "
814 << Binary->getName()
815 << ", will use preferred address ("
816 << format(Fmt: "0x%" PRIx64,
817 Vals: Binary->getPreferredBaseAddress())
818 << ") as the base loading address!\n";
819 // Avoid redundant warning, only warn at the first unmatched sample.
820 Binary->setMissingMMapWarned(true);
821 }
822}
823
824void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
825 // The raw hybird sample started with call stack in FILO order and followed
826 // intermediately by LBR sample
827 // e.g.
828 // 4005dc # call stack leaf
829 // 400634
830 // 400684 # call stack root
831 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
832 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
833 //
834 std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
835#ifndef NDEBUG
836 Sample->Linenum = TraceIt.getLineNumber();
837#endif
838 // Parsing call stack and populate into PerfSample.CallStack
839 if (!extractCallstack(TraceIt, CallStack&: Sample->CallStack)) {
840 // Skip the next LBR line matched current call stack
841 if (!TraceIt.isAtEoF() && isLBRSample(Line: TraceIt.getCurrentLine(), CheckLineStart: true))
842 TraceIt.advance();
843 return;
844 }
845
846 warnIfMissingMMap();
847
848 if (!TraceIt.isAtEoF() && isLBRSample(Line: TraceIt.getCurrentLine(), CheckLineStart: true)) {
849 // Parsing LBR stack and populate into PerfSample.LBRStack
850 if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) {
851 if (IgnoreStackSamples) {
852 Sample->CallStack.clear();
853 } else {
854 // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR
855 // ranges
856 Sample->CallStack.front() = Sample->LBRStack[0].Target;
857 }
858 // Record samples by aggregation
859 AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
860 }
861 } else {
862 // LBR sample is encoded in single line after stack sample
863 exitWithError(Message: "'Hybrid perf sample is corrupted, No LBR sample line");
864 }
865}
866
867void PerfScriptReader::writeUnsymbolizedProfile(StringRef Filename) {
868 std::error_code EC;
869 raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF);
870 if (EC)
871 exitWithError(EC, Whence: Filename);
872 writeUnsymbolizedProfile(OS);
873}
874
875// Use ordered map to make the output deterministic
876using OrderedCounterForPrint = std::map<std::string, SampleCounter *>;
877
878void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) {
879 OrderedCounterForPrint OrderedCounters;
880 for (auto &CI : SampleCounters) {
881 OrderedCounters[getContextKeyStr(K: CI.first.getPtr(), Binary)] = &CI.second;
882 }
883
884 auto SCounterPrinter = [&](RangeSample &Counter, StringRef Separator,
885 uint32_t Indent) {
886 OS.indent(NumSpaces: Indent);
887 OS << Counter.size() << "\n";
888 for (auto &I : Counter) {
889 uint64_t Start = I.first.first;
890 uint64_t End = I.first.second;
891
892 if (UseOffset) {
893 if (UseLoadableSegmentAsBase) {
894 Start -= Binary->getFirstLoadableAddress();
895 End -= Binary->getFirstLoadableAddress();
896 } else {
897 Start -= Binary->getPreferredBaseAddress();
898 End -= Binary->getPreferredBaseAddress();
899 }
900 }
901
902 OS.indent(NumSpaces: Indent);
903 OS << Twine::utohexstr(Val: Start) << Separator << Twine::utohexstr(Val: End) << ":"
904 << I.second << "\n";
905 }
906 };
907
908 for (auto &CI : OrderedCounters) {
909 uint32_t Indent = 0;
910 if (ProfileIsCS) {
911 // Context string key
912 OS << "[" << CI.first << "]\n";
913 Indent = 2;
914 }
915
916 SampleCounter &Counter = *CI.second;
917 SCounterPrinter(Counter.RangeCounter, "-", Indent);
918 SCounterPrinter(Counter.BranchCounter, "->", Indent);
919 }
920}
921
922// Format of input:
923// number of entries in RangeCounter
924// from_1-to_1:count_1
925// from_2-to_2:count_2
926// ......
927// from_n-to_n:count_n
928// number of entries in BranchCounter
929// src_1->dst_1:count_1
930// src_2->dst_2:count_2
931// ......
932// src_n->dst_n:count_n
933void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt,
934 SampleCounter &SCounters) {
935 auto exitWithErrorForTraceLine = [](TraceStream &TraceIt) {
936 std::string Msg = TraceIt.isAtEoF()
937 ? "Invalid raw profile!"
938 : "Invalid raw profile at line " +
939 Twine(TraceIt.getLineNumber()).str() + ": " +
940 TraceIt.getCurrentLine().str();
941 exitWithError(Message: Msg);
942 };
943 auto ReadNumber = [&](uint64_t &Num) {
944 if (TraceIt.isAtEoF())
945 exitWithErrorForTraceLine(TraceIt);
946 if (TraceIt.getCurrentLine().ltrim().getAsInteger(Radix: 10, Result&: Num))
947 exitWithErrorForTraceLine(TraceIt);
948 TraceIt.advance();
949 };
950
951 auto ReadCounter = [&](RangeSample &Counter, StringRef Separator) {
952 uint64_t Num = 0;
953 ReadNumber(Num);
954 while (Num--) {
955 if (TraceIt.isAtEoF())
956 exitWithErrorForTraceLine(TraceIt);
957 StringRef Line = TraceIt.getCurrentLine().ltrim();
958
959 uint64_t Count = 0;
960 auto LineSplit = Line.split(Separator: ":");
961 if (LineSplit.second.empty() || LineSplit.second.getAsInteger(Radix: 10, Result&: Count))
962 exitWithErrorForTraceLine(TraceIt);
963
964 uint64_t Source = 0;
965 uint64_t Target = 0;
966 auto Range = LineSplit.first.split(Separator);
967 if (Range.second.empty() || Range.first.getAsInteger(Radix: 16, Result&: Source) ||
968 Range.second.getAsInteger(Radix: 16, Result&: Target))
969 exitWithErrorForTraceLine(TraceIt);
970
971 if (UseOffset) {
972 if (UseLoadableSegmentAsBase) {
973 Source += Binary->getFirstLoadableAddress();
974 Target += Binary->getFirstLoadableAddress();
975 } else {
976 Source += Binary->getPreferredBaseAddress();
977 Target += Binary->getPreferredBaseAddress();
978 }
979 }
980
981 Counter[{Source, Target}] += Count;
982 TraceIt.advance();
983 }
984 };
985
986 ReadCounter(SCounters.RangeCounter, "-");
987 ReadCounter(SCounters.BranchCounter, "->");
988}
989
990void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) {
991 TraceStream TraceIt(FileName);
992 while (!TraceIt.isAtEoF()) {
993 std::shared_ptr<StringBasedCtxKey> Key =
994 std::make_shared<StringBasedCtxKey>();
995 StringRef Line = TraceIt.getCurrentLine();
996 // Read context stack for CS profile.
997 if (Line.starts_with(Prefix: "[")) {
998 ProfileIsCS = true;
999 auto I = ContextStrSet.insert(key: Line);
1000 SampleContext::createCtxVectorFromStr(ContextStr: I.first->getKey(), Context&: Key->Context);
1001 TraceIt.advance();
1002 }
1003 auto Ret = SampleCounters.try_emplace(Key: Hashable<ContextKey>(Key));
1004 readSampleCounters(TraceIt, SCounters&: Ret.first->second);
1005 }
1006}
1007
1008void UnsymbolizedProfileReader::parsePerfTraces() {
1009 readUnsymbolizedProfile(FileName: PerfTraceFile);
1010}
1011
1012void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
1013 uint64_t Repeat) {
1014 SampleCounter &Counter = SampleCounters.begin()->second;
1015 uint64_t EndAddress = 0;
1016 for (const LBREntry &LBR : Sample->LBRStack) {
1017 uint64_t SourceAddress = LBR.Source;
1018 uint64_t TargetAddress = LBR.Target;
1019
1020 // Record the branch if its SourceAddress is external. It can be the case an
1021 // external source call an internal function, later this branch will be used
1022 // to generate the function's head sample.
1023 if (Binary->addressIsCode(Address: TargetAddress)) {
1024 Counter.recordBranchCount(Source: SourceAddress, Target: TargetAddress, Repeat);
1025 }
1026
1027 // If this not the first LBR, update the range count between TO of current
1028 // LBR and FROM of next LBR.
1029 uint64_t StartAddress = TargetAddress;
1030 if (Binary->addressIsCode(Address: StartAddress) &&
1031 Binary->addressIsCode(Address: EndAddress) &&
1032 isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary))
1033 Counter.recordRangeCount(Start: StartAddress, End: EndAddress, Repeat);
1034 EndAddress = SourceAddress;
1035 }
1036}
1037
1038void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
1039 std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
1040 // Parsing LBR stack and populate into PerfSample.LBRStack
1041 if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) {
1042 warnIfMissingMMap();
1043 // Record LBR only samples by aggregation
1044 AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
1045 }
1046}
1047
1048void PerfScriptReader::generateUnsymbolizedProfile() {
1049 // There is no context for LBR only sample, so initialize one entry with
1050 // fake "empty" context key.
1051 assert(SampleCounters.empty() &&
1052 "Sample counter map should be empty before raw profile generation");
1053 std::shared_ptr<StringBasedCtxKey> Key =
1054 std::make_shared<StringBasedCtxKey>();
1055 SampleCounters.try_emplace(Key: Hashable<ContextKey>(Key));
1056 for (const auto &Item : AggregatedSamples) {
1057 const PerfSample *Sample = Item.first.getPtr();
1058 computeCounterFromLBR(Sample, Repeat: Item.second);
1059 }
1060}
1061
1062uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) {
1063 // The aggregated count is optional, so do not skip the line and return 1 if
1064 // it's unmatched
1065 uint64_t Count = 1;
1066 if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: Count))
1067 TraceIt.advance();
1068 return Count;
1069}
1070
1071void PerfScriptReader::parseSample(TraceStream &TraceIt) {
1072 NumTotalSample++;
1073 uint64_t Count = parseAggregatedCount(TraceIt);
1074 assert(Count >= 1 && "Aggregated count should be >= 1!");
1075 parseSample(TraceIt, Count);
1076}
1077
1078bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary,
1079 StringRef Line,
1080 MMapEvent &MMap) {
1081 if (!Binary->isKernel() && !Line.contains(Other: Binary->getName()) &&
1082 !ShowMmapEvents)
1083 return false;
1084 // Parse a MMap2 line like:
1085 // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
1086 // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
1087 constexpr static const char *const MMap2Pattern =
1088 "PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: "
1089 "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
1090 "(0x[a-f0-9]+|0) .*\\]: ([-a-z]+) (.*)";
1091 // Parse a MMap line like
1092 // PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \
1093 // 0xffffffff81e00000]: x [kernel.kallsyms]_text
1094 constexpr static const char *const MMapPattern =
1095 "PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: "
1096 "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
1097 "(0x[a-f0-9]+|0)\\]: ([-a-z]+) (.*)";
1098 // Field 0 - whole line
1099 // Field 1 - PID
1100 // Field 2 - base address
1101 // Field 3 - mmapped size
1102 // Field 4 - page offset
1103 // Field 5 - binary path
1104 enum EventIndex {
1105 WHOLE_LINE = 0,
1106 PID = 1,
1107 MMAPPED_ADDRESS = 2,
1108 MMAPPED_SIZE = 3,
1109 PAGE_OFFSET = 4,
1110 MEM_PROTECTION_FLAG = 5,
1111 BINARY_PATH = 6,
1112 };
1113
1114 bool R = false;
1115 SmallVector<StringRef, 7> Fields;
1116 if (Line.contains(Other: "PERF_RECORD_MMAP2 ")) {
1117 Regex RegMmap2(MMap2Pattern);
1118 R = RegMmap2.match(String: Line, Matches: &Fields);
1119 } else if (Line.contains(Other: "PERF_RECORD_MMAP ")) {
1120 Regex RegMmap(MMapPattern);
1121 R = RegMmap.match(String: Line, Matches: &Fields);
1122 } else
1123 llvm_unreachable("unexpected MMAP event entry");
1124
1125 if (!R) {
1126 std::string WarningMsg = "Cannot parse mmap event: " + Line.str() + " \n";
1127 WithColor::warning() << WarningMsg;
1128 return false;
1129 }
1130 long long MMapPID = 0;
1131 getAsSignedInteger(Str: Fields[PID], Radix: 10, Result&: MMapPID);
1132 MMap.PID = MMapPID;
1133 Fields[MMAPPED_ADDRESS].getAsInteger(Radix: 0, Result&: MMap.Address);
1134 Fields[MMAPPED_SIZE].getAsInteger(Radix: 0, Result&: MMap.Size);
1135 Fields[PAGE_OFFSET].getAsInteger(Radix: 0, Result&: MMap.Offset);
1136 MMap.MemProtectionFlag = Fields[MEM_PROTECTION_FLAG];
1137 MMap.BinaryPath = Fields[BINARY_PATH];
1138 if (ShowMmapEvents) {
1139 outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at "
1140 << format(Fmt: "0x%" PRIx64 ":", Vals: MMap.Address) << " \n";
1141 }
1142
1143 StringRef BinaryName = filename(Path: MMap.BinaryPath, UseBackSlash: Binary->isCOFF());
1144 if (Binary->isKernel()) {
1145 return Binary->isKernelImageName(BinaryName);
1146 }
1147 return Binary->getName() == BinaryName;
1148}
1149
1150void PerfScriptReader::parseMMapEvent(TraceStream &TraceIt) {
1151 MMapEvent MMap;
1152 if (extractMMapEventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap))
1153 updateBinaryAddress(Event: MMap);
1154 TraceIt.advance();
1155}
1156
1157void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) {
1158 if (isMMapEvent(Line: TraceIt.getCurrentLine()))
1159 parseMMapEvent(TraceIt);
1160 else
1161 parseSample(TraceIt);
1162}
1163
1164void PerfScriptReader::parseAndAggregateTrace() {
1165 NamedRegionTimer T("parseTrace", "Parse and aggregate trace", TimerGroupName,
1166 TimerGroupDesc, TimeProfGen);
1167 // Trace line iterator
1168 TraceStream TraceIt(PerfTraceFile);
1169 while (!TraceIt.isAtEoF())
1170 parseEventOrSample(TraceIt);
1171}
1172
1173// A LBR sample is like:
1174// 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ...
1175// A heuristic for fast detection by checking whether a
1176// leading " 0x" and the '/' exist.
1177bool PerfScriptReader::isLBRSample(StringRef Line, bool CheckLineStart) {
1178 // Skip the leading instruction pointer
1179 SmallVector<StringRef, 32> Records;
1180 if (!CheckLineStart)
1181 Line = Line.trim();
1182 Line.split(A&: Records, Separator: " ", MaxSplit: 2, KeepEmpty: CheckLineStart);
1183 if (Records.size() < 2)
1184 return false;
1185 if (Records[1].starts_with(Prefix: "0x") && Records[1].contains(C: '/'))
1186 return true;
1187 return false;
1188}
1189
1190bool PerfScriptReader::isMMapEvent(StringRef Line) {
1191 // Short cut to avoid string find is possible.
1192 if (Line.empty() || Line.size() < 50)
1193 return false;
1194
1195 if (std::isdigit(Line[0]))
1196 return false;
1197
1198 // PERF_RECORD_MMAP2 or PERF_RECORD_MMAP does not appear at the beginning of
1199 // the line for ` perf script --show-mmap-events -i ...`
1200 return Line.contains(Other: "PERF_RECORD_MMAP");
1201}
1202
1203// The raw hybird sample is like
1204// e.g.
1205// 4005dc # call stack leaf
1206// 400634
1207// 400684 # call stack root
1208// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
1209// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
1210// Determine the perfscript contains hybrid samples(call stack + LBRs) by
1211// checking whether there is a non-empty call stack immediately followed by
1212// a LBR sample
1213PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) {
1214 TraceStream TraceIt(FileName);
1215 uint64_t FrameAddr = 0;
1216 while (!TraceIt.isAtEoF()) {
1217 // Skip the aggregated count
1218 if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: FrameAddr))
1219 TraceIt.advance();
1220
1221 // Detect sample with call stack
1222 int32_t Count = 0;
1223 while (!TraceIt.isAtEoF() &&
1224 !parseAddress(Str: TraceIt.getCurrentLine().ltrim(), Addr&: FrameAddr, HasPrefix: false)) {
1225 Count++;
1226 TraceIt.advance();
1227 }
1228 if (!TraceIt.isAtEoF()) {
1229 if (isLBRSample(Line: TraceIt.getCurrentLine(), CheckLineStart: false)) {
1230 if (Count > 0)
1231 return PerfContent::LBRStack;
1232 else
1233 return PerfContent::LBR;
1234 }
1235 TraceIt.advance();
1236 }
1237 }
1238
1239 exitWithError(Message: "Invalid perf script input!");
1240 return PerfContent::UnknownContent;
1241}
1242
1243void HybridPerfReader::generateUnsymbolizedProfile() {
1244 ProfileIsCS = !IgnoreStackSamples;
1245 if (ProfileIsCS)
1246 unwindSamples();
1247 else
1248 PerfScriptReader::generateUnsymbolizedProfile();
1249}
1250
1251void PerfScriptReader::warnTruncatedStack() {
1252 if (ShowDetailedWarning) {
1253 for (auto Address : InvalidReturnAddresses) {
1254 WithColor::warning()
1255 << "Truncated stack sample due to invalid return address at "
1256 << format(Fmt: "0x%" PRIx64, Vals: Address)
1257 << ", likely caused by frame pointer omission\n";
1258 }
1259 }
1260 emitWarningSummary(
1261 Num: InvalidReturnAddresses.size(), Total: AggregatedSamples.size(),
1262 Msg: "of truncated stack samples due to invalid return address, "
1263 "likely caused by frame pointer omission.");
1264}
1265
1266void PerfScriptReader::warnInvalidRange() {
1267 DenseMap<std::pair<uint64_t, uint64_t>, uint64_t> Ranges;
1268
1269 for (const auto &Item : AggregatedSamples) {
1270 const PerfSample *Sample = Item.first.getPtr();
1271 uint64_t Count = Item.second;
1272 uint64_t EndAddress = 0;
1273 for (const LBREntry &LBR : Sample->LBRStack) {
1274 uint64_t SourceAddress = LBR.Source;
1275 uint64_t StartAddress = LBR.Target;
1276 if (EndAddress != 0)
1277 Ranges[{StartAddress, EndAddress}] += Count;
1278 EndAddress = SourceAddress;
1279 }
1280 }
1281
1282 if (Ranges.empty()) {
1283 WithColor::warning() << "No samples in perf script!\n";
1284 return;
1285 }
1286
1287 auto WarnInvalidRange = [&](uint64_t StartAddress, uint64_t EndAddress,
1288 StringRef Msg) {
1289 if (!ShowDetailedWarning)
1290 return;
1291 WithColor::warning() << "[" << format(Fmt: "%8" PRIx64, Vals: StartAddress) << ","
1292 << format(Fmt: "%8" PRIx64, Vals: EndAddress) << "]: " << Msg
1293 << "\n";
1294 };
1295
1296 const char *EndNotBoundaryMsg = "Range is not on instruction boundary, "
1297 "likely due to profile and binary mismatch.";
1298 const char *DanglingRangeMsg = "Range does not belong to any functions, "
1299 "likely from PLT, .init or .fini section.";
1300 const char *RangeCrossFuncMsg =
1301 "Fall through range should not cross function boundaries, likely due to "
1302 "profile and binary mismatch.";
1303 const char *BogusRangeMsg = "Range start is after or too far from range end.";
1304
1305 uint64_t TotalRangeNum = 0;
1306 uint64_t InstNotBoundary = 0;
1307 uint64_t UnmatchedRange = 0;
1308 uint64_t RecoveredRange = 0;
1309 uint64_t RangeCrossFunc = 0;
1310 uint64_t BogusRange = 0;
1311
1312 for (auto &I : Ranges) {
1313 uint64_t StartAddress = I.first.first;
1314 uint64_t EndAddress = I.first.second;
1315 TotalRangeNum += I.second;
1316
1317 if (!Binary->addressIsCode(Address: StartAddress) &&
1318 !Binary->addressIsCode(Address: EndAddress))
1319 continue;
1320
1321 if (!Binary->addressIsCode(Address: StartAddress) ||
1322 !Binary->addressIsTransfer(Address: EndAddress)) {
1323 InstNotBoundary += I.second;
1324 WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg);
1325 }
1326
1327 auto *FRange = Binary->findFuncRange(Address: StartAddress);
1328 if (!FRange) {
1329 UnmatchedRange += I.second;
1330 WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg);
1331 continue;
1332 }
1333
1334 if (FRange->Func->NameStatus != DwarfNameStatus::Matched)
1335 RecoveredRange += I.second;
1336
1337 if (EndAddress >= FRange->EndAddress) {
1338 RangeCrossFunc += I.second;
1339 WarnInvalidRange(StartAddress, EndAddress, RangeCrossFuncMsg);
1340 }
1341
1342 if (Binary->addressIsCode(Address: StartAddress) &&
1343 Binary->addressIsCode(Address: EndAddress) &&
1344 !isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary)) {
1345 BogusRange += I.second;
1346 WarnInvalidRange(StartAddress, EndAddress, BogusRangeMsg);
1347 }
1348 }
1349
1350 emitWarningSummary(
1351 Num: InstNotBoundary, Total: TotalRangeNum,
1352 Msg: "of samples are from ranges that are not on instruction boundary.");
1353 emitWarningSummary(
1354 Num: UnmatchedRange, Total: TotalRangeNum,
1355 Msg: "of samples are from ranges that do not belong to any functions.");
1356 emitWarningSummary(Num: RecoveredRange, Total: TotalRangeNum,
1357 Msg: "of samples are from ranges that belong to functions "
1358 "recovered from symbol table.");
1359 emitWarningSummary(
1360 Num: RangeCrossFunc, Total: TotalRangeNum,
1361 Msg: "of samples are from ranges that do cross function boundaries.");
1362 emitWarningSummary(
1363 Num: BogusRange, Total: TotalRangeNum,
1364 Msg: "of samples are from ranges that have range start after or too far from "
1365 "range end acrossing the unconditinal jmp.");
1366}
1367
1368void PerfScriptReader::warnIfBranchTargetMismatch() {
1369 // Collect unique branch source and target addresses from LBR samples,
1370 // then check what percentage don't match known instructions in the binary.
1371
1372 uint64_t MismatchedBranches = 0;
1373 uint64_t MismatchedIndirectTargets = 0;
1374 uint64_t MismatchedTargets = 0;
1375 uint64_t TotalSamples = 0;
1376
1377 for (const auto &Item : AggregatedSamples) {
1378 const PerfSample *Sample = Item.first.getPtr();
1379 for (const LBREntry &LBR : Sample->LBRStack) {
1380 uint64_t Source = LBR.Source;
1381 uint64_t Target = LBR.Target;
1382 if (Source == ExternalAddr || Target == ExternalAddr)
1383 continue;
1384 TotalSamples++;
1385
1386 // Validate Branch sources are Call/Branch/Indirect Branch
1387 if (!Binary->addressIsTransfer(Address: Source))
1388 MismatchedBranches++;
1389
1390 // Validate Indirect Branch targets landed in code. This may over estimate
1391 // the vaid targets only because there's no good way to determine jump
1392 // table targets
1393 if (Binary->addressIsIndirectBranch(Address: Source)) {
1394 if (!Binary->addressIsCode(Address: Target))
1395 MismatchedIndirectTargets++;
1396 } else if (!Binary->addressIsBranchTarget(Address: Target) &&
1397 !Binary->findFuncRangeForStartAddr(Address: Target))
1398 MismatchedTargets++;
1399 }
1400 }
1401
1402 emitWarningSummary(Num: MismatchedBranches, Total: TotalSamples,
1403 Msg: "of branch samples do not match the binary.");
1404 emitWarningSummary(Num: MismatchedTargets, Total: TotalSamples,
1405 Msg: "of branch targets do not match the binary.");
1406 emitWarningSummary(Num: MismatchedIndirectTargets, Total: TotalSamples,
1407 Msg: "of indirect branch targets do not match the binary.");
1408}
1409
1410void PerfScriptReader::parsePerfTraces() {
1411 // Parse perf traces and do aggregation.
1412 parseAndAggregateTrace();
1413 if (Binary->isKernel() && !Binary->getIsLoadedByMMap()) {
1414 exitWithError(
1415 Message: "Kernel is requested, but no kernel is found in mmap events.");
1416 }
1417
1418 emitWarningSummary(Num: NumLeafExternalFrame, Total: NumTotalSample,
1419 Msg: "of samples have leaf external frame in call stack.");
1420 emitWarningSummary(Num: NumLeadingOutgoingLBR, Total: NumTotalSample,
1421 Msg: "of samples have leading external LBR.");
1422
1423 // Generate unsymbolized profile.
1424 warnTruncatedStack();
1425 warnInvalidRange();
1426 warnIfBranchTargetMismatch();
1427 generateUnsymbolizedProfile();
1428 AggregatedSamples.clear();
1429
1430 if (SkipSymbolization)
1431 writeUnsymbolizedProfile(Filename: OutputFilename);
1432}
1433
// Out-of-line definition of the PerfScriptReader::TempFileCleanups static
// data member.
SmallVector<CleanupInstaller, 2> PerfScriptReader::TempFileCleanups;
1435
1436void ETMReader::recordProcessedRange(uint64_t Start, uint64_t End,
1437 uint64_t Count) {
1438 assert(!Counters.empty() && "Counters should not be empty!");
1439 auto &Counter = Counters.begin()->second;
1440 Counter.recordRangeCount(Start, End, Repeat: Count);
1441}
1442
1443class ETMCallback : public ETMDecoder::Callback {
1444 ETMReader *Reader;
1445
1446public:
1447 ETMCallback(ETMReader *R) : Reader(R) {}
1448 void processInstructionRange(uint64_t Start, uint64_t End) override {
1449 Reader->recordProcessedRange(Start, End, Count: 1);
1450 }
1451};
1452
1453void ETMReader::parseETMTraces() {
1454 auto BufferOrErr = MemoryBuffer::getFile(Filename: TraceFile);
1455 if (std::error_code EC = BufferOrErr.getError())
1456 exitWithError(Message: "Could not open ETM trace file: " + EC.message());
1457
1458 ArrayRef<uint8_t> Data(
1459 reinterpret_cast<const uint8_t *>((*BufferOrErr)->getBufferStart()),
1460 (*BufferOrErr)->getBufferSize());
1461
1462 // There is no context for ETM instruction traces.
1463 // Initialize the SampleCounters map with a single empty context key
1464 // to aggregate all instruction hits into a global bucket.
1465 auto Key = std::make_shared<StringBasedCtxKey>();
1466 Counters.try_emplace(Key: Hashable<ContextKey>(Key));
1467
1468 // The protocol utilizes a 0x80 byte as an initial synchronization header.
1469 // Perform a manual search for this sync point to discard any leading
1470 // padding or truncated packets before decoding begins.
1471 size_t StartIdx = 0;
1472 while (StartIdx < Data.size() && Data[StartIdx] != 0x80)
1473 StartIdx++;
1474 if (StartIdx >= Data.size())
1475 exitWithError(Message: "No synchronization header (0x80) found in the bitstream.");
1476 ArrayRef<uint8_t> TraceSlice = Data.slice(N: StartIdx);
1477
1478 auto DecoderOrErr = ETMDecoder::create(
1479 Binary: Binary->getBinary(), TargetTriple: Binary->getTriple(), TraceID: static_cast<uint8_t>(TraceID));
1480
1481 if (!DecoderOrErr)
1482 exitWithError(Message: toString(E: DecoderOrErr.takeError()));
1483 auto Decoder = std::move(*DecoderOrErr);
1484
1485 ETMCallback CB(this);
1486 if (Error E = Decoder->processTrace(TraceData: TraceSlice, TraceCallback&: CB))
1487 exitWithError(Message: toString(E: std::move(E)));
1488}
1489
1490} // end namespace sampleprof
1491} // end namespace llvm
1492