1//===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "PerfReader.h"
9#include "ErrorHandling.h"
10#include "Options.h"
11#include "ProfileGenerator.h"
12#include "llvm/ADT/SmallString.h"
13#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14#include "llvm/Support/FileSystem.h"
15#include "llvm/Support/LineIterator.h"
16#include "llvm/Support/MemoryBuffer.h"
17#include "llvm/Support/Process.h"
18#include "llvm/Support/ToolOutputFile.h"
19
20#define DEBUG_TYPE "perf-reader"
21
22namespace llvm {
23
// Command-line options used by the perf reader. Every option below is
// registered under ProfGenCategory.

// --skip-symbolization: declared without `static`, so it has external linkage
// (presumably referenced from other translation units -- confirm).
24cl::opt<bool> SkipSymbolization("skip-symbolization",
25 cl::desc("Dump the unsymbolized profile to the "
26 "output file. It will show unwinder "
27 "output for CS profile generation."),
28 cl::cat(ProfGenCategory));
29
// --show-mmap-events: file-local flag.
30static cl::opt<bool> ShowMmapEvents("show-mmap-events",
31 cl::desc("Print binary load events."),
32 cl::cat(ProfGenCategory));
33
// --use-offset: file-local flag, initialized to true. Read by
// getContextKeyStr() when printing address-based context keys.
34static cl::opt<bool>
35 UseOffset("use-offset", cl::init(Val: true),
36 cl::desc("Work with `--skip-symbolization` or "
37 "`--unsymbolized-profile` to write/read the "
38 "offset instead of virtual address."),
39 cl::cat(ProfGenCategory));
40
// --use-first-loadable-segment-as-base: file-local flag selecting which base
// address getContextKeyStr() subtracts when --use-offset is in effect.
41static cl::opt<bool> UseLoadableSegmentAsBase(
42 "use-first-loadable-segment-as-base",
43 cl::desc("Use first loadable segment address as base address "
44 "for offsets in unsymbolized profile. By default "
45 "first executable segment address is used"),
46 cl::cat(ProfGenCategory));
47
// --ignore-stack-samples: file-local flag. When set,
// HybridPerfReader::parseSample() clears the parsed call stack of each sample.
48static cl::opt<bool>
49 IgnoreStackSamples("ignore-stack-samples",
50 cl::desc("Ignore call stack samples for hybrid samples "
51 "and produce context-insensitive profile."),
52 cl::cat(ProfGenCategory));
// --show-detailed-warning: declared without `static` (external linkage).
// HybridPerfReader::unwindSamples() prints one warning per untracked call site
// when it is set.
53cl::opt<bool> ShowDetailedWarning("show-detailed-warning",
54 cl::desc("Show detailed warning message."),
55 cl::cat(ProfGenCategory));
56
// --csprof-max-unsymbolized-context-depth: file-local integer option,
// initialized to -1. AddressStack::getContextKey() treats -1 as "no limit".
57static cl::opt<int> CSProfMaxUnsymbolizedCtxDepth(
58 "csprof-max-unsymbolized-context-depth", cl::init(Val: -1),
59 cl::desc("Keep the last K contexts while merging unsymbolized profile. -1 "
60 "means no depth limit."),
61 cl::cat(ProfGenCategory));
62
63namespace sampleprof {
64
65void VirtualUnwinder::unwindCall(UnwindState &State) {
66 uint64_t Source = State.getCurrentLBRSource();
67 auto *ParentFrame = State.getParentFrame();
68 // The 2nd frame after leaf could be missing if stack sample is
69 // taken when IP is within prolog/epilog, as frame chain isn't
70 // setup yet. Fill in the missing frame in that case.
71 // TODO: Currently we just assume all the addr that can't match the
72 // 2nd frame is in prolog/epilog. In the future, we will switch to
73 // pro/epi tracker(Dwarf CFI) for the precise check.
74 if (ParentFrame == State.getDummyRootPtr() ||
75 ParentFrame->Address != Source) {
76 State.switchToFrame(Address: Source);
77 if (ParentFrame != State.getDummyRootPtr()) {
78 if (Source == ExternalAddr)
79 NumMismatchedExtCallBranch++;
80 else
81 NumMismatchedProEpiBranch++;
82 }
83 } else {
84 State.popFrame();
85 }
86 State.InstPtr.update(Addr: Source);
87}
88
89void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
90 InstructionPointer &IP = State.InstPtr;
91 uint64_t Target = State.getCurrentLBRTarget();
92 uint64_t End = IP.Address;
93
94 if (End == ExternalAddr && Target == ExternalAddr) {
95 // Filter out the case when leaf external frame matches the external LBR
96 // target, this is a valid state, it happens that the code run into external
97 // address then return back. The call frame under the external frame
98 // remains valid and can be unwound later, just skip recording this range.
99 NumPairedExtAddr++;
100 return;
101 }
102
103 if (End == ExternalAddr || Target == ExternalAddr) {
104 // Range is invalid if only one point is external address. This means LBR
105 // traces contains a standalone external address failing to pair another
106 // one, likely due to interrupt jmp or broken perf script. Set the
107 // state to invalid.
108 NumUnpairedExtAddr++;
109 State.setInvalid();
110 return;
111 }
112
113 if (!isValidFallThroughRange(Start: Target, End, Binary)) {
114 // Skip unwinding the rest of LBR trace when a bogus range is seen.
115 State.setInvalid();
116 return;
117 }
118
119 if (Binary->usePseudoProbes()) {
120 // We don't need to top frame probe since it should be extracted
121 // from the range.
122 // The outcome of the virtual unwinding with pseudo probes is a
123 // map from a context key to the address range being unwound.
124 // This means basically linear unwinding is not needed for pseudo
125 // probes. The range will be simply recorded here and will be
126 // converted to a list of pseudo probes to report in ProfileGenerator.
127 State.getParentFrame()->recordRangeCount(Start: Target, End, Count: Repeat);
128 } else {
129 // Unwind linear execution part.
130 // Split and record the range by different inline context. For example:
131 // [0x01] ... main:1 # Target
132 // [0x02] ... main:2
133 // [0x03] ... main:3 @ foo:1
134 // [0x04] ... main:3 @ foo:2
135 // [0x05] ... main:3 @ foo:3
136 // [0x06] ... main:4
137 // [0x07] ... main:5 # End
138 // It will be recorded:
139 // [main:*] : [0x06, 0x07], [0x01, 0x02]
140 // [main:3 @ foo:*] : [0x03, 0x05]
141 while (IP.Address > Target) {
142 uint64_t PrevIP = IP.Address;
143 IP.backward();
144 // Break into segments for implicit call/return due to inlining
145 bool SameInlinee = Binary->inlineContextEqual(Add1: PrevIP, Add2: IP.Address);
146 if (!SameInlinee) {
147 State.switchToFrame(Address: PrevIP);
148 State.CurrentLeafFrame->recordRangeCount(Start: PrevIP, End, Count: Repeat);
149 End = IP.Address;
150 }
151 }
152 assert(IP.Address == Target && "The last one must be the target address.");
153 // Record the remaining range, [0x01, 0x02] in the example
154 State.switchToFrame(Address: IP.Address);
155 State.CurrentLeafFrame->recordRangeCount(Start: IP.Address, End, Count: Repeat);
156 }
157}
158
159void VirtualUnwinder::unwindReturn(UnwindState &State) {
160 // Add extra frame as we unwind through the return
161 const LBREntry &LBR = State.getCurrentLBR();
162 uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr: LBR.Target);
163 State.switchToFrame(Address: CallAddr);
164 State.pushFrame(Address: LBR.Source);
165 State.InstPtr.update(Addr: LBR.Source);
166}
167
168void VirtualUnwinder::unwindBranch(UnwindState &State) {
169 // TODO: Tolerate tail call for now, as we may see tail call from libraries.
170 // This is only for intra function branches, excluding tail calls.
171 uint64_t Source = State.getCurrentLBRSource();
172 State.switchToFrame(Address: Source);
173 State.InstPtr.update(Addr: Source);
174}
175
176std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
177 std::shared_ptr<StringBasedCtxKey> KeyStr =
178 std::make_shared<StringBasedCtxKey>();
179 KeyStr->Context = Binary->getExpandedContext(Stack, WasLeafInlined&: KeyStr->WasLeafInlined);
180 return KeyStr;
181}
182
183std::shared_ptr<AddrBasedCtxKey> AddressStack::getContextKey() {
184 std::shared_ptr<AddrBasedCtxKey> KeyStr = std::make_shared<AddrBasedCtxKey>();
185 KeyStr->Context = Stack;
186 CSProfileGenerator::compressRecursionContext<uint64_t>(Context&: KeyStr->Context);
187 // MaxContextDepth(--csprof-max-context-depth) is used to trim both symbolized
188 // and unsymbolized profile context. Sometimes we want to at least preserve
189 // the inlinings for the leaf frame(the profiled binary inlining),
190 // --csprof-max-context-depth may not be flexible enough, in this case,
191 // --csprof-max-unsymbolized-context-depth is used to limit the context for
192 // unsymbolized profile. If both are set, use the minimum of them.
193 int Depth = CSProfileGenerator::MaxContextDepth != -1
194 ? CSProfileGenerator::MaxContextDepth
195 : KeyStr->Context.size();
196 Depth = CSProfMaxUnsymbolizedCtxDepth != -1
197 ? std::min(a: static_cast<int>(CSProfMaxUnsymbolizedCtxDepth), b: Depth)
198 : Depth;
199 CSProfileGenerator::trimContext<uint64_t>(S&: KeyStr->Context, Depth);
200 return KeyStr;
201}
202
203template <typename T>
204void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
205 T &Stack) {
206 if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
207 return;
208
209 std::shared_ptr<ContextKey> Key = Stack.getContextKey();
210 if (Key == nullptr)
211 return;
212 auto Ret = CtxCounterMap->emplace(args: Hashable<ContextKey>(Key), args: SampleCounter());
213 SampleCounter &SCounter = Ret.first->second;
214 for (auto &I : Cur->RangeSamples)
215 SCounter.recordRangeCount(Start: std::get<0>(t&: I), End: std::get<1>(t&: I), Repeat: std::get<2>(t&: I));
216
217 for (auto &I : Cur->BranchSamples)
218 SCounter.recordBranchCount(Source: std::get<0>(t&: I), Target: std::get<1>(t&: I), Repeat: std::get<2>(t&: I));
219}
220
221template <typename T>
222void VirtualUnwinder::collectSamplesFromFrameTrie(
223 UnwindState::ProfiledFrame *Cur, T &Stack) {
224 if (!Cur->isDummyRoot()) {
225 // Truncate the context for external frame since this isn't a real call
226 // context the compiler will see.
227 if (Cur->isExternalFrame() || !Stack.pushFrame(Cur)) {
228 // Process truncated context
229 // Start a new traversal ignoring its bottom context
230 T EmptyStack(Binary);
231 collectSamplesFromFrame(Cur, EmptyStack);
232 for (const auto &Item : Cur->Children) {
233 collectSamplesFromFrameTrie(Item.second.get(), EmptyStack);
234 }
235
236 // Keep note of untracked call site and deduplicate them
237 // for warning later.
238 if (!Cur->isLeafFrame())
239 UntrackedCallsites.insert(x: Cur->Address);
240
241 return;
242 }
243 }
244
245 collectSamplesFromFrame(Cur, Stack);
246 // Process children frame
247 for (const auto &Item : Cur->Children) {
248 collectSamplesFromFrameTrie(Item.second.get(), Stack);
249 }
250 // Recover the call stack
251 Stack.popFrame();
252}
253
254void VirtualUnwinder::collectSamplesFromFrameTrie(
255 UnwindState::ProfiledFrame *Cur) {
256 if (Binary->usePseudoProbes()) {
257 AddressStack Stack(Binary);
258 collectSamplesFromFrameTrie<AddressStack>(Cur, Stack);
259 } else {
260 FrameStack Stack(Binary);
261 collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
262 }
263}
264
265void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
266 UnwindState &State, uint64_t Repeat) {
267 if (Branch.Target == ExternalAddr)
268 return;
269
270 // Record external-to-internal pattern on the trie root, it later can be
271 // used for generating head samples.
272 if (Branch.Source == ExternalAddr) {
273 State.getDummyRootPtr()->recordBranchCount(Source: Branch.Source, Target: Branch.Target,
274 Count: Repeat);
275 return;
276 }
277
278 if (Binary->usePseudoProbes()) {
279 // Same as recordRangeCount, We don't need to top frame probe since we will
280 // extract it from branch's source address
281 State.getParentFrame()->recordBranchCount(Source: Branch.Source, Target: Branch.Target,
282 Count: Repeat);
283 } else {
284 State.CurrentLeafFrame->recordBranchCount(Source: Branch.Source, Target: Branch.Target,
285 Count: Repeat);
286 }
287}
288
289bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
290 // Capture initial state as starting point for unwinding.
291 UnwindState State(Sample, Binary);
292
293 // Sanity check - making sure leaf of LBR aligns with leaf of stack sample
294 // Stack sample sometimes can be unreliable, so filter out bogus ones.
295 if (!State.validateInitialState())
296 return false;
297
298 NumTotalBranches += State.LBRStack.size();
299 // Now process the LBR samples in parrallel with stack sample
300 // Note that we do not reverse the LBR entry order so we can
301 // unwind the sample stack as we walk through LBR entries.
302 while (State.hasNextLBR()) {
303 State.checkStateConsistency();
304
305 // Do not attempt linear unwind for the leaf range as it's incomplete.
306 if (!State.IsLastLBR()) {
307 // Unwind implicit calls/returns from inlining, along the linear path,
308 // break into smaller sub section each with its own calling context.
309 unwindLinear(State, Repeat);
310 }
311
312 // Save the LBR branch before it gets unwound.
313 const LBREntry &Branch = State.getCurrentLBR();
314 if (isCallState(State)) {
315 // Unwind calls - we know we encountered call if LBR overlaps with
316 // transition between leaf the 2nd frame. Note that for calls that
317 // were not in the original stack sample, we should have added the
318 // extra frame when processing the return paired with this call.
319 unwindCall(State);
320 } else if (isReturnState(State)) {
321 // Unwind returns - check whether the IP is indeed at a return
322 // instruction
323 unwindReturn(State);
324 } else if (isValidState(State)) {
325 // Unwind branches
326 unwindBranch(State);
327 } else {
328 // Skip unwinding the rest of LBR trace. Reset the stack and update the
329 // state so that the rest of the trace can still be processed as if they
330 // do not have stack samples.
331 State.clearCallStack();
332 State.InstPtr.update(Addr: State.getCurrentLBRSource());
333 State.pushFrame(Address: State.InstPtr.Address);
334 }
335
336 State.advanceLBR();
337 // Record `branch` with calling context after unwinding.
338 recordBranchCount(Branch, State, Repeat);
339 }
340 // As samples are aggregated on trie, record them into counter map
341 collectSamplesFromFrameTrie(Cur: State.getDummyRootPtr());
342
343 return true;
344}
345
346std::unique_ptr<PerfReaderBase>
347PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
348 std::optional<int32_t> PIDFilter) {
349 std::unique_ptr<PerfReaderBase> PerfReader;
350
351 if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) {
352 PerfReader.reset(
353 p: new UnsymbolizedProfileReader(Binary, PerfInput.InputFile));
354 return PerfReader;
355 }
356
357 // For perf data input, we need to convert them into perf script first.
358 // If this is a kernel perf file, there is no need for retrieving PIDs.
359 if (PerfInput.Format == PerfFormat::PerfData)
360 PerfInput = PerfScriptReader::convertPerfDataToTrace(
361 Binary, SkipPID: Binary->isKernel(), File&: PerfInput, PIDFilter);
362
363 assert((PerfInput.Format == PerfFormat::PerfScript) &&
364 "Should be a perfscript!");
365
366 PerfInput.Content =
367 PerfScriptReader::checkPerfScriptType(FileName: PerfInput.InputFile);
368 if (PerfInput.Content == PerfContent::LBRStack) {
369 PerfReader.reset(
370 p: new HybridPerfReader(Binary, PerfInput.InputFile, PIDFilter));
371 } else if (PerfInput.Content == PerfContent::LBR) {
372 PerfReader.reset(p: new LBRPerfReader(Binary, PerfInput.InputFile, PIDFilter));
373 } else {
374 exitWithError(Message: "Unsupported perfscript!");
375 }
376
377 return PerfReader;
378}
379
380Error PerfReaderBase::parseDataAccessPerfTraces(
381 StringRef DataAccessPerfTraceFile, std::optional<int32_t> PIDFilter) {
382 // A perf_record_sample line is like
383 // . 1282514022939813 0x87b0 [0x60]: PERF_RECORD_SAMPLE(IP, 0x4002):
384 // 3446532/3446532: 0x2608a2 period: 233 addr: 0x3b3fb0
385 constexpr static StringRef DataAccessSamplePattern =
386 "PERF_RECORD_SAMPLE\\([A-Za-z]+, 0x[0-9a-fA-F]+\\): "
387 "([0-9]+)\\/[0-9]+: 0x([0-9a-fA-F]+) period: [0-9]+ addr: "
388 "0x([0-9a-fA-F]+)";
389
390 llvm::Regex LogRegex(DataAccessSamplePattern);
391
392 auto BufferOrErr = MemoryBuffer::getFile(Filename: DataAccessPerfTraceFile);
393 std::error_code EC = BufferOrErr.getError();
394 if (EC)
395 return make_error<StringError>(Args: "Failed to open perf trace file: " +
396 DataAccessPerfTraceFile,
397 Args: inconvertibleErrorCode());
398
399 assert(!SampleCounters.empty() && "Sample counters should not be empty!");
400 SampleCounter &Counter = SampleCounters.begin()->second;
401 line_iterator LineIt(*BufferOrErr.get(), true);
402
403 for (; !LineIt.is_at_eof(); ++LineIt) {
404 StringRef Line = *LineIt;
405
406 MMapEvent MMap;
407 if (Line.contains(Other: "PERF_RECORD_MMAP2")) {
408 if (PerfScriptReader::extractMMapEventForBinary(Binary, Line, MMap)) {
409 if (!MMap.MemProtectionFlag.contains(Other: "x")) {
410 if (Error E = Binary->addMMapNonTextEvent(Event: MMap)) {
411 return E;
412 }
413 }
414 }
415 continue;
416 }
417
418 SmallVector<StringRef> Fields;
419 if (LogRegex.match(String: Line, Matches: &Fields)) {
420 int32_t PID = 0;
421 if (Fields[1].getAsInteger(Radix: 10, Result&: PID))
422 return make_error<StringError>(
423 Args: "Failed to parse PID from perf trace line: " + Line,
424 Args: inconvertibleErrorCode());
425
426 if (PIDFilter.has_value() && *PIDFilter != PID) {
427 continue;
428 }
429
430 uint64_t DataAddress = 0;
431 if (Fields[3].getAsInteger(Radix: 16, Result&: DataAddress))
432 return make_error<StringError>(
433 Args: "Failed to parse data address from perf trace line: " + Line,
434 Args: inconvertibleErrorCode());
435 // Out of all the memory access events, the vtable accesses are used to
436 // construct type profiles. We assume that this is under the Itanium
437 // C++ ABI so we can use `_ZTV` prefix to identify vtable.
438 StringRef DataSymbol = Binary->symbolizeDataAddress(
439 Address: Binary->CanonicalizeNonTextAddress(Address: DataAddress));
440 if (DataSymbol.starts_with(Prefix: "_ZTV")) {
441 uint64_t IP = 0;
442 Fields[2].getAsInteger(Radix: 16, Result&: IP);
443 Counter.recordDataAccessCount(InstAddr: Binary->canonicalizeVirtualAddress(Address: IP),
444 DataSymbol, Repeat: 1);
445 }
446 }
447 }
448 return Error::success();
449}
450
451PerfInputFile
452PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID,
453 PerfInputFile &File,
454 std::optional<int32_t> PIDFilter) {
455 StringRef PerfData = File.InputFile;
456 // Run perf script to retrieve PIDs matching binary we're interested in.
457 auto PerfExecutable = sys::Process::FindInEnvPath(EnvName: "PATH", FileName: "perf");
458 if (!PerfExecutable) {
459 exitWithError(Message: "Perf not found.");
460 }
461 std::string PerfPath = *PerfExecutable;
462 SmallString<128> PerfTraceFile;
463 sys::fs::createUniquePath(Model: "perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp",
464 ResultPath&: PerfTraceFile, /*MakeAbsolute=*/true);
465 std::string ErrorFile = std::string(PerfTraceFile) + ".err";
466 std::optional<StringRef> Redirects[] = {std::nullopt, // Stdin
467 StringRef(PerfTraceFile), // Stdout
468 StringRef(ErrorFile)}; // Stderr
469 PerfScriptReader::TempFileCleanups.emplace_back(Args&: PerfTraceFile);
470 PerfScriptReader::TempFileCleanups.emplace_back(Args&: ErrorFile);
471
472 std::string PIDs;
473 if (!SkipPID) {
474 StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events",
475 "-F", "comm,pid", "-i",
476 PerfData};
477 sys::ExecuteAndWait(Program: PerfPath, Args: ScriptMMapArgs, Env: std::nullopt, Redirects);
478
479 // Collect the PIDs
480 TraceStream TraceIt(PerfTraceFile);
481 std::unordered_set<int32_t> PIDSet;
482 while (!TraceIt.isAtEoF()) {
483 MMapEvent MMap;
484 if (isMMapEvent(Line: TraceIt.getCurrentLine()) &&
485 extractMMapEventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap)) {
486 auto It = PIDSet.emplace(args&: MMap.PID);
487 if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) {
488 if (!PIDs.empty()) {
489 PIDs.append(s: ",");
490 }
491 PIDs.append(str: utostr(X: MMap.PID));
492 }
493 }
494 TraceIt.advance();
495 }
496
497 if (PIDs.empty()) {
498 exitWithError(Message: "No relevant mmap event is found in perf data.");
499 }
500 }
501
502 // Run perf script again to retrieve events for PIDs collected above
503 SmallVector<StringRef, 8> ScriptSampleArgs;
504 ScriptSampleArgs.push_back(Elt: PerfPath);
505 ScriptSampleArgs.push_back(Elt: "script");
506 ScriptSampleArgs.push_back(Elt: "--show-mmap-events");
507 ScriptSampleArgs.push_back(Elt: "-F");
508 ScriptSampleArgs.push_back(Elt: "ip,brstack");
509 ScriptSampleArgs.push_back(Elt: "-i");
510 ScriptSampleArgs.push_back(Elt: PerfData);
511 if (!PIDs.empty()) {
512 ScriptSampleArgs.push_back(Elt: "--pid");
513 ScriptSampleArgs.push_back(Elt: PIDs);
514 }
515 sys::ExecuteAndWait(Program: PerfPath, Args: ScriptSampleArgs, Env: std::nullopt, Redirects);
516
517 return {.InputFile: std::string(PerfTraceFile), .Format: PerfFormat::PerfScript,
518 .Content: PerfContent::UnknownContent};
519}
520
521static StringRef filename(StringRef Path, bool UseBackSlash) {
522 llvm::sys::path::Style PathStyle =
523 UseBackSlash ? llvm::sys::path::Style::windows_backslash
524 : llvm::sys::path::Style::native;
525 StringRef FileName = llvm::sys::path::filename(path: Path, style: PathStyle);
526
527 // In case this file use \r\n as newline.
528 if (UseBackSlash && FileName.back() == '\r')
529 return FileName.drop_back();
530
531 return FileName;
532}
533
534void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
535 // Drop the event which doesn't belong to user-provided binary
536 StringRef BinaryName = filename(Path: Event.BinaryPath, UseBackSlash: Binary->isCOFF());
537 bool IsKernel = Binary->isKernel();
538 if (!IsKernel && Binary->getName() != BinaryName)
539 return;
540 if (IsKernel && !Binary->isKernelImageName(BinaryName))
541 return;
542
543 // Drop the event if process does not match pid filter
544 if (PIDFilter && Event.PID != *PIDFilter)
545 return;
546
547 // Drop the event if its image is loaded at the same address
548 if (Event.Address == Binary->getBaseAddress()) {
549 Binary->setIsLoadedByMMap(true);
550 return;
551 }
552
553 if (IsKernel || Event.Offset == Binary->getTextSegmentOffset()) {
554 // A binary image could be unloaded and then reloaded at different
555 // place, so update binary load address.
556 // Only update for the first executable segment and assume all other
557 // segments are loaded at consecutive memory addresses, which is the case on
558 // X64.
559 Binary->setBaseAddress(Event.Address);
560 Binary->setIsLoadedByMMap(true);
561 } else {
562 // Verify segments are loaded consecutively.
563 const auto &Offsets = Binary->getTextSegmentOffsets();
564 auto It = llvm::lower_bound(Range: Offsets, Value: Event.Offset);
565 if (It != Offsets.end() && *It == Event.Offset) {
566 // The event is for loading a separate executable segment.
567 auto I = std::distance(first: Offsets.begin(), last: It);
568 const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses();
569 if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() !=
570 Event.Address - Binary->getBaseAddress())
571 exitWithError(Message: "Executable segments not loaded consecutively");
572 } else {
573 if (It == Offsets.begin())
574 exitWithError(Message: "File offset not found");
575 else {
576 // Find the segment the event falls in. A large segment could be loaded
577 // via multiple mmap calls with consecutive memory addresses.
578 --It;
579 assert(*It < Event.Offset);
580 if (Event.Offset - *It != Event.Address - Binary->getBaseAddress())
581 exitWithError(Message: "Segment not loaded by consecutive mmaps");
582 }
583 }
584 }
585}
586
587static std::string getContextKeyStr(ContextKey *K,
588 const ProfiledBinary *Binary) {
589 if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(Val: K)) {
590 return SampleContext::getContextString(Context: CtxKey->Context);
591 } else if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(Val: K)) {
592 std::ostringstream OContextStr;
593 for (uint32_t I = 0; I < CtxKey->Context.size(); I++) {
594 if (OContextStr.str().size())
595 OContextStr << " @ ";
596 uint64_t Address = CtxKey->Context[I];
597 if (UseOffset) {
598 if (UseLoadableSegmentAsBase)
599 Address -= Binary->getFirstLoadableAddress();
600 else
601 Address -= Binary->getPreferredBaseAddress();
602 }
603 OContextStr << "0x"
604 << utohexstr(X: Address,
605 /*LowerCase=*/true);
606 }
607 return OContextStr.str();
608 } else {
609 llvm_unreachable("unexpected key type");
610 }
611}
612
613void HybridPerfReader::unwindSamples() {
614 VirtualUnwinder Unwinder(&SampleCounters, Binary);
615 for (const auto &Item : AggregatedSamples) {
616 const PerfSample *Sample = Item.first.getPtr();
617 Unwinder.unwind(Sample, Repeat: Item.second);
618 }
619
620 // Warn about untracked frames due to missing probes.
621 if (ShowDetailedWarning) {
622 for (auto Address : Unwinder.getUntrackedCallsites())
623 WithColor::warning() << "Profile context truncated due to missing probe "
624 << "for call instruction at "
625 << format(Fmt: "0x%" PRIx64, Vals: Address) << "\n";
626 }
627
628 emitWarningSummary(Num: Unwinder.getUntrackedCallsites().size(),
629 Total: SampleCounters.size(),
630 Msg: "of profiled contexts are truncated due to missing probe "
631 "for call instruction.");
632
633 emitWarningSummary(
634 Num: Unwinder.NumMismatchedExtCallBranch, Total: Unwinder.NumTotalBranches,
635 Msg: "of branches'source is a call instruction but doesn't match call frame "
636 "stack, likely due to unwinding error of external frame.");
637
638 emitWarningSummary(Num: Unwinder.NumPairedExtAddr * 2, Total: Unwinder.NumTotalBranches,
639 Msg: "of branches containing paired external address.");
640
641 emitWarningSummary(Num: Unwinder.NumUnpairedExtAddr, Total: Unwinder.NumTotalBranches,
642 Msg: "of branches containing external address but doesn't have "
643 "another external address to pair, likely due to "
644 "interrupt jmp or broken perf script.");
645
646 emitWarningSummary(
647 Num: Unwinder.NumMismatchedProEpiBranch, Total: Unwinder.NumTotalBranches,
648 Msg: "of branches'source is a call instruction but doesn't match call frame "
649 "stack, likely due to frame in prolog/epilog.");
650
651 emitWarningSummary(Num: Unwinder.NumMissingExternalFrame,
652 Total: Unwinder.NumExtCallBranch,
653 Msg: "of artificial call branches but doesn't have an external "
654 "frame to match.");
655}
656
657bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
658 SmallVectorImpl<LBREntry> &LBRStack) {
659 // The raw format of LBR stack is like:
660 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
661 // ... 0x4005c8/0x4005dc/P/-/-/0
662 // It's in FIFO order and separated by whitespace.
663 SmallVector<StringRef, 32> Records;
664 TraceIt.getCurrentLine().rtrim().split(A&: Records, Separator: " ", MaxSplit: -1, KeepEmpty: false);
665 auto WarnInvalidLBR = [](TraceStream &TraceIt) {
666 WithColor::warning() << "Invalid address in LBR record at line "
667 << TraceIt.getLineNumber() << ": "
668 << TraceIt.getCurrentLine() << "\n";
669 };
670
671 // Skip the leading instruction pointer.
672 size_t Index = 0;
673 uint64_t LeadingAddr;
674 if (!Records.empty() && !Records[0].contains(C: '/')) {
675 if (Records[0].getAsInteger(Radix: 16, Result&: LeadingAddr)) {
676 WarnInvalidLBR(TraceIt);
677 TraceIt.advance();
678 return false;
679 }
680 Index = 1;
681 }
682
683 // Now extract LBR samples - note that we do not reverse the
684 // LBR entry order so we can unwind the sample stack as we walk
685 // through LBR entries.
686 while (Index < Records.size()) {
687 auto &Token = Records[Index++];
688 if (Token.size() == 0)
689 continue;
690
691 SmallVector<StringRef, 8> Addresses;
692 Token.split(A&: Addresses, Separator: "/");
693 uint64_t Src;
694 uint64_t Dst;
695
696 // Stop at broken LBR records.
697 if (Addresses.size() < 2 || Addresses[0].substr(Start: 2).getAsInteger(Radix: 16, Result&: Src) ||
698 Addresses[1].substr(Start: 2).getAsInteger(Radix: 16, Result&: Dst)) {
699 WarnInvalidLBR(TraceIt);
700 break;
701 }
702
703 // Canonicalize to use preferred load address as base address.
704 Src = Binary->canonicalizeVirtualAddress(Address: Src);
705 Dst = Binary->canonicalizeVirtualAddress(Address: Dst);
706 bool SrcIsInternal = Binary->addressIsCode(Address: Src);
707 bool DstIsInternal = Binary->addressIsCode(Address: Dst);
708 if (!SrcIsInternal)
709 Src = ExternalAddr;
710 if (!DstIsInternal)
711 Dst = ExternalAddr;
712 // Filter external-to-external case to reduce LBR trace size.
713 if (!SrcIsInternal && !DstIsInternal)
714 continue;
715
716 LBRStack.emplace_back(Args: LBREntry(Src, Dst));
717 }
718 TraceIt.advance();
719 return !LBRStack.empty();
720}
721
722bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
723 SmallVectorImpl<uint64_t> &CallStack) {
724 // The raw format of call stack is like:
725 // 4005dc # leaf frame
726 // 400634
727 // 400684 # root frame
728 // It's in bottom-up order with each frame in one line.
729
730 // Extract stack frames from sample
731 while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(Prefix: " 0x")) {
732 StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
733 uint64_t FrameAddr = 0;
734 if (FrameStr.getAsInteger(Radix: 16, Result&: FrameAddr)) {
735 // We might parse a non-perf sample line like empty line and comments,
736 // skip it
737 TraceIt.advance();
738 return false;
739 }
740 TraceIt.advance();
741
742 FrameAddr = Binary->canonicalizeVirtualAddress(Address: FrameAddr);
743 // Currently intermixed frame from different binaries is not supported.
744 if (!Binary->addressIsCode(Address: FrameAddr)) {
745 if (CallStack.empty())
746 NumLeafExternalFrame++;
747 // Push a special value(ExternalAddr) for the external frames so that
748 // unwinder can still work on this with artificial Call/Return branch.
749 // After unwinding, the context will be truncated for external frame.
750 // Also deduplicate the consecutive external addresses.
751 if (CallStack.empty() || CallStack.back() != ExternalAddr)
752 CallStack.emplace_back(Args: ExternalAddr);
753 continue;
754 }
755
756 // We need to translate return address to call address for non-leaf frames.
757 if (!CallStack.empty()) {
758 auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr);
759 if (!CallAddr) {
760 // Stop at an invalid return address caused by bad unwinding. This could
761 // happen to frame-pointer-based unwinding and the callee functions that
762 // do not have the frame pointer chain set up.
763 InvalidReturnAddresses.insert(x: FrameAddr);
764 break;
765 }
766 FrameAddr = CallAddr;
767 }
768
769 CallStack.emplace_back(Args&: FrameAddr);
770 }
771
772 // Strip out the bottom external addr.
773 if (CallStack.size() > 1 && CallStack.back() == ExternalAddr)
774 CallStack.pop_back();
775
776 // Skip other unrelated line, find the next valid LBR line
777 // Note that even for empty call stack, we should skip the address at the
778 // bottom, otherwise the following pass may generate a truncated callstack
779 while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(Prefix: " 0x")) {
780 TraceIt.advance();
781 }
782 // Filter out broken stack sample. We may not have complete frame info
783 // if sample end up in prolog/epilog, the result is dangling context not
784 // connected to entry point. This should be relatively rare thus not much
785 // impact on overall profile quality. However we do want to filter them
786 // out to reduce the number of different calling contexts. One instance
787 // of such case - when sample landed in prolog/epilog, somehow stack
788 // walking will be broken in an unexpected way that higher frames will be
789 // missing.
790 return !CallStack.empty() &&
791 !Binary->addressInPrologEpilog(Address: CallStack.front());
792}
793
794void PerfScriptReader::warnIfMissingMMap() {
795 if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) {
796 WithColor::warning() << "No relevant mmap event is matched for "
797 << Binary->getName()
798 << ", will use preferred address ("
799 << format(Fmt: "0x%" PRIx64,
800 Vals: Binary->getPreferredBaseAddress())
801 << ") as the base loading address!\n";
802 // Avoid redundant warning, only warn at the first unmatched sample.
803 Binary->setMissingMMapWarned(true);
804 }
805}
806
807void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
808 // The raw hybird sample started with call stack in FILO order and followed
809 // intermediately by LBR sample
810 // e.g.
811 // 4005dc # call stack leaf
812 // 400634
813 // 400684 # call stack root
814 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
815 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
816 //
817 std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
818#ifndef NDEBUG
819 Sample->Linenum = TraceIt.getLineNumber();
820#endif
821 // Parsing call stack and populate into PerfSample.CallStack
822 if (!extractCallstack(TraceIt, CallStack&: Sample->CallStack)) {
823 // Skip the next LBR line matched current call stack
824 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(Prefix: " 0x"))
825 TraceIt.advance();
826 return;
827 }
828
829 warnIfMissingMMap();
830
831 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(Prefix: " 0x")) {
832 // Parsing LBR stack and populate into PerfSample.LBRStack
833 if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) {
834 if (IgnoreStackSamples) {
835 Sample->CallStack.clear();
836 } else {
837 // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR
838 // ranges
839 Sample->CallStack.front() = Sample->LBRStack[0].Target;
840 }
841 // Record samples by aggregation
842 AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
843 }
844 } else {
845 // LBR sample is encoded in single line after stack sample
846 exitWithError(Message: "'Hybrid perf sample is corrupted, No LBR sample line");
847 }
848}
849
850void PerfScriptReader::writeUnsymbolizedProfile(StringRef Filename) {
851 std::error_code EC;
852 raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF);
853 if (EC)
854 exitWithError(EC, Whence: Filename);
855 writeUnsymbolizedProfile(OS);
856}
857
858// Use ordered map to make the output deterministic
859using OrderedCounterForPrint = std::map<std::string, SampleCounter *>;
860
861void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) {
862 OrderedCounterForPrint OrderedCounters;
863 for (auto &CI : SampleCounters) {
864 OrderedCounters[getContextKeyStr(K: CI.first.getPtr(), Binary)] = &CI.second;
865 }
866
867 auto SCounterPrinter = [&](RangeSample &Counter, StringRef Separator,
868 uint32_t Indent) {
869 OS.indent(NumSpaces: Indent);
870 OS << Counter.size() << "\n";
871 for (auto &I : Counter) {
872 uint64_t Start = I.first.first;
873 uint64_t End = I.first.second;
874
875 if (UseOffset) {
876 if (UseLoadableSegmentAsBase) {
877 Start -= Binary->getFirstLoadableAddress();
878 End -= Binary->getFirstLoadableAddress();
879 } else {
880 Start -= Binary->getPreferredBaseAddress();
881 End -= Binary->getPreferredBaseAddress();
882 }
883 }
884
885 OS.indent(NumSpaces: Indent);
886 OS << Twine::utohexstr(Val: Start) << Separator << Twine::utohexstr(Val: End) << ":"
887 << I.second << "\n";
888 }
889 };
890
891 for (auto &CI : OrderedCounters) {
892 uint32_t Indent = 0;
893 if (ProfileIsCS) {
894 // Context string key
895 OS << "[" << CI.first << "]\n";
896 Indent = 2;
897 }
898
899 SampleCounter &Counter = *CI.second;
900 SCounterPrinter(Counter.RangeCounter, "-", Indent);
901 SCounterPrinter(Counter.BranchCounter, "->", Indent);
902 }
903}
904
905// Format of input:
906// number of entries in RangeCounter
907// from_1-to_1:count_1
908// from_2-to_2:count_2
909// ......
910// from_n-to_n:count_n
911// number of entries in BranchCounter
912// src_1->dst_1:count_1
913// src_2->dst_2:count_2
914// ......
915// src_n->dst_n:count_n
916void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt,
917 SampleCounter &SCounters) {
918 auto exitWithErrorForTraceLine = [](TraceStream &TraceIt) {
919 std::string Msg = TraceIt.isAtEoF()
920 ? "Invalid raw profile!"
921 : "Invalid raw profile at line " +
922 Twine(TraceIt.getLineNumber()).str() + ": " +
923 TraceIt.getCurrentLine().str();
924 exitWithError(Message: Msg);
925 };
926 auto ReadNumber = [&](uint64_t &Num) {
927 if (TraceIt.isAtEoF())
928 exitWithErrorForTraceLine(TraceIt);
929 if (TraceIt.getCurrentLine().ltrim().getAsInteger(Radix: 10, Result&: Num))
930 exitWithErrorForTraceLine(TraceIt);
931 TraceIt.advance();
932 };
933
934 auto ReadCounter = [&](RangeSample &Counter, StringRef Separator) {
935 uint64_t Num = 0;
936 ReadNumber(Num);
937 while (Num--) {
938 if (TraceIt.isAtEoF())
939 exitWithErrorForTraceLine(TraceIt);
940 StringRef Line = TraceIt.getCurrentLine().ltrim();
941
942 uint64_t Count = 0;
943 auto LineSplit = Line.split(Separator: ":");
944 if (LineSplit.second.empty() || LineSplit.second.getAsInteger(Radix: 10, Result&: Count))
945 exitWithErrorForTraceLine(TraceIt);
946
947 uint64_t Source = 0;
948 uint64_t Target = 0;
949 auto Range = LineSplit.first.split(Separator);
950 if (Range.second.empty() || Range.first.getAsInteger(Radix: 16, Result&: Source) ||
951 Range.second.getAsInteger(Radix: 16, Result&: Target))
952 exitWithErrorForTraceLine(TraceIt);
953
954 if (UseOffset) {
955 if (UseLoadableSegmentAsBase) {
956 Source += Binary->getFirstLoadableAddress();
957 Target += Binary->getFirstLoadableAddress();
958 } else {
959 Source += Binary->getPreferredBaseAddress();
960 Target += Binary->getPreferredBaseAddress();
961 }
962 }
963
964 Counter[{Source, Target}] += Count;
965 TraceIt.advance();
966 }
967 };
968
969 ReadCounter(SCounters.RangeCounter, "-");
970 ReadCounter(SCounters.BranchCounter, "->");
971}
972
973void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) {
974 TraceStream TraceIt(FileName);
975 while (!TraceIt.isAtEoF()) {
976 std::shared_ptr<StringBasedCtxKey> Key =
977 std::make_shared<StringBasedCtxKey>();
978 StringRef Line = TraceIt.getCurrentLine();
979 // Read context stack for CS profile.
980 if (Line.starts_with(Prefix: "[")) {
981 ProfileIsCS = true;
982 auto I = ContextStrSet.insert(x: Line.str());
983 SampleContext::createCtxVectorFromStr(ContextStr: *I.first, Context&: Key->Context);
984 TraceIt.advance();
985 }
986 auto Ret =
987 SampleCounters.emplace(args: Hashable<ContextKey>(Key), args: SampleCounter());
988 readSampleCounters(TraceIt, SCounters&: Ret.first->second);
989 }
990}
991
992void UnsymbolizedProfileReader::parsePerfTraces() {
993 readUnsymbolizedProfile(FileName: PerfTraceFile);
994}
995
996void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
997 uint64_t Repeat) {
998 SampleCounter &Counter = SampleCounters.begin()->second;
999 uint64_t EndAddress = 0;
1000 for (const LBREntry &LBR : Sample->LBRStack) {
1001 uint64_t SourceAddress = LBR.Source;
1002 uint64_t TargetAddress = LBR.Target;
1003
1004 // Record the branch if its SourceAddress is external. It can be the case an
1005 // external source call an internal function, later this branch will be used
1006 // to generate the function's head sample.
1007 if (Binary->addressIsCode(Address: TargetAddress)) {
1008 Counter.recordBranchCount(Source: SourceAddress, Target: TargetAddress, Repeat);
1009 }
1010
1011 // If this not the first LBR, update the range count between TO of current
1012 // LBR and FROM of next LBR.
1013 uint64_t StartAddress = TargetAddress;
1014 if (Binary->addressIsCode(Address: StartAddress) &&
1015 Binary->addressIsCode(Address: EndAddress) &&
1016 isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary))
1017 Counter.recordRangeCount(Start: StartAddress, End: EndAddress, Repeat);
1018 EndAddress = SourceAddress;
1019 }
1020}
1021
1022void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
1023 std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
1024 // Parsing LBR stack and populate into PerfSample.LBRStack
1025 if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) {
1026 warnIfMissingMMap();
1027 // Record LBR only samples by aggregation
1028 AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
1029 }
1030}
1031
1032void PerfScriptReader::generateUnsymbolizedProfile() {
1033 // There is no context for LBR only sample, so initialize one entry with
1034 // fake "empty" context key.
1035 assert(SampleCounters.empty() &&
1036 "Sample counter map should be empty before raw profile generation");
1037 std::shared_ptr<StringBasedCtxKey> Key =
1038 std::make_shared<StringBasedCtxKey>();
1039 SampleCounters.emplace(args: Hashable<ContextKey>(Key), args: SampleCounter());
1040 for (const auto &Item : AggregatedSamples) {
1041 const PerfSample *Sample = Item.first.getPtr();
1042 computeCounterFromLBR(Sample, Repeat: Item.second);
1043 }
1044}
1045
1046uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) {
1047 // The aggregated count is optional, so do not skip the line and return 1 if
1048 // it's unmatched
1049 uint64_t Count = 1;
1050 if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: Count))
1051 TraceIt.advance();
1052 return Count;
1053}
1054
1055void PerfScriptReader::parseSample(TraceStream &TraceIt) {
1056 NumTotalSample++;
1057 uint64_t Count = parseAggregatedCount(TraceIt);
1058 assert(Count >= 1 && "Aggregated count should be >= 1!");
1059 parseSample(TraceIt, Count);
1060}
1061
1062bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary,
1063 StringRef Line,
1064 MMapEvent &MMap) {
1065 // Parse a MMap2 line like:
1066 // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
1067 // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
1068 constexpr static const char *const MMap2Pattern =
1069 "PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: "
1070 "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
1071 "(0x[a-f0-9]+|0) .*\\]: ([-a-z]+) (.*)";
1072 // Parse a MMap line like
1073 // PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \
1074 // 0xffffffff81e00000]: x [kernel.kallsyms]_text
1075 constexpr static const char *const MMapPattern =
1076 "PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: "
1077 "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
1078 "(0x[a-f0-9]+|0)\\]: ([-a-z]+) (.*)";
1079 // Field 0 - whole line
1080 // Field 1 - PID
1081 // Field 2 - base address
1082 // Field 3 - mmapped size
1083 // Field 4 - page offset
1084 // Field 5 - binary path
1085 enum EventIndex {
1086 WHOLE_LINE = 0,
1087 PID = 1,
1088 MMAPPED_ADDRESS = 2,
1089 MMAPPED_SIZE = 3,
1090 PAGE_OFFSET = 4,
1091 MEM_PROTECTION_FLAG = 5,
1092 BINARY_PATH = 6,
1093 };
1094
1095 bool R = false;
1096 SmallVector<StringRef, 7> Fields;
1097 if (Line.contains(Other: "PERF_RECORD_MMAP2 ")) {
1098 Regex RegMmap2(MMap2Pattern);
1099 R = RegMmap2.match(String: Line, Matches: &Fields);
1100 } else if (Line.contains(Other: "PERF_RECORD_MMAP ")) {
1101 Regex RegMmap(MMapPattern);
1102 R = RegMmap.match(String: Line, Matches: &Fields);
1103 } else
1104 llvm_unreachable("unexpected MMAP event entry");
1105
1106 if (!R) {
1107 std::string WarningMsg = "Cannot parse mmap event: " + Line.str() + " \n";
1108 WithColor::warning() << WarningMsg;
1109 return false;
1110 }
1111 long long MMapPID = 0;
1112 getAsSignedInteger(Str: Fields[PID], Radix: 10, Result&: MMapPID);
1113 MMap.PID = MMapPID;
1114 Fields[MMAPPED_ADDRESS].getAsInteger(Radix: 0, Result&: MMap.Address);
1115 Fields[MMAPPED_SIZE].getAsInteger(Radix: 0, Result&: MMap.Size);
1116 Fields[PAGE_OFFSET].getAsInteger(Radix: 0, Result&: MMap.Offset);
1117 MMap.MemProtectionFlag = Fields[MEM_PROTECTION_FLAG];
1118 MMap.BinaryPath = Fields[BINARY_PATH];
1119 if (ShowMmapEvents) {
1120 outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at "
1121 << format(Fmt: "0x%" PRIx64 ":", Vals: MMap.Address) << " \n";
1122 }
1123
1124 StringRef BinaryName = filename(Path: MMap.BinaryPath, UseBackSlash: Binary->isCOFF());
1125 if (Binary->isKernel()) {
1126 return Binary->isKernelImageName(BinaryName);
1127 }
1128 return Binary->getName() == BinaryName;
1129}
1130
1131void PerfScriptReader::parseMMapEvent(TraceStream &TraceIt) {
1132 MMapEvent MMap;
1133 if (extractMMapEventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap))
1134 updateBinaryAddress(Event: MMap);
1135 TraceIt.advance();
1136}
1137
1138void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) {
1139 if (isMMapEvent(Line: TraceIt.getCurrentLine()))
1140 parseMMapEvent(TraceIt);
1141 else
1142 parseSample(TraceIt);
1143}
1144
1145void PerfScriptReader::parseAndAggregateTrace() {
1146 // Trace line iterator
1147 TraceStream TraceIt(PerfTraceFile);
1148 while (!TraceIt.isAtEoF())
1149 parseEventOrSample(TraceIt);
1150}
1151
1152// A LBR sample is like:
1153// 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ...
1154// A heuristic for fast detection by checking whether a
1155// leading " 0x" and the '/' exist.
1156bool PerfScriptReader::isLBRSample(StringRef Line) {
1157 // Skip the leading instruction pointer
1158 SmallVector<StringRef, 32> Records;
1159 Line.trim().split(A&: Records, Separator: " ", MaxSplit: 2, KeepEmpty: false);
1160 if (Records.size() < 2)
1161 return false;
1162 if (Records[1].starts_with(Prefix: "0x") && Records[1].contains(C: '/'))
1163 return true;
1164 return false;
1165}
1166
1167bool PerfScriptReader::isMMapEvent(StringRef Line) {
1168 // Short cut to avoid string find is possible.
1169 if (Line.empty() || Line.size() < 50)
1170 return false;
1171
1172 if (std::isdigit(Line[0]))
1173 return false;
1174
1175 // PERF_RECORD_MMAP2 or PERF_RECORD_MMAP does not appear at the beginning of
1176 // the line for ` perf script --show-mmap-events -i ...`
1177 return Line.contains(Other: "PERF_RECORD_MMAP");
1178}
1179
1180// The raw hybird sample is like
1181// e.g.
1182// 4005dc # call stack leaf
1183// 400634
1184// 400684 # call stack root
1185// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
1186// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
1187// Determine the perfscript contains hybrid samples(call stack + LBRs) by
1188// checking whether there is a non-empty call stack immediately followed by
1189// a LBR sample
1190PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) {
1191 TraceStream TraceIt(FileName);
1192 uint64_t FrameAddr = 0;
1193 while (!TraceIt.isAtEoF()) {
1194 // Skip the aggregated count
1195 if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: FrameAddr))
1196 TraceIt.advance();
1197
1198 // Detect sample with call stack
1199 int32_t Count = 0;
1200 while (!TraceIt.isAtEoF() &&
1201 !TraceIt.getCurrentLine().ltrim().getAsInteger(Radix: 16, Result&: FrameAddr)) {
1202 Count++;
1203 TraceIt.advance();
1204 }
1205 if (!TraceIt.isAtEoF()) {
1206 if (isLBRSample(Line: TraceIt.getCurrentLine())) {
1207 if (Count > 0)
1208 return PerfContent::LBRStack;
1209 else
1210 return PerfContent::LBR;
1211 }
1212 TraceIt.advance();
1213 }
1214 }
1215
1216 exitWithError(Message: "Invalid perf script input!");
1217 return PerfContent::UnknownContent;
1218}
1219
1220void HybridPerfReader::generateUnsymbolizedProfile() {
1221 ProfileIsCS = !IgnoreStackSamples;
1222 if (ProfileIsCS)
1223 unwindSamples();
1224 else
1225 PerfScriptReader::generateUnsymbolizedProfile();
1226}
1227
1228void PerfScriptReader::warnTruncatedStack() {
1229 if (ShowDetailedWarning) {
1230 for (auto Address : InvalidReturnAddresses) {
1231 WithColor::warning()
1232 << "Truncated stack sample due to invalid return address at "
1233 << format(Fmt: "0x%" PRIx64, Vals: Address)
1234 << ", likely caused by frame pointer omission\n";
1235 }
1236 }
1237 emitWarningSummary(
1238 Num: InvalidReturnAddresses.size(), Total: AggregatedSamples.size(),
1239 Msg: "of truncated stack samples due to invalid return address, "
1240 "likely caused by frame pointer omission.");
1241}
1242
1243void PerfScriptReader::warnInvalidRange() {
1244 std::unordered_map<std::pair<uint64_t, uint64_t>, uint64_t,
1245 pair_hash<uint64_t, uint64_t>>
1246 Ranges;
1247
1248 for (const auto &Item : AggregatedSamples) {
1249 const PerfSample *Sample = Item.first.getPtr();
1250 uint64_t Count = Item.second;
1251 uint64_t EndAddress = 0;
1252 for (const LBREntry &LBR : Sample->LBRStack) {
1253 uint64_t SourceAddress = LBR.Source;
1254 uint64_t StartAddress = LBR.Target;
1255 if (EndAddress != 0)
1256 Ranges[{StartAddress, EndAddress}] += Count;
1257 EndAddress = SourceAddress;
1258 }
1259 }
1260
1261 if (Ranges.empty()) {
1262 WithColor::warning() << "No samples in perf script!\n";
1263 return;
1264 }
1265
1266 auto WarnInvalidRange = [&](uint64_t StartAddress, uint64_t EndAddress,
1267 StringRef Msg) {
1268 if (!ShowDetailedWarning)
1269 return;
1270 WithColor::warning() << "[" << format(Fmt: "%8" PRIx64, Vals: StartAddress) << ","
1271 << format(Fmt: "%8" PRIx64, Vals: EndAddress) << "]: " << Msg
1272 << "\n";
1273 };
1274
1275 const char *EndNotBoundaryMsg = "Range is not on instruction boundary, "
1276 "likely due to profile and binary mismatch.";
1277 const char *DanglingRangeMsg = "Range does not belong to any functions, "
1278 "likely from PLT, .init or .fini section.";
1279 const char *RangeCrossFuncMsg =
1280 "Fall through range should not cross function boundaries, likely due to "
1281 "profile and binary mismatch.";
1282 const char *BogusRangeMsg = "Range start is after or too far from range end.";
1283
1284 uint64_t TotalRangeNum = 0;
1285 uint64_t InstNotBoundary = 0;
1286 uint64_t UnmatchedRange = 0;
1287 uint64_t RecoveredRange = 0;
1288 uint64_t RangeCrossFunc = 0;
1289 uint64_t BogusRange = 0;
1290
1291 for (auto &I : Ranges) {
1292 uint64_t StartAddress = I.first.first;
1293 uint64_t EndAddress = I.first.second;
1294 TotalRangeNum += I.second;
1295
1296 if (!Binary->addressIsCode(Address: StartAddress) &&
1297 !Binary->addressIsCode(Address: EndAddress))
1298 continue;
1299
1300 if (!Binary->addressIsCode(Address: StartAddress) ||
1301 !Binary->addressIsTransfer(Address: EndAddress)) {
1302 InstNotBoundary += I.second;
1303 WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg);
1304 }
1305
1306 auto *FRange = Binary->findFuncRange(Address: StartAddress);
1307 if (!FRange) {
1308 UnmatchedRange += I.second;
1309 WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg);
1310 continue;
1311 }
1312
1313 if (FRange->Func->NameStatus != DwarfNameStatus::Matched)
1314 RecoveredRange += I.second;
1315
1316 if (EndAddress >= FRange->EndAddress) {
1317 RangeCrossFunc += I.second;
1318 WarnInvalidRange(StartAddress, EndAddress, RangeCrossFuncMsg);
1319 }
1320
1321 if (Binary->addressIsCode(Address: StartAddress) &&
1322 Binary->addressIsCode(Address: EndAddress) &&
1323 !isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary)) {
1324 BogusRange += I.second;
1325 WarnInvalidRange(StartAddress, EndAddress, BogusRangeMsg);
1326 }
1327 }
1328
1329 emitWarningSummary(
1330 Num: InstNotBoundary, Total: TotalRangeNum,
1331 Msg: "of samples are from ranges that are not on instruction boundary.");
1332 emitWarningSummary(
1333 Num: UnmatchedRange, Total: TotalRangeNum,
1334 Msg: "of samples are from ranges that do not belong to any functions.");
1335 emitWarningSummary(Num: RecoveredRange, Total: TotalRangeNum,
1336 Msg: "of samples are from ranges that belong to functions "
1337 "recovered from symbol table.");
1338 emitWarningSummary(
1339 Num: RangeCrossFunc, Total: TotalRangeNum,
1340 Msg: "of samples are from ranges that do cross function boundaries.");
1341 emitWarningSummary(
1342 Num: BogusRange, Total: TotalRangeNum,
1343 Msg: "of samples are from ranges that have range start after or too far from "
1344 "range end acrossing the unconditinal jmp.");
1345}
1346
1347void PerfScriptReader::parsePerfTraces() {
1348 // Parse perf traces and do aggregation.
1349 parseAndAggregateTrace();
1350 if (Binary->isKernel() && !Binary->getIsLoadedByMMap()) {
1351 exitWithError(
1352 Message: "Kernel is requested, but no kernel is found in mmap events.");
1353 }
1354
1355 emitWarningSummary(Num: NumLeafExternalFrame, Total: NumTotalSample,
1356 Msg: "of samples have leaf external frame in call stack.");
1357 emitWarningSummary(Num: NumLeadingOutgoingLBR, Total: NumTotalSample,
1358 Msg: "of samples have leading external LBR.");
1359
1360 // Generate unsymbolized profile.
1361 warnTruncatedStack();
1362 warnInvalidRange();
1363 generateUnsymbolizedProfile();
1364 AggregatedSamples.clear();
1365
1366 if (SkipSymbolization)
1367 writeUnsymbolizedProfile(Filename: OutputFilename);
1368}
1369
// Out-of-class definition of the static data member
// PerfScriptReader::TempFileCleanups, a list of CleanupInstaller objects.
// NOTE(review): presumably these remove temporary files created while
// producing the perf script - confirm against the code that fills the list.
1370SmallVector<CleanupInstaller, 2> PerfScriptReader::TempFileCleanups;
1371
1372} // end namespace sampleprof
1373} // end namespace llvm
1374