1 | //===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "PerfReader.h" |
9 | #include "ProfileGenerator.h" |
10 | #include "llvm/ADT/SmallString.h" |
11 | #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" |
12 | #include "llvm/Support/FileSystem.h" |
13 | #include "llvm/Support/Process.h" |
14 | #include "llvm/Support/ToolOutputFile.h" |
15 | |
16 | #define DEBUG_TYPE "perf-reader" |
17 | |
18 | cl::opt<bool> SkipSymbolization("skip-symbolization" , |
19 | cl::desc("Dump the unsymbolized profile to the " |
20 | "output file. It will show unwinder " |
21 | "output for CS profile generation." )); |
22 | |
23 | static cl::opt<bool> ShowMmapEvents("show-mmap-events" , |
24 | cl::desc("Print binary load events." )); |
25 | |
26 | static cl::opt<bool> |
27 | UseOffset("use-offset" , cl::init(Val: true), |
28 | cl::desc("Work with `--skip-symbolization` or " |
29 | "`--unsymbolized-profile` to write/read the " |
30 | "offset instead of virtual address." )); |
31 | |
32 | static cl::opt<bool> UseLoadableSegmentAsBase( |
33 | "use-first-loadable-segment-as-base" , |
34 | cl::desc("Use first loadable segment address as base address " |
35 | "for offsets in unsymbolized profile. By default " |
36 | "first executable segment address is used" )); |
37 | |
38 | static cl::opt<bool> |
39 | IgnoreStackSamples("ignore-stack-samples" , |
40 | cl::desc("Ignore call stack samples for hybrid samples " |
41 | "and produce context-insensitive profile." )); |
42 | cl::opt<bool> ShowDetailedWarning("show-detailed-warning" , |
43 | cl::desc("Show detailed warning message." )); |
44 | cl::opt<bool> |
45 | LeadingIPOnly("leading-ip-only" , |
46 | cl::desc("Form a profile based only on sample IPs" )); |
47 | |
48 | static cl::list<std::string> PerfEventFilter( |
49 | "perf-event" , |
50 | cl::desc("Ignore samples not matching the given event names" )); |
51 | static cl::alias |
52 | PerfEventFilterPlural("perf-events" , cl::CommaSeparated, |
53 | cl::desc("Comma-delimited version of -perf-event" ), |
54 | cl::aliasopt(PerfEventFilter)); |
55 | |
56 | static cl::opt<uint64_t> |
57 | SamplePeriod("sample-period" , cl::init(Val: 1), |
58 | cl::desc("The sampling period (-c) used for perf data" )); |
59 | |
60 | extern cl::opt<std::string> PerfTraceFilename; |
61 | extern cl::opt<bool> ShowDisassemblyOnly; |
62 | extern cl::opt<bool> ShowSourceLocations; |
63 | extern cl::opt<std::string> OutputFilename; |
64 | |
65 | namespace llvm { |
66 | namespace sampleprof { |
67 | |
68 | void VirtualUnwinder::unwindCall(UnwindState &State) { |
69 | uint64_t Source = State.getCurrentLBRSource(); |
70 | auto *ParentFrame = State.getParentFrame(); |
71 | // The 2nd frame after leaf could be missing if stack sample is |
72 | // taken when IP is within prolog/epilog, as frame chain isn't |
73 | // setup yet. Fill in the missing frame in that case. |
74 | // TODO: Currently we just assume all the addr that can't match the |
75 | // 2nd frame is in prolog/epilog. In the future, we will switch to |
76 | // pro/epi tracker(Dwarf CFI) for the precise check. |
77 | if (ParentFrame == State.getDummyRootPtr() || |
78 | ParentFrame->Address != Source) { |
79 | State.switchToFrame(Address: Source); |
80 | if (ParentFrame != State.getDummyRootPtr()) { |
81 | if (Source == ExternalAddr) |
82 | NumMismatchedExtCallBranch++; |
83 | else |
84 | NumMismatchedProEpiBranch++; |
85 | } |
86 | } else { |
87 | State.popFrame(); |
88 | } |
89 | State.InstPtr.update(Addr: Source); |
90 | } |
91 | |
92 | void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { |
93 | InstructionPointer &IP = State.InstPtr; |
94 | uint64_t Target = State.getCurrentLBRTarget(); |
95 | uint64_t End = IP.Address; |
96 | |
97 | if (End == ExternalAddr && Target == ExternalAddr) { |
98 | // Filter out the case when leaf external frame matches the external LBR |
99 | // target, this is a valid state, it happens that the code run into external |
100 | // address then return back. The call frame under the external frame |
101 | // remains valid and can be unwound later, just skip recording this range. |
102 | NumPairedExtAddr++; |
103 | return; |
104 | } |
105 | |
106 | if (End == ExternalAddr || Target == ExternalAddr) { |
107 | // Range is invalid if only one point is external address. This means LBR |
108 | // traces contains a standalone external address failing to pair another |
109 | // one, likely due to interrupt jmp or broken perf script. Set the |
110 | // state to invalid. |
111 | NumUnpairedExtAddr++; |
112 | State.setInvalid(); |
113 | return; |
114 | } |
115 | |
116 | if (!isValidFallThroughRange(Start: Target, End, Binary)) { |
117 | // Skip unwinding the rest of LBR trace when a bogus range is seen. |
118 | State.setInvalid(); |
119 | return; |
120 | } |
121 | |
122 | if (Binary->usePseudoProbes()) { |
123 | // We don't need to top frame probe since it should be extracted |
124 | // from the range. |
125 | // The outcome of the virtual unwinding with pseudo probes is a |
126 | // map from a context key to the address range being unwound. |
127 | // This means basically linear unwinding is not needed for pseudo |
128 | // probes. The range will be simply recorded here and will be |
129 | // converted to a list of pseudo probes to report in ProfileGenerator. |
130 | State.getParentFrame()->recordRangeCount(Start: Target, End, Count: Repeat); |
131 | } else { |
132 | // Unwind linear execution part. |
133 | // Split and record the range by different inline context. For example: |
134 | // [0x01] ... main:1 # Target |
135 | // [0x02] ... main:2 |
136 | // [0x03] ... main:3 @ foo:1 |
137 | // [0x04] ... main:3 @ foo:2 |
138 | // [0x05] ... main:3 @ foo:3 |
139 | // [0x06] ... main:4 |
140 | // [0x07] ... main:5 # End |
141 | // It will be recorded: |
142 | // [main:*] : [0x06, 0x07], [0x01, 0x02] |
143 | // [main:3 @ foo:*] : [0x03, 0x05] |
144 | while (IP.Address > Target) { |
145 | uint64_t PrevIP = IP.Address; |
146 | IP.backward(); |
147 | // Break into segments for implicit call/return due to inlining |
148 | bool SameInlinee = Binary->inlineContextEqual(Add1: PrevIP, Add2: IP.Address); |
149 | if (!SameInlinee) { |
150 | State.switchToFrame(Address: PrevIP); |
151 | State.CurrentLeafFrame->recordRangeCount(Start: PrevIP, End, Count: Repeat); |
152 | End = IP.Address; |
153 | } |
154 | } |
155 | assert(IP.Address == Target && "The last one must be the target address." ); |
156 | // Record the remaining range, [0x01, 0x02] in the example |
157 | State.switchToFrame(Address: IP.Address); |
158 | State.CurrentLeafFrame->recordRangeCount(Start: IP.Address, End, Count: Repeat); |
159 | } |
160 | } |
161 | |
162 | void VirtualUnwinder::unwindReturn(UnwindState &State) { |
163 | // Add extra frame as we unwind through the return |
164 | const LBREntry &LBR = State.getCurrentLBR(); |
165 | uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr: LBR.Target); |
166 | State.switchToFrame(Address: CallAddr); |
167 | State.pushFrame(Address: LBR.Source); |
168 | State.InstPtr.update(Addr: LBR.Source); |
169 | } |
170 | |
171 | void VirtualUnwinder::unwindBranch(UnwindState &State) { |
172 | // TODO: Tolerate tail call for now, as we may see tail call from libraries. |
173 | // This is only for intra function branches, excluding tail calls. |
174 | uint64_t Source = State.getCurrentLBRSource(); |
175 | State.switchToFrame(Address: Source); |
176 | State.InstPtr.update(Addr: Source); |
177 | } |
178 | |
179 | std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() { |
180 | std::shared_ptr<StringBasedCtxKey> KeyStr = |
181 | std::make_shared<StringBasedCtxKey>(); |
182 | KeyStr->Context = Binary->getExpandedContext(Stack, WasLeafInlined&: KeyStr->WasLeafInlined); |
183 | return KeyStr; |
184 | } |
185 | |
186 | std::shared_ptr<AddrBasedCtxKey> AddressStack::getContextKey() { |
187 | std::shared_ptr<AddrBasedCtxKey> KeyStr = std::make_shared<AddrBasedCtxKey>(); |
188 | KeyStr->Context = Stack; |
189 | CSProfileGenerator::compressRecursionContext<uint64_t>(Context&: KeyStr->Context); |
190 | CSProfileGenerator::trimContext<uint64_t>(S&: KeyStr->Context); |
191 | return KeyStr; |
192 | } |
193 | |
194 | template <typename T> |
195 | void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, |
196 | T &Stack) { |
197 | if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty()) |
198 | return; |
199 | |
200 | std::shared_ptr<ContextKey> Key = Stack.getContextKey(); |
201 | if (Key == nullptr) |
202 | return; |
203 | auto Ret = CtxCounterMap->emplace(args: Hashable<ContextKey>(Key), args: SampleCounter()); |
204 | SampleCounter &SCounter = Ret.first->second; |
205 | for (auto &I : Cur->RangeSamples) |
206 | SCounter.recordRangeCount(Start: std::get<0>(t&: I), End: std::get<1>(t&: I), Repeat: std::get<2>(t&: I)); |
207 | |
208 | for (auto &I : Cur->BranchSamples) |
209 | SCounter.recordBranchCount(Source: std::get<0>(t&: I), Target: std::get<1>(t&: I), Repeat: std::get<2>(t&: I)); |
210 | } |
211 | |
212 | template <typename T> |
213 | void VirtualUnwinder::collectSamplesFromFrameTrie( |
214 | UnwindState::ProfiledFrame *Cur, T &Stack) { |
215 | if (!Cur->isDummyRoot()) { |
216 | // Truncate the context for external frame since this isn't a real call |
217 | // context the compiler will see. |
218 | if (Cur->isExternalFrame() || !Stack.pushFrame(Cur)) { |
219 | // Process truncated context |
220 | // Start a new traversal ignoring its bottom context |
221 | T EmptyStack(Binary); |
222 | collectSamplesFromFrame(Cur, EmptyStack); |
223 | for (const auto &Item : Cur->Children) { |
224 | collectSamplesFromFrameTrie(Item.second.get(), EmptyStack); |
225 | } |
226 | |
227 | // Keep note of untracked call site and deduplicate them |
228 | // for warning later. |
229 | if (!Cur->isLeafFrame()) |
230 | UntrackedCallsites.insert(x: Cur->Address); |
231 | |
232 | return; |
233 | } |
234 | } |
235 | |
236 | collectSamplesFromFrame(Cur, Stack); |
237 | // Process children frame |
238 | for (const auto &Item : Cur->Children) { |
239 | collectSamplesFromFrameTrie(Item.second.get(), Stack); |
240 | } |
241 | // Recover the call stack |
242 | Stack.popFrame(); |
243 | } |
244 | |
245 | void VirtualUnwinder::collectSamplesFromFrameTrie( |
246 | UnwindState::ProfiledFrame *Cur) { |
247 | if (Binary->usePseudoProbes()) { |
248 | AddressStack Stack(Binary); |
249 | collectSamplesFromFrameTrie<AddressStack>(Cur, Stack); |
250 | } else { |
251 | FrameStack Stack(Binary); |
252 | collectSamplesFromFrameTrie<FrameStack>(Cur, Stack); |
253 | } |
254 | } |
255 | |
256 | void VirtualUnwinder::recordBranchCount(const LBREntry &Branch, |
257 | UnwindState &State, uint64_t Repeat) { |
258 | if (Branch.Target == ExternalAddr) |
259 | return; |
260 | |
261 | // Record external-to-internal pattern on the trie root, it later can be |
262 | // used for generating head samples. |
263 | if (Branch.Source == ExternalAddr) { |
264 | State.getDummyRootPtr()->recordBranchCount(Source: Branch.Source, Target: Branch.Target, |
265 | Count: Repeat); |
266 | return; |
267 | } |
268 | |
269 | if (Binary->usePseudoProbes()) { |
270 | // Same as recordRangeCount, We don't need to top frame probe since we will |
271 | // extract it from branch's source address |
272 | State.getParentFrame()->recordBranchCount(Source: Branch.Source, Target: Branch.Target, |
273 | Count: Repeat); |
274 | } else { |
275 | State.CurrentLeafFrame->recordBranchCount(Source: Branch.Source, Target: Branch.Target, |
276 | Count: Repeat); |
277 | } |
278 | } |
279 | |
280 | bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) { |
281 | // Capture initial state as starting point for unwinding. |
282 | UnwindState State(Sample, Binary); |
283 | |
284 | // Sanity check - making sure leaf of LBR aligns with leaf of stack sample |
285 | // Stack sample sometimes can be unreliable, so filter out bogus ones. |
286 | if (!State.validateInitialState()) |
287 | return false; |
288 | |
289 | NumTotalBranches += State.LBRStack.size(); |
290 | // Now process the LBR samples in parrallel with stack sample |
291 | // Note that we do not reverse the LBR entry order so we can |
292 | // unwind the sample stack as we walk through LBR entries. |
293 | while (State.hasNextLBR()) { |
294 | State.checkStateConsistency(); |
295 | |
296 | // Do not attempt linear unwind for the leaf range as it's incomplete. |
297 | if (!State.IsLastLBR()) { |
298 | // Unwind implicit calls/returns from inlining, along the linear path, |
299 | // break into smaller sub section each with its own calling context. |
300 | unwindLinear(State, Repeat); |
301 | } |
302 | |
303 | // Save the LBR branch before it gets unwound. |
304 | const LBREntry &Branch = State.getCurrentLBR(); |
305 | if (isCallState(State)) { |
306 | // Unwind calls - we know we encountered call if LBR overlaps with |
307 | // transition between leaf the 2nd frame. Note that for calls that |
308 | // were not in the original stack sample, we should have added the |
309 | // extra frame when processing the return paired with this call. |
310 | unwindCall(State); |
311 | } else if (isReturnState(State)) { |
312 | // Unwind returns - check whether the IP is indeed at a return |
313 | // instruction |
314 | unwindReturn(State); |
315 | } else if (isValidState(State)) { |
316 | // Unwind branches |
317 | unwindBranch(State); |
318 | } else { |
319 | // Skip unwinding the rest of LBR trace. Reset the stack and update the |
320 | // state so that the rest of the trace can still be processed as if they |
321 | // do not have stack samples. |
322 | State.clearCallStack(); |
323 | State.InstPtr.update(Addr: State.getCurrentLBRSource()); |
324 | State.pushFrame(Address: State.InstPtr.Address); |
325 | } |
326 | |
327 | State.advanceLBR(); |
328 | // Record `branch` with calling context after unwinding. |
329 | recordBranchCount(Branch, State, Repeat); |
330 | } |
331 | // As samples are aggregated on trie, record them into counter map |
332 | collectSamplesFromFrameTrie(Cur: State.getDummyRootPtr()); |
333 | |
334 | return true; |
335 | } |
336 | |
337 | std::unique_ptr<PerfReaderBase> |
338 | PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput, |
339 | std::optional<int32_t> PIDFilter) { |
340 | std::unique_ptr<PerfReaderBase> PerfReader; |
341 | |
342 | if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) { |
343 | PerfReader.reset( |
344 | p: new UnsymbolizedProfileReader(Binary, PerfInput.InputFile)); |
345 | return PerfReader; |
346 | } |
347 | |
348 | // For perf data input, we need to convert them into perf script first. |
349 | // If this is a kernel perf file, there is no need for retrieving PIDs. |
350 | if (PerfInput.Format == PerfFormat::PerfData) |
351 | PerfInput = PerfScriptReader::convertPerfDataToTrace( |
352 | Binary, SkipPID: Binary->isKernel(), File&: PerfInput, PIDFilter); |
353 | |
354 | assert((PerfInput.Format == PerfFormat::PerfScript) && |
355 | "Should be a perfscript!" ); |
356 | |
357 | PerfInput.Content = |
358 | PerfScriptReader::checkPerfScriptType(FileName: PerfInput.InputFile); |
359 | if (PerfInput.Content == PerfContent::LBRStack) { |
360 | PerfReader.reset( |
361 | p: new HybridPerfReader(Binary, PerfInput.InputFile, PIDFilter)); |
362 | } else if (PerfInput.Content == PerfContent::LBR) { |
363 | PerfReader.reset(p: new LBRPerfReader(Binary, PerfInput.InputFile, PIDFilter)); |
364 | } else { |
365 | exitWithError(Message: "Unsupported perfscript!" ); |
366 | } |
367 | |
368 | return PerfReader; |
369 | } |
370 | |
371 | PerfInputFile |
372 | PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID, |
373 | PerfInputFile &File, |
374 | std::optional<int32_t> PIDFilter) { |
375 | StringRef PerfData = File.InputFile; |
376 | // Run perf script to retrieve PIDs matching binary we're interested in. |
377 | auto PerfExecutable = sys::Process::FindInEnvPath(EnvName: "PATH" , FileName: "perf" ); |
378 | if (!PerfExecutable) { |
379 | exitWithError(Message: "Perf not found." ); |
380 | } |
381 | std::string PerfPath = *PerfExecutable; |
382 | SmallString<128> PerfTraceFile; |
383 | sys::fs::createUniquePath(Model: "perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp" , |
384 | ResultPath&: PerfTraceFile, /*MakeAbsolute=*/true); |
385 | std::string ErrorFile = std::string(PerfTraceFile) + ".err" ; |
386 | std::optional<StringRef> Redirects[] = {std::nullopt, // Stdin |
387 | StringRef(PerfTraceFile), // Stdout |
388 | StringRef(ErrorFile)}; // Stderr |
389 | PerfScriptReader::TempFileCleanups.emplace_back(Args&: PerfTraceFile); |
390 | PerfScriptReader::TempFileCleanups.emplace_back(Args&: ErrorFile); |
391 | |
392 | std::string PIDs; |
393 | if (!SkipPID) { |
394 | StringRef ScriptMMapArgs[] = {PerfPath, "script" , "--show-mmap-events" , |
395 | "-F" , "comm,pid" , "-i" , |
396 | PerfData}; |
397 | sys::ExecuteAndWait(Program: PerfPath, Args: ScriptMMapArgs, Env: std::nullopt, Redirects); |
398 | |
399 | // Collect the PIDs |
400 | TraceStream TraceIt(PerfTraceFile); |
401 | std::unordered_set<int32_t> PIDSet; |
402 | while (!TraceIt.isAtEoF()) { |
403 | MMapEvent MMap; |
404 | if (isMMapEvent(Line: TraceIt.getCurrentLine()) && |
405 | extractMMapEventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap)) { |
406 | auto It = PIDSet.emplace(args&: MMap.PID); |
407 | if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) { |
408 | if (!PIDs.empty()) { |
409 | PIDs.append(s: "," ); |
410 | } |
411 | PIDs.append(str: utostr(X: MMap.PID)); |
412 | } |
413 | } |
414 | TraceIt.advance(); |
415 | } |
416 | |
417 | if (PIDs.empty()) { |
418 | exitWithError(Message: "No relevant mmap event is found in perf data." ); |
419 | } |
420 | } |
421 | |
422 | // If filtering by events was requested, additionally request the "event" |
423 | // field. |
424 | const std::string FieldList = |
425 | PerfEventFilter.empty() ? "ip,brstack" : "event,ip,brstack" ; |
426 | |
427 | // Run perf script again to retrieve events for PIDs collected above |
428 | SmallVector<StringRef, 8> ScriptSampleArgs; |
429 | ScriptSampleArgs.push_back(Elt: PerfPath); |
430 | ScriptSampleArgs.push_back(Elt: "script" ); |
431 | ScriptSampleArgs.push_back(Elt: "--show-mmap-events" ); |
432 | ScriptSampleArgs.push_back(Elt: "-F" ); |
433 | ScriptSampleArgs.push_back(Elt: FieldList); |
434 | ScriptSampleArgs.push_back(Elt: "-i" ); |
435 | ScriptSampleArgs.push_back(Elt: PerfData); |
436 | if (!PIDs.empty()) { |
437 | ScriptSampleArgs.push_back(Elt: "--pid" ); |
438 | ScriptSampleArgs.push_back(Elt: PIDs); |
439 | } |
440 | sys::ExecuteAndWait(Program: PerfPath, Args: ScriptSampleArgs, Env: std::nullopt, Redirects); |
441 | |
442 | return {.InputFile: std::string(PerfTraceFile), .Format: PerfFormat::PerfScript, |
443 | .Content: PerfContent::UnknownContent}; |
444 | } |
445 | |
446 | static StringRef filename(StringRef Path, bool UseBackSlash) { |
447 | llvm::sys::path::Style PathStyle = |
448 | UseBackSlash ? llvm::sys::path::Style::windows_backslash |
449 | : llvm::sys::path::Style::native; |
450 | StringRef FileName = llvm::sys::path::filename(path: Path, style: PathStyle); |
451 | |
452 | // In case this file use \r\n as newline. |
453 | if (UseBackSlash && FileName.back() == '\r') |
454 | return FileName.drop_back(); |
455 | |
456 | return FileName; |
457 | } |
458 | |
459 | void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) { |
460 | // Drop the event which doesn't belong to user-provided binary |
461 | StringRef BinaryName = filename(Path: Event.BinaryPath, UseBackSlash: Binary->isCOFF()); |
462 | bool IsKernel = Binary->isKernel(); |
463 | if (!IsKernel && Binary->getName() != BinaryName) |
464 | return; |
465 | if (IsKernel && !Binary->isKernelImageName(BinaryName)) |
466 | return; |
467 | |
468 | // Drop the event if process does not match pid filter |
469 | if (PIDFilter && Event.PID != *PIDFilter) |
470 | return; |
471 | |
472 | // Drop the event if its image is loaded at the same address |
473 | if (Event.Address == Binary->getBaseAddress()) { |
474 | Binary->setIsLoadedByMMap(true); |
475 | return; |
476 | } |
477 | |
478 | if (IsKernel || Event.Offset == Binary->getTextSegmentOffset()) { |
479 | // A binary image could be unloaded and then reloaded at different |
480 | // place, so update binary load address. |
481 | // Only update for the first executable segment and assume all other |
482 | // segments are loaded at consecutive memory addresses, which is the case on |
483 | // X64. |
484 | Binary->setBaseAddress(Event.Address); |
485 | Binary->setIsLoadedByMMap(true); |
486 | } else { |
487 | // Verify segments are loaded consecutively. |
488 | const auto &Offsets = Binary->getTextSegmentOffsets(); |
489 | auto It = llvm::lower_bound(Range: Offsets, Value: Event.Offset); |
490 | if (It != Offsets.end() && *It == Event.Offset) { |
491 | // The event is for loading a separate executable segment. |
492 | auto I = std::distance(first: Offsets.begin(), last: It); |
493 | const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses(); |
494 | if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() != |
495 | Event.Address - Binary->getBaseAddress()) |
496 | exitWithError(Message: "Executable segments not loaded consecutively" ); |
497 | } else { |
498 | if (It == Offsets.begin()) |
499 | exitWithError(Message: "File offset not found" ); |
500 | else { |
501 | // Find the segment the event falls in. A large segment could be loaded |
502 | // via multiple mmap calls with consecutive memory addresses. |
503 | --It; |
504 | assert(*It < Event.Offset); |
505 | if (Event.Offset - *It != Event.Address - Binary->getBaseAddress()) |
506 | exitWithError(Message: "Segment not loaded by consecutive mmaps" ); |
507 | } |
508 | } |
509 | } |
510 | } |
511 | |
512 | static std::string getContextKeyStr(ContextKey *K, |
513 | const ProfiledBinary *Binary) { |
514 | if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(Val: K)) { |
515 | return SampleContext::getContextString(Context: CtxKey->Context); |
516 | } else if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(Val: K)) { |
517 | std::ostringstream OContextStr; |
518 | for (uint32_t I = 0; I < CtxKey->Context.size(); I++) { |
519 | if (OContextStr.str().size()) |
520 | OContextStr << " @ " ; |
521 | uint64_t Address = CtxKey->Context[I]; |
522 | if (UseOffset) { |
523 | if (UseLoadableSegmentAsBase) |
524 | Address -= Binary->getFirstLoadableAddress(); |
525 | else |
526 | Address -= Binary->getPreferredBaseAddress(); |
527 | } |
528 | OContextStr << "0x" |
529 | << utohexstr(X: Address, |
530 | /*LowerCase=*/true); |
531 | } |
532 | return OContextStr.str(); |
533 | } else { |
534 | llvm_unreachable("unexpected key type" ); |
535 | } |
536 | } |
537 | |
538 | void HybridPerfReader::unwindSamples() { |
539 | VirtualUnwinder Unwinder(&SampleCounters, Binary); |
540 | for (const auto &Item : AggregatedSamples) { |
541 | const PerfSample *Sample = Item.first.getPtr(); |
542 | Unwinder.unwind(Sample, Repeat: Item.second); |
543 | } |
544 | |
545 | // Warn about untracked frames due to missing probes. |
546 | if (ShowDetailedWarning) { |
547 | for (auto Address : Unwinder.getUntrackedCallsites()) |
548 | WithColor::warning() << "Profile context truncated due to missing probe " |
549 | << "for call instruction at " |
550 | << format(Fmt: "0x%" PRIx64, Vals: Address) << "\n" ; |
551 | } |
552 | |
553 | emitWarningSummary(Num: Unwinder.getUntrackedCallsites().size(), |
554 | Total: SampleCounters.size(), |
555 | Msg: "of profiled contexts are truncated due to missing probe " |
556 | "for call instruction." ); |
557 | |
558 | emitWarningSummary( |
559 | Num: Unwinder.NumMismatchedExtCallBranch, Total: Unwinder.NumTotalBranches, |
560 | Msg: "of branches'source is a call instruction but doesn't match call frame " |
561 | "stack, likely due to unwinding error of external frame." ); |
562 | |
563 | emitWarningSummary(Num: Unwinder.NumPairedExtAddr * 2, Total: Unwinder.NumTotalBranches, |
564 | Msg: "of branches containing paired external address." ); |
565 | |
566 | emitWarningSummary(Num: Unwinder.NumUnpairedExtAddr, Total: Unwinder.NumTotalBranches, |
567 | Msg: "of branches containing external address but doesn't have " |
568 | "another external address to pair, likely due to " |
569 | "interrupt jmp or broken perf script." ); |
570 | |
571 | emitWarningSummary( |
572 | Num: Unwinder.NumMismatchedProEpiBranch, Total: Unwinder.NumTotalBranches, |
573 | Msg: "of branches'source is a call instruction but doesn't match call frame " |
574 | "stack, likely due to frame in prolog/epilog." ); |
575 | |
576 | emitWarningSummary(Num: Unwinder.NumMissingExternalFrame, |
577 | Total: Unwinder.NumExtCallBranch, |
578 | Msg: "of artificial call branches but doesn't have an external " |
579 | "frame to match." ); |
580 | } |
581 | |
582 | bool PerfScriptReader::(TraceStream &TraceIt, |
583 | SmallVectorImpl<LBREntry> &LBRStack) { |
584 | // The raw format of LBR stack is like: |
585 | // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... |
586 | // ... 0x4005c8/0x4005dc/P/-/-/0 |
587 | // It's in FIFO order and separated by whitespace. |
588 | SmallVector<StringRef, 32> Records; |
589 | TraceIt.getCurrentLine().rtrim().split(A&: Records, Separator: " " , MaxSplit: -1, KeepEmpty: false); |
590 | auto WarnInvalidLBR = [](TraceStream &TraceIt) { |
591 | WithColor::warning() << "Invalid address in LBR record at line " |
592 | << TraceIt.getLineNumber() << ": " |
593 | << TraceIt.getCurrentLine() << "\n" ; |
594 | }; |
595 | |
596 | // Skip the leading instruction pointer. |
597 | size_t Index = 0; |
598 | |
599 | StringRef EventName; |
600 | // Skip a perf event name. This may or may not exist. |
601 | if (Records.size() > Index && Records[Index].ends_with(Suffix: ":" )) { |
602 | EventName = Records[Index].ltrim().rtrim(Char: ':'); |
603 | Index++; |
604 | |
605 | if (PerfEventFilter.empty()) { |
606 | WithColor::warning() << "No --perf-event filter was specified, but an " |
607 | "\"event\" field was found in line " |
608 | << TraceIt.getLineNumber() << ": " |
609 | << TraceIt.getCurrentLine() << "\n" ; |
610 | } else if (std::find(first: PerfEventFilter.begin(), last: PerfEventFilter.end(), |
611 | val: EventName) == PerfEventFilter.end()) { |
612 | TraceIt.advance(); |
613 | return false; |
614 | } |
615 | |
616 | } else if (!PerfEventFilter.empty()) { |
617 | WithColor::warning() << "A --perf-event filter was specified, but no " |
618 | "\"event\" field found in line " |
619 | << TraceIt.getLineNumber() << ": " |
620 | << TraceIt.getCurrentLine() << "\n" ; |
621 | } |
622 | |
623 | uint64_t LeadingAddr; |
624 | if (Records.size() > Index && !Records[Index].contains(C: '/')) { |
625 | if (Records[Index].getAsInteger(Radix: 16, Result&: LeadingAddr)) { |
626 | WarnInvalidLBR(TraceIt); |
627 | TraceIt.advance(); |
628 | return false; |
629 | } |
630 | Index++; |
631 | } |
632 | |
633 | // We assume that if we saw an event name we also saw a leading addr. |
634 | // In other words, LeadingAddr is set if Index is 1 or 2. |
635 | if (LeadingIPOnly && Index > 0) { |
636 | // Form a profile only from the sample IP. Do not assume an LBR stack |
637 | // follows, and ignore it if it does. |
638 | uint64_t SampleIP = Binary->canonicalizeVirtualAddress(Address: LeadingAddr); |
639 | bool SampleIPIsInternal = Binary->addressIsCode(Address: SampleIP); |
640 | if (SampleIPIsInternal) { |
641 | // Form a half LBR entry where the sample IP is the destination. |
642 | LBRStack.emplace_back(Args: LBREntry(SampleIP, SampleIP)); |
643 | } |
644 | TraceIt.advance(); |
645 | return !LBRStack.empty(); |
646 | } |
647 | |
648 | // Now extract LBR samples - note that we do not reverse the |
649 | // LBR entry order so we can unwind the sample stack as we walk |
650 | // through LBR entries. |
651 | while (Index < Records.size()) { |
652 | auto &Token = Records[Index++]; |
653 | if (Token.size() == 0) |
654 | continue; |
655 | |
656 | SmallVector<StringRef, 8> Addresses; |
657 | Token.split(A&: Addresses, Separator: "/" ); |
658 | uint64_t Src; |
659 | uint64_t Dst; |
660 | |
661 | // Stop at broken LBR records. |
662 | if (Addresses.size() < 2 || Addresses[0].substr(Start: 2).getAsInteger(Radix: 16, Result&: Src) || |
663 | Addresses[1].substr(Start: 2).getAsInteger(Radix: 16, Result&: Dst)) { |
664 | WarnInvalidLBR(TraceIt); |
665 | break; |
666 | } |
667 | |
668 | // Canonicalize to use preferred load address as base address. |
669 | Src = Binary->canonicalizeVirtualAddress(Address: Src); |
670 | Dst = Binary->canonicalizeVirtualAddress(Address: Dst); |
671 | bool SrcIsInternal = Binary->addressIsCode(Address: Src); |
672 | bool DstIsInternal = Binary->addressIsCode(Address: Dst); |
673 | if (!SrcIsInternal) |
674 | Src = ExternalAddr; |
675 | if (!DstIsInternal) |
676 | Dst = ExternalAddr; |
677 | // Filter external-to-external case to reduce LBR trace size. |
678 | if (!SrcIsInternal && !DstIsInternal) |
679 | continue; |
680 | |
681 | LBRStack.emplace_back(Args: LBREntry(Src, Dst)); |
682 | } |
683 | TraceIt.advance(); |
684 | return !LBRStack.empty(); |
685 | } |
686 | |
687 | bool PerfScriptReader::(TraceStream &TraceIt, |
688 | SmallVectorImpl<uint64_t> &CallStack) { |
689 | // The raw format of call stack is like: |
690 | // 4005dc # leaf frame |
691 | // 400634 |
692 | // 400684 # root frame |
693 | // It's in bottom-up order with each frame in one line. |
694 | |
695 | // Extract stack frames from sample |
696 | while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(Prefix: " 0x" )) { |
697 | StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); |
698 | uint64_t FrameAddr = 0; |
699 | if (FrameStr.getAsInteger(Radix: 16, Result&: FrameAddr)) { |
700 | // We might parse a non-perf sample line like empty line and comments, |
701 | // skip it |
702 | TraceIt.advance(); |
703 | return false; |
704 | } |
705 | TraceIt.advance(); |
706 | |
707 | FrameAddr = Binary->canonicalizeVirtualAddress(Address: FrameAddr); |
708 | // Currently intermixed frame from different binaries is not supported. |
709 | if (!Binary->addressIsCode(Address: FrameAddr)) { |
710 | if (CallStack.empty()) |
711 | NumLeafExternalFrame++; |
712 | // Push a special value(ExternalAddr) for the external frames so that |
713 | // unwinder can still work on this with artificial Call/Return branch. |
714 | // After unwinding, the context will be truncated for external frame. |
715 | // Also deduplicate the consecutive external addresses. |
716 | if (CallStack.empty() || CallStack.back() != ExternalAddr) |
717 | CallStack.emplace_back(Args: ExternalAddr); |
718 | continue; |
719 | } |
720 | |
721 | // We need to translate return address to call address for non-leaf frames. |
722 | if (!CallStack.empty()) { |
723 | auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); |
724 | if (!CallAddr) { |
725 | // Stop at an invalid return address caused by bad unwinding. This could |
726 | // happen to frame-pointer-based unwinding and the callee functions that |
727 | // do not have the frame pointer chain set up. |
728 | InvalidReturnAddresses.insert(x: FrameAddr); |
729 | break; |
730 | } |
731 | FrameAddr = CallAddr; |
732 | } |
733 | |
734 | CallStack.emplace_back(Args&: FrameAddr); |
735 | } |
736 | |
737 | // Strip out the bottom external addr. |
738 | if (CallStack.size() > 1 && CallStack.back() == ExternalAddr) |
739 | CallStack.pop_back(); |
740 | |
741 | // Skip other unrelated line, find the next valid LBR line |
742 | // Note that even for empty call stack, we should skip the address at the |
743 | // bottom, otherwise the following pass may generate a truncated callstack |
744 | while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(Prefix: " 0x" )) { |
745 | TraceIt.advance(); |
746 | } |
747 | // Filter out broken stack sample. We may not have complete frame info |
748 | // if sample end up in prolog/epilog, the result is dangling context not |
749 | // connected to entry point. This should be relatively rare thus not much |
750 | // impact on overall profile quality. However we do want to filter them |
751 | // out to reduce the number of different calling contexts. One instance |
752 | // of such case - when sample landed in prolog/epilog, somehow stack |
753 | // walking will be broken in an unexpected way that higher frames will be |
754 | // missing. |
755 | return !CallStack.empty() && |
756 | !Binary->addressInPrologEpilog(Address: CallStack.front()); |
757 | } |
758 | |
759 | void PerfScriptReader::warnIfMissingMMap() { |
760 | if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) { |
761 | WithColor::warning() << "No relevant mmap event is matched for " |
762 | << Binary->getName() |
763 | << ", will use preferred address (" |
764 | << format(Fmt: "0x%" PRIx64, |
765 | Vals: Binary->getPreferredBaseAddress()) |
766 | << ") as the base loading address!\n" ; |
767 | // Avoid redundant warning, only warn at the first unmatched sample. |
768 | Binary->setMissingMMapWarned(true); |
769 | } |
770 | } |
771 | |
772 | void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { |
773 | // The raw hybird sample started with call stack in FILO order and followed |
774 | // intermediately by LBR sample |
775 | // e.g. |
776 | // 4005dc # call stack leaf |
777 | // 400634 |
778 | // 400684 # call stack root |
779 | // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... |
780 | // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries |
781 | // |
782 | std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); |
783 | #ifndef NDEBUG |
784 | Sample->Linenum = TraceIt.getLineNumber(); |
785 | #endif |
786 | // Parsing call stack and populate into PerfSample.CallStack |
787 | if (!extractCallstack(TraceIt, CallStack&: Sample->CallStack)) { |
788 | // Skip the next LBR line matched current call stack |
789 | if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(Prefix: " 0x" )) |
790 | TraceIt.advance(); |
791 | return; |
792 | } |
793 | |
794 | warnIfMissingMMap(); |
795 | |
796 | if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(Prefix: " 0x" )) { |
797 | // Parsing LBR stack and populate into PerfSample.LBRStack |
798 | if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) { |
799 | if (IgnoreStackSamples) { |
800 | Sample->CallStack.clear(); |
801 | } else { |
802 | // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR |
803 | // ranges |
804 | Sample->CallStack.front() = Sample->LBRStack[0].Target; |
805 | } |
806 | // Record samples by aggregation |
807 | AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; |
808 | } |
809 | } else { |
810 | // LBR sample is encoded in single line after stack sample |
811 | exitWithError(Message: "'Hybrid perf sample is corrupted, No LBR sample line" ); |
812 | } |
813 | } |
814 | |
815 | void PerfScriptReader::writeUnsymbolizedProfile(StringRef Filename) { |
816 | std::error_code EC; |
817 | raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF); |
818 | if (EC) |
819 | exitWithError(EC, Whence: Filename); |
820 | writeUnsymbolizedProfile(OS); |
821 | } |
822 | |
823 | // Use ordered map to make the output deterministic |
824 | using OrderedCounterForPrint = std::map<std::string, SampleCounter *>; |
825 | |
826 | void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) { |
827 | OrderedCounterForPrint OrderedCounters; |
828 | for (auto &CI : SampleCounters) { |
829 | OrderedCounters[getContextKeyStr(K: CI.first.getPtr(), Binary)] = &CI.second; |
830 | } |
831 | |
832 | auto SCounterPrinter = [&](RangeSample &Counter, StringRef Separator, |
833 | uint32_t Indent) { |
834 | OS.indent(NumSpaces: Indent); |
835 | OS << Counter.size() << "\n" ; |
836 | for (auto &I : Counter) { |
837 | uint64_t Start = I.first.first; |
838 | uint64_t End = I.first.second; |
839 | |
840 | if (UseOffset) { |
841 | if (UseLoadableSegmentAsBase) { |
842 | Start -= Binary->getFirstLoadableAddress(); |
843 | End -= Binary->getFirstLoadableAddress(); |
844 | } else { |
845 | Start -= Binary->getPreferredBaseAddress(); |
846 | End -= Binary->getPreferredBaseAddress(); |
847 | } |
848 | } |
849 | |
850 | OS.indent(NumSpaces: Indent); |
851 | OS << Twine::utohexstr(Val: Start) << Separator << Twine::utohexstr(Val: End) << ":" |
852 | << I.second << "\n" ; |
853 | } |
854 | }; |
855 | |
856 | for (auto &CI : OrderedCounters) { |
857 | uint32_t Indent = 0; |
858 | if (ProfileIsCS) { |
859 | // Context string key |
860 | OS << "[" << CI.first << "]\n" ; |
861 | Indent = 2; |
862 | } |
863 | |
864 | SampleCounter &Counter = *CI.second; |
865 | SCounterPrinter(Counter.RangeCounter, "-" , Indent); |
866 | SCounterPrinter(Counter.BranchCounter, "->" , Indent); |
867 | } |
868 | } |
869 | |
870 | // Format of input: |
871 | // number of entries in RangeCounter |
872 | // from_1-to_1:count_1 |
873 | // from_2-to_2:count_2 |
874 | // ...... |
875 | // from_n-to_n:count_n |
876 | // number of entries in BranchCounter |
877 | // src_1->dst_1:count_1 |
878 | // src_2->dst_2:count_2 |
879 | // ...... |
880 | // src_n->dst_n:count_n |
881 | void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt, |
882 | SampleCounter &SCounters) { |
883 | auto exitWithErrorForTraceLine = [](TraceStream &TraceIt) { |
884 | std::string Msg = TraceIt.isAtEoF() |
885 | ? "Invalid raw profile!" |
886 | : "Invalid raw profile at line " + |
887 | Twine(TraceIt.getLineNumber()).str() + ": " + |
888 | TraceIt.getCurrentLine().str(); |
889 | exitWithError(Message: Msg); |
890 | }; |
891 | auto ReadNumber = [&](uint64_t &Num) { |
892 | if (TraceIt.isAtEoF()) |
893 | exitWithErrorForTraceLine(TraceIt); |
894 | if (TraceIt.getCurrentLine().ltrim().getAsInteger(Radix: 10, Result&: Num)) |
895 | exitWithErrorForTraceLine(TraceIt); |
896 | TraceIt.advance(); |
897 | }; |
898 | |
899 | auto ReadCounter = [&](RangeSample &Counter, StringRef Separator) { |
900 | uint64_t Num = 0; |
901 | ReadNumber(Num); |
902 | while (Num--) { |
903 | if (TraceIt.isAtEoF()) |
904 | exitWithErrorForTraceLine(TraceIt); |
905 | StringRef Line = TraceIt.getCurrentLine().ltrim(); |
906 | |
907 | uint64_t Count = 0; |
908 | auto LineSplit = Line.split(Separator: ":" ); |
909 | if (LineSplit.second.empty() || LineSplit.second.getAsInteger(Radix: 10, Result&: Count)) |
910 | exitWithErrorForTraceLine(TraceIt); |
911 | |
912 | uint64_t Source = 0; |
913 | uint64_t Target = 0; |
914 | auto Range = LineSplit.first.split(Separator); |
915 | if (Range.second.empty() || Range.first.getAsInteger(Radix: 16, Result&: Source) || |
916 | Range.second.getAsInteger(Radix: 16, Result&: Target)) |
917 | exitWithErrorForTraceLine(TraceIt); |
918 | |
919 | if (UseOffset) { |
920 | if (UseLoadableSegmentAsBase) { |
921 | Source += Binary->getFirstLoadableAddress(); |
922 | Target += Binary->getFirstLoadableAddress(); |
923 | } else { |
924 | Source += Binary->getPreferredBaseAddress(); |
925 | Target += Binary->getPreferredBaseAddress(); |
926 | } |
927 | } |
928 | |
929 | Counter[{Source, Target}] += Count; |
930 | TraceIt.advance(); |
931 | } |
932 | }; |
933 | |
934 | ReadCounter(SCounters.RangeCounter, "-" ); |
935 | ReadCounter(SCounters.BranchCounter, "->" ); |
936 | } |
937 | |
938 | void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) { |
939 | TraceStream TraceIt(FileName); |
940 | while (!TraceIt.isAtEoF()) { |
941 | std::shared_ptr<StringBasedCtxKey> Key = |
942 | std::make_shared<StringBasedCtxKey>(); |
943 | StringRef Line = TraceIt.getCurrentLine(); |
944 | // Read context stack for CS profile. |
945 | if (Line.starts_with(Prefix: "[" )) { |
946 | ProfileIsCS = true; |
947 | auto I = ContextStrSet.insert(x: Line.str()); |
948 | SampleContext::createCtxVectorFromStr(ContextStr: *I.first, Context&: Key->Context); |
949 | TraceIt.advance(); |
950 | } |
951 | auto Ret = |
952 | SampleCounters.emplace(args: Hashable<ContextKey>(Key), args: SampleCounter()); |
953 | readSampleCounters(TraceIt, SCounters&: Ret.first->second); |
954 | } |
955 | } |
956 | |
957 | void UnsymbolizedProfileReader::parsePerfTraces() { |
958 | readUnsymbolizedProfile(FileName: PerfTraceFile); |
959 | } |
960 | |
961 | void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample, |
962 | uint64_t Repeat) { |
963 | SampleCounter &Counter = SampleCounters.begin()->second; |
964 | uint64_t EndAddress = 0; |
965 | |
966 | if (LeadingIPOnly) { |
967 | assert(Sample->LBRStack.size() == 1 && |
968 | "Expected only half LBR entries for ip-only mode" ); |
969 | const LBREntry &LBR = *(Sample->LBRStack.begin()); |
970 | uint64_t SourceAddress = LBR.Source; |
971 | uint64_t TargetAddress = LBR.Target; |
972 | if (SourceAddress == TargetAddress && |
973 | Binary->addressIsCode(Address: TargetAddress)) { |
974 | Counter.recordRangeCount(Start: SourceAddress, End: TargetAddress, Repeat); |
975 | } |
976 | return; |
977 | } |
978 | |
979 | for (const LBREntry &LBR : Sample->LBRStack) { |
980 | uint64_t SourceAddress = LBR.Source; |
981 | uint64_t TargetAddress = LBR.Target; |
982 | |
983 | // Record the branch if its SourceAddress is external. It can be the case an |
984 | // external source call an internal function, later this branch will be used |
985 | // to generate the function's head sample. |
986 | if (Binary->addressIsCode(Address: TargetAddress)) { |
987 | Counter.recordBranchCount(Source: SourceAddress, Target: TargetAddress, Repeat); |
988 | } |
989 | |
990 | // If this not the first LBR, update the range count between TO of current |
991 | // LBR and FROM of next LBR. |
992 | uint64_t StartAddress = TargetAddress; |
993 | if (Binary->addressIsCode(Address: StartAddress) && |
994 | Binary->addressIsCode(Address: EndAddress) && |
995 | isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary)) |
996 | Counter.recordRangeCount(Start: StartAddress, End: EndAddress, Repeat); |
997 | EndAddress = SourceAddress; |
998 | } |
999 | } |
1000 | |
1001 | void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { |
1002 | std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); |
1003 | // Parsing LBR stack and populate into PerfSample.LBRStack |
1004 | if (extractLBRStack(TraceIt, LBRStack&: Sample->LBRStack)) { |
1005 | warnIfMissingMMap(); |
1006 | // Record LBR only samples by aggregation |
1007 | // If a sampling period is given we can adjust the magnitude of sample |
1008 | // counts to estimate the absolute magnitute. |
1009 | if (SamplePeriod.getNumOccurrences()) { |
1010 | Count *= SamplePeriod; |
1011 | // If counts are LBR-based, as opposed to IP-based, then the magnitude is |
1012 | // now amplified by roughly the LBR stack size. By adjusting this down, we |
1013 | // can produce LBR-based and IP-based profiles with comparable magnitudes. |
1014 | if (!LeadingIPOnly && Sample->LBRStack.size() > 1) |
1015 | Count /= (Sample->LBRStack.size() - 1); |
1016 | } |
1017 | AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; |
1018 | } |
1019 | } |
1020 | |
1021 | void PerfScriptReader::generateUnsymbolizedProfile() { |
1022 | // There is no context for LBR only sample, so initialize one entry with |
1023 | // fake "empty" context key. |
1024 | assert(SampleCounters.empty() && |
1025 | "Sample counter map should be empty before raw profile generation" ); |
1026 | std::shared_ptr<StringBasedCtxKey> Key = |
1027 | std::make_shared<StringBasedCtxKey>(); |
1028 | SampleCounters.emplace(args: Hashable<ContextKey>(Key), args: SampleCounter()); |
1029 | for (const auto &Item : AggregatedSamples) { |
1030 | const PerfSample *Sample = Item.first.getPtr(); |
1031 | computeCounterFromLBR(Sample, Repeat: Item.second); |
1032 | } |
1033 | } |
1034 | |
1035 | uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) { |
1036 | // The aggregated count is optional, so do not skip the line and return 1 if |
1037 | // it's unmatched |
1038 | uint64_t Count = 1; |
1039 | if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: Count)) |
1040 | TraceIt.advance(); |
1041 | return Count; |
1042 | } |
1043 | |
1044 | void PerfScriptReader::parseSample(TraceStream &TraceIt) { |
1045 | NumTotalSample++; |
1046 | uint64_t Count = parseAggregatedCount(TraceIt); |
1047 | assert(Count >= 1 && "Aggregated count should be >= 1!" ); |
1048 | parseSample(TraceIt, Count); |
1049 | } |
1050 | |
1051 | bool PerfScriptReader::(ProfiledBinary *Binary, |
1052 | StringRef Line, |
1053 | MMapEvent &MMap) { |
1054 | // Parse a MMap2 line like: |
1055 | // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0 |
1056 | // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so |
1057 | constexpr static const char *const MMap2Pattern = |
1058 | "PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: " |
1059 | "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " |
1060 | "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)" ; |
1061 | // Parse a MMap line like |
1062 | // PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \ |
1063 | // 0xffffffff81e00000]: x [kernel.kallsyms]_text |
1064 | constexpr static const char *const MMapPattern = |
1065 | "PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: " |
1066 | "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " |
1067 | "(0x[a-f0-9]+|0)\\]: [-a-z]+ (.*)" ; |
1068 | // Field 0 - whole line |
1069 | // Field 1 - PID |
1070 | // Field 2 - base address |
1071 | // Field 3 - mmapped size |
1072 | // Field 4 - page offset |
1073 | // Field 5 - binary path |
1074 | enum EventIndex { |
1075 | WHOLE_LINE = 0, |
1076 | PID = 1, |
1077 | MMAPPED_ADDRESS = 2, |
1078 | MMAPPED_SIZE = 3, |
1079 | PAGE_OFFSET = 4, |
1080 | BINARY_PATH = 5 |
1081 | }; |
1082 | |
1083 | bool R = false; |
1084 | SmallVector<StringRef, 6> Fields; |
1085 | if (Line.contains(Other: "PERF_RECORD_MMAP2 " )) { |
1086 | Regex RegMmap2(MMap2Pattern); |
1087 | R = RegMmap2.match(String: Line, Matches: &Fields); |
1088 | } else if (Line.contains(Other: "PERF_RECORD_MMAP " )) { |
1089 | Regex RegMmap(MMapPattern); |
1090 | R = RegMmap.match(String: Line, Matches: &Fields); |
1091 | } else |
1092 | llvm_unreachable("unexpected MMAP event entry" ); |
1093 | |
1094 | if (!R) { |
1095 | std::string WarningMsg = "Cannot parse mmap event: " + Line.str() + " \n" ; |
1096 | WithColor::warning() << WarningMsg; |
1097 | return false; |
1098 | } |
1099 | long long MMapPID = 0; |
1100 | getAsSignedInteger(Str: Fields[PID], Radix: 10, Result&: MMapPID); |
1101 | MMap.PID = MMapPID; |
1102 | Fields[MMAPPED_ADDRESS].getAsInteger(Radix: 0, Result&: MMap.Address); |
1103 | Fields[MMAPPED_SIZE].getAsInteger(Radix: 0, Result&: MMap.Size); |
1104 | Fields[PAGE_OFFSET].getAsInteger(Radix: 0, Result&: MMap.Offset); |
1105 | MMap.BinaryPath = Fields[BINARY_PATH]; |
1106 | if (ShowMmapEvents) { |
1107 | outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at " |
1108 | << format(Fmt: "0x%" PRIx64 ":" , Vals: MMap.Address) << " \n" ; |
1109 | } |
1110 | |
1111 | StringRef BinaryName = filename(Path: MMap.BinaryPath, UseBackSlash: Binary->isCOFF()); |
1112 | if (Binary->isKernel()) { |
1113 | return Binary->isKernelImageName(BinaryName); |
1114 | } |
1115 | return Binary->getName() == BinaryName; |
1116 | } |
1117 | |
1118 | void PerfScriptReader::parseMMapEvent(TraceStream &TraceIt) { |
1119 | MMapEvent MMap; |
1120 | if (extractMMapEventForBinary(Binary, Line: TraceIt.getCurrentLine(), MMap)) |
1121 | updateBinaryAddress(Event: MMap); |
1122 | TraceIt.advance(); |
1123 | } |
1124 | |
1125 | void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) { |
1126 | if (isMMapEvent(Line: TraceIt.getCurrentLine())) |
1127 | parseMMapEvent(TraceIt); |
1128 | else |
1129 | parseSample(TraceIt); |
1130 | } |
1131 | |
1132 | void PerfScriptReader::parseAndAggregateTrace() { |
1133 | // Trace line iterator |
1134 | TraceStream TraceIt(PerfTraceFile); |
1135 | while (!TraceIt.isAtEoF()) |
1136 | parseEventOrSample(TraceIt); |
1137 | } |
1138 | |
1139 | // A LBR sample is like: |
1140 | // 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... |
1141 | // A heuristic for fast detection by checking whether a |
1142 | // leading " 0x" and the '/' exist. |
1143 | bool PerfScriptReader::isLBRSample(StringRef Line) { |
1144 | // Skip the leading instruction pointer |
1145 | SmallVector<StringRef, 32> Records; |
1146 | Line.trim().split(A&: Records, Separator: " " , MaxSplit: 2, KeepEmpty: false); |
1147 | if (Records.size() < 2) |
1148 | return false; |
1149 | // Check if there is an event name before the leading IP. |
1150 | // If there is, it will be in Records[0]. To skip it, we'll re-split on |
1151 | // Records[1], which should contain the rest of the line. |
1152 | if (Records[0].contains(Other: ":" )) { |
1153 | // If so, consume the event name and continue processing the rest of the |
1154 | // line. |
1155 | StringRef IPAndLBR = Records[1].ltrim(); |
1156 | Records.clear(); |
1157 | IPAndLBR.split(A&: Records, Separator: " " , MaxSplit: 2, KeepEmpty: false); |
1158 | if (Records.size() < 2) |
1159 | return false; |
1160 | } |
1161 | if (Records[1].starts_with(Prefix: "0x" ) && Records[1].contains(C: '/')) |
1162 | return true; |
1163 | return false; |
1164 | } |
1165 | |
1166 | bool PerfScriptReader::isMMapEvent(StringRef Line) { |
1167 | // Short cut to avoid string find is possible. |
1168 | if (Line.empty() || Line.size() < 50) |
1169 | return false; |
1170 | |
1171 | if (std::isdigit(Line[0])) |
1172 | return false; |
1173 | |
1174 | // PERF_RECORD_MMAP2 or PERF_RECORD_MMAP does not appear at the beginning of |
1175 | // the line for ` perf script --show-mmap-events -i ...` |
1176 | return Line.contains(Other: "PERF_RECORD_MMAP" ); |
1177 | } |
1178 | |
1179 | // The raw hybird sample is like |
1180 | // e.g. |
1181 | // 4005dc # call stack leaf |
1182 | // 400634 |
1183 | // 400684 # call stack root |
1184 | // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... |
1185 | // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries |
1186 | // Determine the perfscript contains hybrid samples(call stack + LBRs) by |
1187 | // checking whether there is a non-empty call stack immediately followed by |
1188 | // a LBR sample |
1189 | PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) { |
1190 | TraceStream TraceIt(FileName); |
1191 | uint64_t FrameAddr = 0; |
1192 | while (!TraceIt.isAtEoF()) { |
1193 | // Skip the aggregated count |
1194 | if (!TraceIt.getCurrentLine().getAsInteger(Radix: 10, Result&: FrameAddr)) |
1195 | TraceIt.advance(); |
1196 | |
1197 | // Detect sample with call stack |
1198 | int32_t Count = 0; |
1199 | while (!TraceIt.isAtEoF() && |
1200 | !TraceIt.getCurrentLine().ltrim().getAsInteger(Radix: 16, Result&: FrameAddr)) { |
1201 | Count++; |
1202 | TraceIt.advance(); |
1203 | } |
1204 | if (!TraceIt.isAtEoF()) { |
1205 | if (isLBRSample(Line: TraceIt.getCurrentLine())) { |
1206 | if (Count > 0) |
1207 | return PerfContent::LBRStack; |
1208 | else |
1209 | return PerfContent::LBR; |
1210 | } |
1211 | TraceIt.advance(); |
1212 | } |
1213 | } |
1214 | |
1215 | exitWithError(Message: "Invalid perf script input!" ); |
1216 | return PerfContent::UnknownContent; |
1217 | } |
1218 | |
1219 | void HybridPerfReader::generateUnsymbolizedProfile() { |
1220 | ProfileIsCS = !IgnoreStackSamples; |
1221 | if (ProfileIsCS) |
1222 | unwindSamples(); |
1223 | else |
1224 | PerfScriptReader::generateUnsymbolizedProfile(); |
1225 | } |
1226 | |
1227 | void PerfScriptReader::warnTruncatedStack() { |
1228 | if (ShowDetailedWarning) { |
1229 | for (auto Address : InvalidReturnAddresses) { |
1230 | WithColor::warning() |
1231 | << "Truncated stack sample due to invalid return address at " |
1232 | << format(Fmt: "0x%" PRIx64, Vals: Address) |
1233 | << ", likely caused by frame pointer omission\n" ; |
1234 | } |
1235 | } |
1236 | emitWarningSummary( |
1237 | Num: InvalidReturnAddresses.size(), Total: AggregatedSamples.size(), |
1238 | Msg: "of truncated stack samples due to invalid return address, " |
1239 | "likely caused by frame pointer omission." ); |
1240 | } |
1241 | |
1242 | void PerfScriptReader::warnInvalidRange() { |
1243 | std::unordered_map<std::pair<uint64_t, uint64_t>, uint64_t, |
1244 | pair_hash<uint64_t, uint64_t>> |
1245 | Ranges; |
1246 | |
1247 | for (const auto &Item : AggregatedSamples) { |
1248 | const PerfSample *Sample = Item.first.getPtr(); |
1249 | uint64_t Count = Item.second; |
1250 | uint64_t EndAddress = 0; |
1251 | |
1252 | if (LeadingIPOnly) { |
1253 | assert(Sample->LBRStack.size() == 1 && |
1254 | "Expected only half LBR entries for ip-only mode" ); |
1255 | const LBREntry &LBR = *(Sample->LBRStack.begin()); |
1256 | if (LBR.Source == LBR.Target && LBR.Source != ExternalAddr) { |
1257 | // This is an leading-addr-only profile. |
1258 | Ranges[{LBR.Source, LBR.Source}] += Count; |
1259 | } |
1260 | continue; |
1261 | } |
1262 | |
1263 | for (const LBREntry &LBR : Sample->LBRStack) { |
1264 | uint64_t SourceAddress = LBR.Source; |
1265 | uint64_t StartAddress = LBR.Target; |
1266 | if (EndAddress != 0) |
1267 | Ranges[{StartAddress, EndAddress}] += Count; |
1268 | EndAddress = SourceAddress; |
1269 | } |
1270 | } |
1271 | |
1272 | if (Ranges.empty()) { |
1273 | WithColor::warning() << "No samples in perf script!\n" ; |
1274 | return; |
1275 | } |
1276 | |
1277 | auto WarnInvalidRange = [&](uint64_t StartAddress, uint64_t EndAddress, |
1278 | StringRef Msg) { |
1279 | if (!ShowDetailedWarning) |
1280 | return; |
1281 | WithColor::warning() << "[" << format(Fmt: "%8" PRIx64, Vals: StartAddress) << "," |
1282 | << format(Fmt: "%8" PRIx64, Vals: EndAddress) << "]: " << Msg |
1283 | << "\n" ; |
1284 | }; |
1285 | |
1286 | const char *EndNotBoundaryMsg = "Range is not on instruction boundary, " |
1287 | "likely due to profile and binary mismatch." ; |
1288 | const char *DanglingRangeMsg = "Range does not belong to any functions, " |
1289 | "likely from PLT, .init or .fini section." ; |
1290 | const char *RangeCrossFuncMsg = |
1291 | "Fall through range should not cross function boundaries, likely due to " |
1292 | "profile and binary mismatch." ; |
1293 | const char *BogusRangeMsg = "Range start is after or too far from range end." ; |
1294 | |
1295 | uint64_t TotalRangeNum = 0; |
1296 | uint64_t InstNotBoundary = 0; |
1297 | uint64_t UnmatchedRange = 0; |
1298 | uint64_t RangeCrossFunc = 0; |
1299 | uint64_t BogusRange = 0; |
1300 | |
1301 | for (auto &I : Ranges) { |
1302 | uint64_t StartAddress = I.first.first; |
1303 | uint64_t EndAddress = I.first.second; |
1304 | TotalRangeNum += I.second; |
1305 | |
1306 | if (!Binary->addressIsCode(Address: StartAddress) && |
1307 | !Binary->addressIsCode(Address: EndAddress)) |
1308 | continue; |
1309 | |
1310 | // IP samples can indicate activity on individual instructions rather than |
1311 | // basic blocks/edges. In this mode, don't warn if sampled IPs aren't |
1312 | // branches. |
1313 | if (!LeadingIPOnly) |
1314 | if (!Binary->addressIsCode(Address: StartAddress) || |
1315 | !Binary->addressIsTransfer(Address: EndAddress)) { |
1316 | InstNotBoundary += I.second; |
1317 | WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg); |
1318 | } |
1319 | |
1320 | auto *FRange = Binary->findFuncRange(Address: StartAddress); |
1321 | if (!FRange) { |
1322 | UnmatchedRange += I.second; |
1323 | WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg); |
1324 | continue; |
1325 | } |
1326 | |
1327 | if (EndAddress >= FRange->EndAddress) { |
1328 | RangeCrossFunc += I.second; |
1329 | WarnInvalidRange(StartAddress, EndAddress, RangeCrossFuncMsg); |
1330 | } |
1331 | |
1332 | if (Binary->addressIsCode(Address: StartAddress) && |
1333 | Binary->addressIsCode(Address: EndAddress) && |
1334 | !isValidFallThroughRange(Start: StartAddress, End: EndAddress, Binary)) { |
1335 | BogusRange += I.second; |
1336 | WarnInvalidRange(StartAddress, EndAddress, BogusRangeMsg); |
1337 | } |
1338 | } |
1339 | |
1340 | emitWarningSummary( |
1341 | Num: InstNotBoundary, Total: TotalRangeNum, |
1342 | Msg: "of samples are from ranges that are not on instruction boundary." ); |
1343 | emitWarningSummary( |
1344 | Num: UnmatchedRange, Total: TotalRangeNum, |
1345 | Msg: "of samples are from ranges that do not belong to any functions." ); |
1346 | emitWarningSummary( |
1347 | Num: RangeCrossFunc, Total: TotalRangeNum, |
1348 | Msg: "of samples are from ranges that do cross function boundaries." ); |
1349 | emitWarningSummary( |
1350 | Num: BogusRange, Total: TotalRangeNum, |
1351 | Msg: "of samples are from ranges that have range start after or too far from " |
1352 | "range end acrossing the unconditinal jmp." ); |
1353 | } |
1354 | |
1355 | void PerfScriptReader::parsePerfTraces() { |
1356 | // Parse perf traces and do aggregation. |
1357 | parseAndAggregateTrace(); |
1358 | if (Binary->isKernel() && !Binary->getIsLoadedByMMap()) { |
1359 | exitWithError( |
1360 | Message: "Kernel is requested, but no kernel is found in mmap events." ); |
1361 | } |
1362 | |
1363 | emitWarningSummary(Num: NumLeafExternalFrame, Total: NumTotalSample, |
1364 | Msg: "of samples have leaf external frame in call stack." ); |
1365 | emitWarningSummary(Num: NumLeadingOutgoingLBR, Total: NumTotalSample, |
1366 | Msg: "of samples have leading external LBR." ); |
1367 | |
1368 | // Generate unsymbolized profile. |
1369 | warnTruncatedStack(); |
1370 | warnInvalidRange(); |
1371 | generateUnsymbolizedProfile(); |
1372 | AggregatedSamples.clear(); |
1373 | |
1374 | if (SkipSymbolization) |
1375 | writeUnsymbolizedProfile(Filename: OutputFilename); |
1376 | } |
1377 | |
// Out-of-line definition of the static member PerfScriptReader::
// TempFileCleanups: a SmallVector of CleanupInstaller objects with inline
// storage for two elements.
// NOTE(review): presumably these remove temporary files created while
// producing the perf trace; confirm against the code that populates it.
SmallVector<CleanupInstaller, 2> PerfScriptReader::TempFileCleanups;
1379 | |
1380 | } // end namespace sampleprof |
1381 | } // end namespace llvm |
1382 | |