1 | //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements hierarchical time profiler. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/Support/TimeProfiler.h" |
14 | #include "llvm/ADT/STLExtras.h" |
15 | #include "llvm/ADT/STLFunctionalExtras.h" |
16 | #include "llvm/ADT/SmallVector.h" |
17 | #include "llvm/ADT/StringMap.h" |
18 | #include "llvm/Support/JSON.h" |
19 | #include "llvm/Support/Path.h" |
20 | #include "llvm/Support/Process.h" |
21 | #include "llvm/Support/Threading.h" |
22 | #include <algorithm> |
23 | #include <cassert> |
24 | #include <chrono> |
25 | #include <memory> |
26 | #include <mutex> |
27 | #include <string> |
28 | #include <vector> |
29 | |
30 | using namespace llvm; |
31 | |
32 | namespace { |
33 | |
34 | using std::chrono::duration; |
35 | using std::chrono::duration_cast; |
36 | using std::chrono::microseconds; |
37 | using std::chrono::steady_clock; |
38 | using std::chrono::system_clock; |
39 | using std::chrono::time_point; |
40 | using std::chrono::time_point_cast; |
41 | |
42 | struct TimeTraceProfilerInstances { |
43 | std::mutex Lock; |
44 | std::vector<TimeTraceProfiler *> List; |
45 | }; |
46 | |
47 | TimeTraceProfilerInstances &getTimeTraceProfilerInstances() { |
48 | static TimeTraceProfilerInstances Instances; |
49 | return Instances; |
50 | } |
51 | |
52 | } // anonymous namespace |
53 | |
54 | // Per Thread instance |
55 | static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; |
56 | |
57 | TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { |
58 | return TimeTraceProfilerInstance; |
59 | } |
60 | |
61 | namespace { |
62 | |
63 | using ClockType = steady_clock; |
64 | using TimePointType = time_point<ClockType>; |
65 | using DurationType = duration<ClockType::rep, ClockType::period>; |
66 | using CountAndDurationType = std::pair<size_t, DurationType>; |
67 | using NameAndCountAndDurationType = |
68 | std::pair<std::string, CountAndDurationType>; |
69 | |
70 | } // anonymous namespace |
71 | |
72 | /// Represents an open or completed time section entry to be captured. |
73 | struct llvm::TimeTraceProfilerEntry { |
74 | const TimePointType Start; |
75 | TimePointType End; |
76 | const std::string Name; |
77 | TimeTraceMetadata Metadata; |
78 | |
79 | const TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent; |
80 | TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, |
81 | std::string &&Dt, TimeTraceEventType Et) |
82 | : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(), |
83 | EventType(Et) { |
84 | Metadata.Detail = std::move(Dt); |
85 | } |
86 | |
87 | TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, |
88 | TimeTraceMetadata &&Mt, TimeTraceEventType Et) |
89 | : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), |
90 | Metadata(std::move(Mt)), EventType(Et) {} |
91 | |
92 | // Calculate timings for FlameGraph. Cast time points to microsecond precision |
93 | // rather than casting duration. This avoids truncation issues causing inner |
94 | // scopes overruning outer scopes. |
95 | ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { |
96 | return (time_point_cast<microseconds>(t: Start) - |
97 | time_point_cast<microseconds>(t: StartTime)) |
98 | .count(); |
99 | } |
100 | |
101 | ClockType::rep getFlameGraphDurUs() const { |
102 | return (time_point_cast<microseconds>(t: End) - |
103 | time_point_cast<microseconds>(t: Start)) |
104 | .count(); |
105 | } |
106 | }; |
107 | |
108 | // Represents a currently open (in-progress) time trace entry. InstantEvents |
109 | // that happen during an open event are associated with the duration of this |
110 | // parent event and they are dropped if parent duration is shorter than |
111 | // the granularity. |
112 | struct InProgressEntry { |
113 | TimeTraceProfilerEntry Event; |
114 | std::vector<TimeTraceProfilerEntry> InstantEvents; |
115 | |
116 | InProgressEntry(TimePointType S, TimePointType E, std::string N, |
117 | std::string Dt, TimeTraceEventType Et) |
118 | : Event(std::move(S), std::move(E), std::move(N), std::move(Dt), Et), |
119 | InstantEvents() {} |
120 | |
121 | InProgressEntry(TimePointType S, TimePointType E, std::string N, |
122 | TimeTraceMetadata Mt, TimeTraceEventType Et) |
123 | : Event(std::move(S), std::move(E), std::move(N), std::move(Mt), Et), |
124 | InstantEvents() {} |
125 | }; |
126 | |
127 | struct llvm::TimeTraceProfiler { |
128 | TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "" , |
129 | bool TimeTraceVerbose = false) |
130 | : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()), |
131 | ProcName(ProcName), Pid(sys::Process::getProcessId()), |
132 | Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity), |
133 | TimeTraceVerbose(TimeTraceVerbose) { |
134 | llvm::get_thread_name(Name&: ThreadName); |
135 | } |
136 | |
137 | TimeTraceProfilerEntry * |
138 | begin(std::string Name, llvm::function_ref<std::string()> Detail, |
139 | TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) { |
140 | assert(EventType != TimeTraceEventType::InstantEvent && |
141 | "Instant Events don't have begin and end." ); |
142 | Stack.emplace_back(Args: std::make_unique<InProgressEntry>( |
143 | args: ClockType::now(), args: TimePointType(), args: std::move(Name), args: Detail(), |
144 | args&: EventType)); |
145 | return &Stack.back()->Event; |
146 | } |
147 | |
148 | TimeTraceProfilerEntry * |
149 | begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata, |
150 | TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) { |
151 | assert(EventType != TimeTraceEventType::InstantEvent && |
152 | "Instant Events don't have begin and end." ); |
153 | Stack.emplace_back(Args: std::make_unique<InProgressEntry>( |
154 | args: ClockType::now(), args: TimePointType(), args: std::move(Name), args: Metadata(), |
155 | args&: EventType)); |
156 | return &Stack.back()->Event; |
157 | } |
158 | |
159 | void insert(std::string Name, llvm::function_ref<std::string()> Detail) { |
160 | if (Stack.empty()) |
161 | return; |
162 | |
163 | Stack.back()->InstantEvents.emplace_back(args: TimeTraceProfilerEntry( |
164 | ClockType::now(), TimePointType(), std::move(Name), Detail(), |
165 | TimeTraceEventType::InstantEvent)); |
166 | } |
167 | |
168 | void end() { |
169 | assert(!Stack.empty() && "Must call begin() first" ); |
170 | end(E&: Stack.back()->Event); |
171 | } |
172 | |
173 | void end(TimeTraceProfilerEntry &E) { |
174 | assert(!Stack.empty() && "Must call begin() first" ); |
175 | E.End = ClockType::now(); |
176 | |
177 | // Calculate duration at full precision for overall counts. |
178 | DurationType Duration = E.End - E.Start; |
179 | |
180 | const auto *Iter = |
181 | llvm::find_if(Range&: Stack, P: [&](const std::unique_ptr<InProgressEntry> &Val) { |
182 | return &Val->Event == &E; |
183 | }); |
184 | assert(Iter != Stack.end() && "Event not in the Stack" ); |
185 | |
186 | // Only include sections longer or equal to TimeTraceGranularity msec. |
187 | if (duration_cast<microseconds>(d: Duration).count() >= TimeTraceGranularity) { |
188 | Entries.emplace_back(Args&: E); |
189 | for (auto &IE : Iter->get()->InstantEvents) { |
190 | Entries.emplace_back(Args&: IE); |
191 | } |
192 | } |
193 | |
194 | // Track total time taken by each "name", but only the topmost levels of |
195 | // them; e.g. if there's a template instantiation that instantiates other |
196 | // templates from within, we only want to add the topmost one. "topmost" |
197 | // happens to be the ones that don't have any currently open entries above |
198 | // itself. |
199 | if (llvm::none_of(Range: llvm::drop_begin(RangeOrContainer: llvm::reverse(C&: Stack)), |
200 | P: [&](const std::unique_ptr<InProgressEntry> &Val) { |
201 | return Val->Event.Name == E.Name; |
202 | })) { |
203 | auto &CountAndTotal = CountAndTotalPerName[E.Name]; |
204 | CountAndTotal.first++; |
205 | CountAndTotal.second += Duration; |
206 | }; |
207 | |
208 | Stack.erase(CI: Iter); |
209 | } |
210 | |
211 | // Write events from this TimeTraceProfilerInstance and |
212 | // ThreadTimeTraceProfilerInstances. |
213 | void write(raw_pwrite_stream &OS) { |
214 | // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. |
215 | auto &Instances = getTimeTraceProfilerInstances(); |
216 | std::lock_guard<std::mutex> Lock(Instances.Lock); |
217 | assert(Stack.empty() && |
218 | "All profiler sections should be ended when calling write" ); |
219 | assert(llvm::all_of(Instances.List, |
220 | [](const auto &TTP) { return TTP->Stack.empty(); }) && |
221 | "All profiler sections should be ended when calling write" ); |
222 | |
223 | json::OStream J(OS); |
224 | J.objectBegin(); |
225 | J.attributeBegin(Key: "traceEvents" ); |
226 | J.arrayBegin(); |
227 | |
228 | // Emit all events for the main flame graph. |
229 | auto writeEvent = [&](const auto &E, uint64_t Tid) { |
230 | auto StartUs = E.getFlameGraphStartUs(StartTime); |
231 | auto DurUs = E.getFlameGraphDurUs(); |
232 | |
233 | J.object(Contents: [&] { |
234 | J.attribute(Key: "pid" , Contents: Pid); |
235 | J.attribute(Key: "tid" , Contents: int64_t(Tid)); |
236 | J.attribute(Key: "ts" , Contents: StartUs); |
237 | if (E.EventType == TimeTraceEventType::AsyncEvent) { |
238 | J.attribute(Key: "cat" , Contents: E.Name); |
239 | J.attribute(Key: "ph" , Contents: "b" ); |
240 | J.attribute(Key: "id" , Contents: 0); |
241 | } else if (E.EventType == TimeTraceEventType::CompleteEvent) { |
242 | J.attribute(Key: "ph" , Contents: "X" ); |
243 | J.attribute(Key: "dur" , Contents: DurUs); |
244 | } else { // instant event |
245 | assert(E.EventType == TimeTraceEventType::InstantEvent && |
246 | "InstantEvent expected" ); |
247 | J.attribute(Key: "ph" , Contents: "i" ); |
248 | } |
249 | J.attribute(Key: "name" , Contents: E.Name); |
250 | if (!E.Metadata.isEmpty()) { |
251 | J.attributeObject(Key: "args" , Contents: [&] { |
252 | if (!E.Metadata.Detail.empty()) |
253 | J.attribute(Key: "detail" , Contents: E.Metadata.Detail); |
254 | if (!E.Metadata.File.empty()) |
255 | J.attribute(Key: "file" , Contents: E.Metadata.File); |
256 | if (E.Metadata.Line > 0) |
257 | J.attribute(Key: "line" , Contents: E.Metadata.Line); |
258 | }); |
259 | } |
260 | }); |
261 | |
262 | if (E.EventType == TimeTraceEventType::AsyncEvent) { |
263 | J.object(Contents: [&] { |
264 | J.attribute(Key: "pid" , Contents: Pid); |
265 | J.attribute(Key: "tid" , Contents: int64_t(Tid)); |
266 | J.attribute(Key: "ts" , Contents: StartUs + DurUs); |
267 | J.attribute(Key: "cat" , Contents: E.Name); |
268 | J.attribute(Key: "ph" , Contents: "e" ); |
269 | J.attribute(Key: "id" , Contents: 0); |
270 | J.attribute(Key: "name" , Contents: E.Name); |
271 | }); |
272 | } |
273 | }; |
274 | for (const TimeTraceProfilerEntry &E : Entries) |
275 | writeEvent(E, this->Tid); |
276 | for (const TimeTraceProfiler *TTP : Instances.List) |
277 | for (const TimeTraceProfilerEntry &E : TTP->Entries) |
278 | writeEvent(E, TTP->Tid); |
279 | |
280 | // Emit totals by section name as additional "thread" events, sorted from |
281 | // longest one. |
282 | // Find highest used thread id. |
283 | uint64_t MaxTid = this->Tid; |
284 | for (const TimeTraceProfiler *TTP : Instances.List) |
285 | MaxTid = std::max(a: MaxTid, b: TTP->Tid); |
286 | |
287 | // Combine all CountAndTotalPerName from threads into one. |
288 | StringMap<CountAndDurationType> AllCountAndTotalPerName; |
289 | auto combineStat = [&](const auto &Stat) { |
290 | StringRef Key = Stat.getKey(); |
291 | auto Value = Stat.getValue(); |
292 | auto &CountAndTotal = AllCountAndTotalPerName[Key]; |
293 | CountAndTotal.first += Value.first; |
294 | CountAndTotal.second += Value.second; |
295 | }; |
296 | for (const auto &Stat : CountAndTotalPerName) |
297 | combineStat(Stat); |
298 | for (const TimeTraceProfiler *TTP : Instances.List) |
299 | for (const auto &Stat : TTP->CountAndTotalPerName) |
300 | combineStat(Stat); |
301 | |
302 | std::vector<NameAndCountAndDurationType> SortedTotals; |
303 | SortedTotals.reserve(n: AllCountAndTotalPerName.size()); |
304 | for (const auto &Total : AllCountAndTotalPerName) |
305 | SortedTotals.emplace_back(args: std::string(Total.getKey()), args: Total.getValue()); |
306 | |
307 | llvm::sort(C&: SortedTotals, Comp: [](const NameAndCountAndDurationType &A, |
308 | const NameAndCountAndDurationType &B) { |
309 | return A.second.second > B.second.second; |
310 | }); |
311 | |
312 | // Report totals on separate threads of tracing file. |
313 | uint64_t TotalTid = MaxTid + 1; |
314 | for (const NameAndCountAndDurationType &Total : SortedTotals) { |
315 | auto DurUs = duration_cast<microseconds>(d: Total.second.second).count(); |
316 | auto Count = AllCountAndTotalPerName[Total.first].first; |
317 | |
318 | J.object(Contents: [&] { |
319 | J.attribute(Key: "pid" , Contents: Pid); |
320 | J.attribute(Key: "tid" , Contents: int64_t(TotalTid)); |
321 | J.attribute(Key: "ph" , Contents: "X" ); |
322 | J.attribute(Key: "ts" , Contents: 0); |
323 | J.attribute(Key: "dur" , Contents: DurUs); |
324 | J.attribute(Key: "name" , Contents: "Total " + Total.first); |
325 | J.attributeObject(Key: "args" , Contents: [&] { |
326 | J.attribute(Key: "count" , Contents: int64_t(Count)); |
327 | J.attribute(Key: "avg ms" , Contents: int64_t(DurUs / Count / 1000)); |
328 | }); |
329 | }); |
330 | |
331 | ++TotalTid; |
332 | } |
333 | |
334 | auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, |
335 | StringRef arg) { |
336 | J.object(Contents: [&] { |
337 | J.attribute(Key: "cat" , Contents: "" ); |
338 | J.attribute(Key: "pid" , Contents: Pid); |
339 | J.attribute(Key: "tid" , Contents: int64_t(Tid)); |
340 | J.attribute(Key: "ts" , Contents: 0); |
341 | J.attribute(Key: "ph" , Contents: "M" ); |
342 | J.attribute(Key: "name" , Contents: Name); |
343 | J.attributeObject(Key: "args" , Contents: [&] { J.attribute(Key: "name" , Contents: arg); }); |
344 | }); |
345 | }; |
346 | |
347 | writeMetadataEvent("process_name" , Tid, ProcName); |
348 | writeMetadataEvent("thread_name" , Tid, ThreadName); |
349 | for (const TimeTraceProfiler *TTP : Instances.List) |
350 | writeMetadataEvent("thread_name" , TTP->Tid, TTP->ThreadName); |
351 | |
352 | J.arrayEnd(); |
353 | J.attributeEnd(); |
354 | |
355 | // Emit the absolute time when this TimeProfiler started. |
356 | // This can be used to combine the profiling data from |
357 | // multiple processes and preserve actual time intervals. |
358 | J.attribute(Key: "beginningOfTime" , |
359 | Contents: time_point_cast<microseconds>(t: BeginningOfTime) |
360 | .time_since_epoch() |
361 | .count()); |
362 | |
363 | J.objectEnd(); |
364 | } |
365 | |
366 | SmallVector<std::unique_ptr<InProgressEntry>, 16> Stack; |
367 | SmallVector<TimeTraceProfilerEntry, 128> Entries; |
368 | StringMap<CountAndDurationType> CountAndTotalPerName; |
369 | // System clock time when the session was begun. |
370 | const time_point<system_clock> BeginningOfTime; |
371 | // Profiling clock time when the session was begun. |
372 | const TimePointType StartTime; |
373 | const std::string ProcName; |
374 | const sys::Process::Pid Pid; |
375 | SmallString<0> ThreadName; |
376 | const uint64_t Tid; |
377 | |
378 | // Minimum time granularity (in microseconds) |
379 | const unsigned TimeTraceGranularity; |
380 | |
381 | // Make time trace capture verbose event details (e.g. source filenames). This |
382 | // can increase the size of the output by 2-3 times. |
383 | const bool TimeTraceVerbose; |
384 | }; |
385 | |
386 | bool llvm::isTimeTraceVerbose() { |
387 | return getTimeTraceProfilerInstance() && |
388 | getTimeTraceProfilerInstance()->TimeTraceVerbose; |
389 | } |
390 | |
391 | void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, |
392 | StringRef ProcName, |
393 | bool TimeTraceVerbose) { |
394 | assert(TimeTraceProfilerInstance == nullptr && |
395 | "Profiler should not be initialized" ); |
396 | TimeTraceProfilerInstance = new TimeTraceProfiler( |
397 | TimeTraceGranularity, llvm::sys::path::filename(path: ProcName), |
398 | TimeTraceVerbose); |
399 | } |
400 | |
401 | // Removes all TimeTraceProfilerInstances. |
402 | // Called from main thread. |
403 | void llvm::timeTraceProfilerCleanup() { |
404 | delete TimeTraceProfilerInstance; |
405 | TimeTraceProfilerInstance = nullptr; |
406 | |
407 | auto &Instances = getTimeTraceProfilerInstances(); |
408 | std::lock_guard<std::mutex> Lock(Instances.Lock); |
409 | for (auto *TTP : Instances.List) |
410 | delete TTP; |
411 | Instances.List.clear(); |
412 | } |
413 | |
414 | // Finish TimeTraceProfilerInstance on a worker thread. |
415 | // This doesn't remove the instance, just moves the pointer to global vector. |
416 | void llvm::timeTraceProfilerFinishThread() { |
417 | auto &Instances = getTimeTraceProfilerInstances(); |
418 | std::lock_guard<std::mutex> Lock(Instances.Lock); |
419 | Instances.List.push_back(x: TimeTraceProfilerInstance); |
420 | TimeTraceProfilerInstance = nullptr; |
421 | } |
422 | |
423 | void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) { |
424 | assert(TimeTraceProfilerInstance != nullptr && |
425 | "Profiler object can't be null" ); |
426 | TimeTraceProfilerInstance->write(OS); |
427 | } |
428 | |
429 | Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName, |
430 | StringRef FallbackFileName) { |
431 | assert(TimeTraceProfilerInstance != nullptr && |
432 | "Profiler object can't be null" ); |
433 | |
434 | std::string Path = PreferredFileName.str(); |
435 | if (Path.empty()) { |
436 | Path = FallbackFileName == "-" ? "out" : FallbackFileName.str(); |
437 | Path += ".time-trace" ; |
438 | } |
439 | |
440 | std::error_code EC; |
441 | raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF); |
442 | if (EC) |
443 | return createStringError(EC, S: "Could not open " + Path); |
444 | |
445 | timeTraceProfilerWrite(OS); |
446 | return Error::success(); |
447 | } |
448 | |
449 | TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name, |
450 | StringRef Detail) { |
451 | if (TimeTraceProfilerInstance != nullptr) |
452 | return TimeTraceProfilerInstance->begin( |
453 | Name: std::string(Name), Detail: [&]() { return std::string(Detail); }, |
454 | EventType: TimeTraceEventType::CompleteEvent); |
455 | return nullptr; |
456 | } |
457 | |
458 | TimeTraceProfilerEntry * |
459 | llvm::timeTraceProfilerBegin(StringRef Name, |
460 | llvm::function_ref<std::string()> Detail) { |
461 | if (TimeTraceProfilerInstance != nullptr) |
462 | return TimeTraceProfilerInstance->begin(Name: std::string(Name), Detail, |
463 | EventType: TimeTraceEventType::CompleteEvent); |
464 | return nullptr; |
465 | } |
466 | |
467 | TimeTraceProfilerEntry * |
468 | llvm::timeTraceProfilerBegin(StringRef Name, |
469 | llvm::function_ref<TimeTraceMetadata()> Metadata) { |
470 | if (TimeTraceProfilerInstance != nullptr) |
471 | return TimeTraceProfilerInstance->begin(Name: std::string(Name), Metadata, |
472 | EventType: TimeTraceEventType::CompleteEvent); |
473 | return nullptr; |
474 | } |
475 | |
476 | TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name, |
477 | StringRef Detail) { |
478 | if (TimeTraceProfilerInstance != nullptr) |
479 | return TimeTraceProfilerInstance->begin( |
480 | Name: std::string(Name), Detail: [&]() { return std::string(Detail); }, |
481 | EventType: TimeTraceEventType::AsyncEvent); |
482 | return nullptr; |
483 | } |
484 | |
485 | void llvm::timeTraceAddInstantEvent(StringRef Name, |
486 | llvm::function_ref<std::string()> Detail) { |
487 | if (TimeTraceProfilerInstance != nullptr) |
488 | TimeTraceProfilerInstance->insert(Name: std::string(Name), Detail); |
489 | } |
490 | |
491 | void llvm::timeTraceProfilerEnd() { |
492 | if (TimeTraceProfilerInstance != nullptr) |
493 | TimeTraceProfilerInstance->end(); |
494 | } |
495 | |
496 | void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) { |
497 | if (TimeTraceProfilerInstance != nullptr) |
498 | TimeTraceProfilerInstance->end(E&: *E); |
499 | } |
500 | |