1 | //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements hierarchical time profiler. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/Support/TimeProfiler.h" |
14 | #include "llvm/ADT/STLExtras.h" |
15 | #include "llvm/ADT/STLFunctionalExtras.h" |
16 | #include "llvm/ADT/StringMap.h" |
17 | #include "llvm/Support/JSON.h" |
18 | #include "llvm/Support/Path.h" |
19 | #include "llvm/Support/Process.h" |
20 | #include "llvm/Support/Threading.h" |
21 | #include <algorithm> |
22 | #include <cassert> |
23 | #include <chrono> |
24 | #include <memory> |
25 | #include <mutex> |
26 | #include <string> |
27 | #include <vector> |
28 | |
29 | using namespace llvm; |
30 | |
31 | namespace { |
32 | |
33 | using std::chrono::duration; |
34 | using std::chrono::duration_cast; |
35 | using std::chrono::microseconds; |
36 | using std::chrono::steady_clock; |
37 | using std::chrono::system_clock; |
38 | using std::chrono::time_point; |
39 | using std::chrono::time_point_cast; |
40 | |
41 | struct TimeTraceProfilerInstances { |
42 | std::mutex Lock; |
43 | std::vector<TimeTraceProfiler *> List; |
44 | }; |
45 | |
46 | TimeTraceProfilerInstances &getTimeTraceProfilerInstances() { |
47 | static TimeTraceProfilerInstances Instances; |
48 | return Instances; |
49 | } |
50 | |
51 | } // anonymous namespace |
52 | |
53 | // Per Thread instance |
54 | static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; |
55 | |
56 | TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { |
57 | return TimeTraceProfilerInstance; |
58 | } |
59 | |
60 | namespace { |
61 | |
62 | using ClockType = steady_clock; |
63 | using TimePointType = time_point<ClockType>; |
64 | using DurationType = duration<ClockType::rep, ClockType::period>; |
65 | using CountAndDurationType = std::pair<size_t, DurationType>; |
66 | using NameAndCountAndDurationType = |
67 | std::pair<std::string, CountAndDurationType>; |
68 | |
69 | } // anonymous namespace |
70 | |
71 | /// Represents an open or completed time section entry to be captured. |
72 | struct llvm::TimeTraceProfilerEntry { |
73 | const TimePointType Start; |
74 | TimePointType End; |
75 | const std::string Name; |
76 | TimeTraceMetadata Metadata; |
77 | |
78 | const bool AsyncEvent = false; |
79 | TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, |
80 | std::string &&Dt, bool Ae) |
81 | : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(), |
82 | AsyncEvent(Ae) { |
83 | Metadata.Detail = std::move(Dt); |
84 | } |
85 | |
86 | TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, |
87 | TimeTraceMetadata &&Mt, bool Ae) |
88 | : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), |
89 | Metadata(std::move(Mt)), AsyncEvent(Ae) {} |
90 | |
91 | // Calculate timings for FlameGraph. Cast time points to microsecond precision |
92 | // rather than casting duration. This avoids truncation issues causing inner |
93 | // scopes overruning outer scopes. |
94 | ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { |
95 | return (time_point_cast<microseconds>(t: Start) - |
96 | time_point_cast<microseconds>(t: StartTime)) |
97 | .count(); |
98 | } |
99 | |
100 | ClockType::rep getFlameGraphDurUs() const { |
101 | return (time_point_cast<microseconds>(t: End) - |
102 | time_point_cast<microseconds>(t: Start)) |
103 | .count(); |
104 | } |
105 | }; |
106 | |
107 | struct llvm::TimeTraceProfiler { |
108 | TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "" , |
109 | bool TimeTraceVerbose = false) |
110 | : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()), |
111 | ProcName(ProcName), Pid(sys::Process::getProcessId()), |
112 | Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity), |
113 | TimeTraceVerbose(TimeTraceVerbose) { |
114 | llvm::get_thread_name(Name&: ThreadName); |
115 | } |
116 | |
117 | TimeTraceProfilerEntry *begin(std::string Name, |
118 | llvm::function_ref<std::string()> Detail, |
119 | bool AsyncEvent = false) { |
120 | Stack.emplace_back(Args: std::make_unique<TimeTraceProfilerEntry>( |
121 | args: ClockType::now(), args: TimePointType(), args: std::move(Name), args: Detail(), |
122 | args&: AsyncEvent)); |
123 | return Stack.back().get(); |
124 | } |
125 | |
126 | TimeTraceProfilerEntry * |
127 | begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata, |
128 | bool AsyncEvent = false) { |
129 | Stack.emplace_back(Args: std::make_unique<TimeTraceProfilerEntry>( |
130 | args: ClockType::now(), args: TimePointType(), args: std::move(Name), args: Metadata(), |
131 | args&: AsyncEvent)); |
132 | return Stack.back().get(); |
133 | } |
134 | |
135 | void end() { |
136 | assert(!Stack.empty() && "Must call begin() first" ); |
137 | end(E&: *Stack.back()); |
138 | } |
139 | |
140 | void end(TimeTraceProfilerEntry &E) { |
141 | assert(!Stack.empty() && "Must call begin() first" ); |
142 | E.End = ClockType::now(); |
143 | |
144 | // Calculate duration at full precision for overall counts. |
145 | DurationType Duration = E.End - E.Start; |
146 | |
147 | // Only include sections longer or equal to TimeTraceGranularity msec. |
148 | if (duration_cast<microseconds>(d: Duration).count() >= TimeTraceGranularity) |
149 | Entries.emplace_back(Args&: E); |
150 | |
151 | // Track total time taken by each "name", but only the topmost levels of |
152 | // them; e.g. if there's a template instantiation that instantiates other |
153 | // templates from within, we only want to add the topmost one. "topmost" |
154 | // happens to be the ones that don't have any currently open entries above |
155 | // itself. |
156 | if (llvm::none_of(Range: llvm::drop_begin(RangeOrContainer: llvm::reverse(C&: Stack)), |
157 | P: [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) { |
158 | return Val->Name == E.Name; |
159 | })) { |
160 | auto &CountAndTotal = CountAndTotalPerName[E.Name]; |
161 | CountAndTotal.first++; |
162 | CountAndTotal.second += Duration; |
163 | }; |
164 | |
165 | llvm::erase_if(C&: Stack, |
166 | P: [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) { |
167 | return Val.get() == &E; |
168 | }); |
169 | } |
170 | |
171 | // Write events from this TimeTraceProfilerInstance and |
172 | // ThreadTimeTraceProfilerInstances. |
173 | void write(raw_pwrite_stream &OS) { |
174 | // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. |
175 | auto &Instances = getTimeTraceProfilerInstances(); |
176 | std::lock_guard<std::mutex> Lock(Instances.Lock); |
177 | assert(Stack.empty() && |
178 | "All profiler sections should be ended when calling write" ); |
179 | assert(llvm::all_of(Instances.List, |
180 | [](const auto &TTP) { return TTP->Stack.empty(); }) && |
181 | "All profiler sections should be ended when calling write" ); |
182 | |
183 | json::OStream J(OS); |
184 | J.objectBegin(); |
185 | J.attributeBegin(Key: "traceEvents" ); |
186 | J.arrayBegin(); |
187 | |
188 | // Emit all events for the main flame graph. |
189 | auto writeEvent = [&](const auto &E, uint64_t Tid) { |
190 | auto StartUs = E.getFlameGraphStartUs(StartTime); |
191 | auto DurUs = E.getFlameGraphDurUs(); |
192 | |
193 | J.object(Contents: [&] { |
194 | J.attribute(Key: "pid" , Contents: Pid); |
195 | J.attribute(Key: "tid" , Contents: int64_t(Tid)); |
196 | J.attribute(Key: "ts" , Contents: StartUs); |
197 | if (E.AsyncEvent) { |
198 | J.attribute(Key: "cat" , Contents: E.Name); |
199 | J.attribute(Key: "ph" , Contents: "b" ); |
200 | J.attribute(Key: "id" , Contents: 0); |
201 | } else { |
202 | J.attribute(Key: "ph" , Contents: "X" ); |
203 | J.attribute(Key: "dur" , Contents: DurUs); |
204 | } |
205 | J.attribute(Key: "name" , Contents: E.Name); |
206 | if (!E.Metadata.isEmpty()) { |
207 | J.attributeObject(Key: "args" , Contents: [&] { |
208 | if (!E.Metadata.Detail.empty()) |
209 | J.attribute(Key: "detail" , Contents: E.Metadata.Detail); |
210 | if (!E.Metadata.File.empty()) |
211 | J.attribute(Key: "file" , Contents: E.Metadata.File); |
212 | if (E.Metadata.Line > 0) |
213 | J.attribute(Key: "line" , Contents: E.Metadata.Line); |
214 | }); |
215 | } |
216 | }); |
217 | |
218 | if (E.AsyncEvent) { |
219 | J.object(Contents: [&] { |
220 | J.attribute(Key: "pid" , Contents: Pid); |
221 | J.attribute(Key: "tid" , Contents: int64_t(Tid)); |
222 | J.attribute(Key: "ts" , Contents: StartUs + DurUs); |
223 | J.attribute(Key: "cat" , Contents: E.Name); |
224 | J.attribute(Key: "ph" , Contents: "e" ); |
225 | J.attribute(Key: "id" , Contents: 0); |
226 | J.attribute(Key: "name" , Contents: E.Name); |
227 | }); |
228 | } |
229 | }; |
230 | for (const TimeTraceProfilerEntry &E : Entries) |
231 | writeEvent(E, this->Tid); |
232 | for (const TimeTraceProfiler *TTP : Instances.List) |
233 | for (const TimeTraceProfilerEntry &E : TTP->Entries) |
234 | writeEvent(E, TTP->Tid); |
235 | |
236 | // Emit totals by section name as additional "thread" events, sorted from |
237 | // longest one. |
238 | // Find highest used thread id. |
239 | uint64_t MaxTid = this->Tid; |
240 | for (const TimeTraceProfiler *TTP : Instances.List) |
241 | MaxTid = std::max(a: MaxTid, b: TTP->Tid); |
242 | |
243 | // Combine all CountAndTotalPerName from threads into one. |
244 | StringMap<CountAndDurationType> AllCountAndTotalPerName; |
245 | auto combineStat = [&](const auto &Stat) { |
246 | StringRef Key = Stat.getKey(); |
247 | auto Value = Stat.getValue(); |
248 | auto &CountAndTotal = AllCountAndTotalPerName[Key]; |
249 | CountAndTotal.first += Value.first; |
250 | CountAndTotal.second += Value.second; |
251 | }; |
252 | for (const auto &Stat : CountAndTotalPerName) |
253 | combineStat(Stat); |
254 | for (const TimeTraceProfiler *TTP : Instances.List) |
255 | for (const auto &Stat : TTP->CountAndTotalPerName) |
256 | combineStat(Stat); |
257 | |
258 | std::vector<NameAndCountAndDurationType> SortedTotals; |
259 | SortedTotals.reserve(n: AllCountAndTotalPerName.size()); |
260 | for (const auto &Total : AllCountAndTotalPerName) |
261 | SortedTotals.emplace_back(args: std::string(Total.getKey()), args: Total.getValue()); |
262 | |
263 | llvm::sort(C&: SortedTotals, Comp: [](const NameAndCountAndDurationType &A, |
264 | const NameAndCountAndDurationType &B) { |
265 | return A.second.second > B.second.second; |
266 | }); |
267 | |
268 | // Report totals on separate threads of tracing file. |
269 | uint64_t TotalTid = MaxTid + 1; |
270 | for (const NameAndCountAndDurationType &Total : SortedTotals) { |
271 | auto DurUs = duration_cast<microseconds>(d: Total.second.second).count(); |
272 | auto Count = AllCountAndTotalPerName[Total.first].first; |
273 | |
274 | J.object(Contents: [&] { |
275 | J.attribute(Key: "pid" , Contents: Pid); |
276 | J.attribute(Key: "tid" , Contents: int64_t(TotalTid)); |
277 | J.attribute(Key: "ph" , Contents: "X" ); |
278 | J.attribute(Key: "ts" , Contents: 0); |
279 | J.attribute(Key: "dur" , Contents: DurUs); |
280 | J.attribute(Key: "name" , Contents: "Total " + Total.first); |
281 | J.attributeObject(Key: "args" , Contents: [&] { |
282 | J.attribute(Key: "count" , Contents: int64_t(Count)); |
283 | J.attribute(Key: "avg ms" , Contents: int64_t(DurUs / Count / 1000)); |
284 | }); |
285 | }); |
286 | |
287 | ++TotalTid; |
288 | } |
289 | |
290 | auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, |
291 | StringRef arg) { |
292 | J.object(Contents: [&] { |
293 | J.attribute(Key: "cat" , Contents: "" ); |
294 | J.attribute(Key: "pid" , Contents: Pid); |
295 | J.attribute(Key: "tid" , Contents: int64_t(Tid)); |
296 | J.attribute(Key: "ts" , Contents: 0); |
297 | J.attribute(Key: "ph" , Contents: "M" ); |
298 | J.attribute(Key: "name" , Contents: Name); |
299 | J.attributeObject(Key: "args" , Contents: [&] { J.attribute(Key: "name" , Contents: arg); }); |
300 | }); |
301 | }; |
302 | |
303 | writeMetadataEvent("process_name" , Tid, ProcName); |
304 | writeMetadataEvent("thread_name" , Tid, ThreadName); |
305 | for (const TimeTraceProfiler *TTP : Instances.List) |
306 | writeMetadataEvent("thread_name" , TTP->Tid, TTP->ThreadName); |
307 | |
308 | J.arrayEnd(); |
309 | J.attributeEnd(); |
310 | |
311 | // Emit the absolute time when this TimeProfiler started. |
312 | // This can be used to combine the profiling data from |
313 | // multiple processes and preserve actual time intervals. |
314 | J.attribute(Key: "beginningOfTime" , |
315 | Contents: time_point_cast<microseconds>(t: BeginningOfTime) |
316 | .time_since_epoch() |
317 | .count()); |
318 | |
319 | J.objectEnd(); |
320 | } |
321 | |
322 | SmallVector<std::unique_ptr<TimeTraceProfilerEntry>, 16> Stack; |
323 | SmallVector<TimeTraceProfilerEntry, 128> Entries; |
324 | StringMap<CountAndDurationType> CountAndTotalPerName; |
325 | // System clock time when the session was begun. |
326 | const time_point<system_clock> BeginningOfTime; |
327 | // Profiling clock time when the session was begun. |
328 | const TimePointType StartTime; |
329 | const std::string ProcName; |
330 | const sys::Process::Pid Pid; |
331 | SmallString<0> ThreadName; |
332 | const uint64_t Tid; |
333 | |
334 | // Minimum time granularity (in microseconds) |
335 | const unsigned TimeTraceGranularity; |
336 | |
337 | // Make time trace capture verbose event details (e.g. source filenames). This |
338 | // can increase the size of the output by 2-3 times. |
339 | const bool TimeTraceVerbose; |
340 | }; |
341 | |
342 | bool llvm::isTimeTraceVerbose() { |
343 | return getTimeTraceProfilerInstance() && |
344 | getTimeTraceProfilerInstance()->TimeTraceVerbose; |
345 | } |
346 | |
347 | void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, |
348 | StringRef ProcName, |
349 | bool TimeTraceVerbose) { |
350 | assert(TimeTraceProfilerInstance == nullptr && |
351 | "Profiler should not be initialized" ); |
352 | TimeTraceProfilerInstance = new TimeTraceProfiler( |
353 | TimeTraceGranularity, llvm::sys::path::filename(path: ProcName), |
354 | TimeTraceVerbose); |
355 | } |
356 | |
357 | // Removes all TimeTraceProfilerInstances. |
358 | // Called from main thread. |
359 | void llvm::timeTraceProfilerCleanup() { |
360 | delete TimeTraceProfilerInstance; |
361 | TimeTraceProfilerInstance = nullptr; |
362 | |
363 | auto &Instances = getTimeTraceProfilerInstances(); |
364 | std::lock_guard<std::mutex> Lock(Instances.Lock); |
365 | for (auto *TTP : Instances.List) |
366 | delete TTP; |
367 | Instances.List.clear(); |
368 | } |
369 | |
370 | // Finish TimeTraceProfilerInstance on a worker thread. |
371 | // This doesn't remove the instance, just moves the pointer to global vector. |
372 | void llvm::timeTraceProfilerFinishThread() { |
373 | auto &Instances = getTimeTraceProfilerInstances(); |
374 | std::lock_guard<std::mutex> Lock(Instances.Lock); |
375 | Instances.List.push_back(x: TimeTraceProfilerInstance); |
376 | TimeTraceProfilerInstance = nullptr; |
377 | } |
378 | |
379 | void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) { |
380 | assert(TimeTraceProfilerInstance != nullptr && |
381 | "Profiler object can't be null" ); |
382 | TimeTraceProfilerInstance->write(OS); |
383 | } |
384 | |
385 | Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName, |
386 | StringRef FallbackFileName) { |
387 | assert(TimeTraceProfilerInstance != nullptr && |
388 | "Profiler object can't be null" ); |
389 | |
390 | std::string Path = PreferredFileName.str(); |
391 | if (Path.empty()) { |
392 | Path = FallbackFileName == "-" ? "out" : FallbackFileName.str(); |
393 | Path += ".time-trace" ; |
394 | } |
395 | |
396 | std::error_code EC; |
397 | raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF); |
398 | if (EC) |
399 | return createStringError(EC, S: "Could not open " + Path); |
400 | |
401 | timeTraceProfilerWrite(OS); |
402 | return Error::success(); |
403 | } |
404 | |
405 | TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name, |
406 | StringRef Detail) { |
407 | if (TimeTraceProfilerInstance != nullptr) |
408 | return TimeTraceProfilerInstance->begin( |
409 | Name: std::string(Name), Detail: [&]() { return std::string(Detail); }, AsyncEvent: false); |
410 | return nullptr; |
411 | } |
412 | |
413 | TimeTraceProfilerEntry * |
414 | llvm::timeTraceProfilerBegin(StringRef Name, |
415 | llvm::function_ref<std::string()> Detail) { |
416 | if (TimeTraceProfilerInstance != nullptr) |
417 | return TimeTraceProfilerInstance->begin(Name: std::string(Name), Detail, AsyncEvent: false); |
418 | return nullptr; |
419 | } |
420 | |
421 | TimeTraceProfilerEntry * |
422 | llvm::timeTraceProfilerBegin(StringRef Name, |
423 | llvm::function_ref<TimeTraceMetadata()> Metadata) { |
424 | if (TimeTraceProfilerInstance != nullptr) |
425 | return TimeTraceProfilerInstance->begin(Name: std::string(Name), Metadata, AsyncEvent: false); |
426 | return nullptr; |
427 | } |
428 | |
429 | TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name, |
430 | StringRef Detail) { |
431 | if (TimeTraceProfilerInstance != nullptr) |
432 | return TimeTraceProfilerInstance->begin( |
433 | Name: std::string(Name), Detail: [&]() { return std::string(Detail); }, AsyncEvent: true); |
434 | return nullptr; |
435 | } |
436 | |
437 | void llvm::timeTraceProfilerEnd() { |
438 | if (TimeTraceProfilerInstance != nullptr) |
439 | TimeTraceProfilerInstance->end(); |
440 | } |
441 | |
442 | void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) { |
443 | if (TimeTraceProfilerInstance != nullptr) |
444 | TimeTraceProfilerInstance->end(E&: *E); |
445 | } |
446 | |