1//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements hierarchical time profiler.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Support/TimeProfiler.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/STLFunctionalExtras.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/StringMap.h"
18#include "llvm/Support/JSON.h"
19#include "llvm/Support/Path.h"
20#include "llvm/Support/Process.h"
21#include "llvm/Support/Threading.h"
22#include <algorithm>
23#include <cassert>
24#include <chrono>
25#include <memory>
26#include <mutex>
27#include <string>
28#include <vector>
29
30using namespace llvm;
31
32namespace {
33
34using std::chrono::duration;
35using std::chrono::duration_cast;
36using std::chrono::microseconds;
37using std::chrono::steady_clock;
38using std::chrono::system_clock;
39using std::chrono::time_point;
40using std::chrono::time_point_cast;
41
42struct TimeTraceProfilerInstances {
43 std::mutex Lock;
44 std::vector<TimeTraceProfiler *> List;
45};
46
47TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
48 static TimeTraceProfilerInstances Instances;
49 return Instances;
50}
51
52static const char TimeTraceFileExtension[] = ".time-trace.json";
53
54} // anonymous namespace
55
56// Per Thread instance
57static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
58
59TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
60 return TimeTraceProfilerInstance;
61}
62
63namespace {
64
65using ClockType = steady_clock;
66using TimePointType = time_point<ClockType>;
67using DurationType = duration<ClockType::rep, ClockType::period>;
68using CountAndDurationType = std::pair<size_t, DurationType>;
69using NameAndCountAndDurationType =
70 std::pair<std::string, CountAndDurationType>;
71
72} // anonymous namespace
73
74/// Represents an open or completed time section entry to be captured.
75struct llvm::TimeTraceProfilerEntry {
76 const TimePointType Start;
77 TimePointType End;
78 const std::string Name;
79 TimeTraceMetadata Metadata;
80
81 const TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent;
82 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
83 std::string &&Dt, TimeTraceEventType Et)
84 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(),
85 EventType(Et) {
86 Metadata.Detail = std::move(Dt);
87 }
88
89 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
90 TimeTraceMetadata &&Mt, TimeTraceEventType Et)
91 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
92 Metadata(std::move(Mt)), EventType(Et) {}
93
94 // Calculate timings for FlameGraph. Cast time points to microsecond precision
95 // rather than casting duration. This avoids truncation issues causing inner
96 // scopes overruning outer scopes.
97 ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const {
98 return (time_point_cast<microseconds>(t: Start) -
99 time_point_cast<microseconds>(t: StartTime))
100 .count();
101 }
102
103 ClockType::rep getFlameGraphDurUs() const {
104 return (time_point_cast<microseconds>(t: End) -
105 time_point_cast<microseconds>(t: Start))
106 .count();
107 }
108};
109
110// Represents a currently open (in-progress) time trace entry. InstantEvents
111// that happen during an open event are associated with the duration of this
112// parent event and they are dropped if parent duration is shorter than
113// the granularity.
114struct InProgressEntry {
115 TimeTraceProfilerEntry Event;
116 std::vector<TimeTraceProfilerEntry> InstantEvents;
117
118 InProgressEntry(TimePointType S, TimePointType E, std::string N,
119 std::string Dt, TimeTraceEventType Et)
120 : Event(std::move(S), std::move(E), std::move(N), std::move(Dt), Et),
121 InstantEvents() {}
122
123 InProgressEntry(TimePointType S, TimePointType E, std::string N,
124 TimeTraceMetadata Mt, TimeTraceEventType Et)
125 : Event(std::move(S), std::move(E), std::move(N), std::move(Mt), Et),
126 InstantEvents() {}
127};
128
129struct llvm::TimeTraceProfiler {
130 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "",
131 bool TimeTraceVerbose = false)
132 : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
133 ProcName(ProcName), Pid(sys::Process::getProcessId()),
134 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity),
135 TimeTraceVerbose(TimeTraceVerbose) {
136 llvm::get_thread_name(Name&: ThreadName);
137 }
138
139 TimeTraceProfilerEntry *
140 begin(std::string Name, llvm::function_ref<std::string()> Detail,
141 TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) {
142 assert(EventType != TimeTraceEventType::InstantEvent &&
143 "Instant Events don't have begin and end.");
144 Stack.emplace_back(Args: std::make_unique<InProgressEntry>(
145 args: ClockType::now(), args: TimePointType(), args: std::move(Name), args: Detail(),
146 args&: EventType));
147 return &Stack.back()->Event;
148 }
149
150 TimeTraceProfilerEntry *
151 begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata,
152 TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) {
153 assert(EventType != TimeTraceEventType::InstantEvent &&
154 "Instant Events don't have begin and end.");
155 Stack.emplace_back(Args: std::make_unique<InProgressEntry>(
156 args: ClockType::now(), args: TimePointType(), args: std::move(Name), args: Metadata(),
157 args&: EventType));
158 return &Stack.back()->Event;
159 }
160
161 void insert(std::string Name, llvm::function_ref<std::string()> Detail) {
162 if (Stack.empty())
163 return;
164
165 Stack.back()->InstantEvents.emplace_back(args: TimeTraceProfilerEntry(
166 ClockType::now(), TimePointType(), std::move(Name), Detail(),
167 TimeTraceEventType::InstantEvent));
168 }
169
170 void end() {
171 assert(!Stack.empty() && "Must call begin() first");
172 end(E&: Stack.back()->Event);
173 }
174
175 void end(TimeTraceProfilerEntry &E) {
176 assert(!Stack.empty() && "Must call begin() first");
177 E.End = ClockType::now();
178
179 // Calculate duration at full precision for overall counts.
180 DurationType Duration = E.End - E.Start;
181
182 const auto *Iter =
183 llvm::find_if(Range&: Stack, P: [&](const std::unique_ptr<InProgressEntry> &Val) {
184 return &Val->Event == &E;
185 });
186 assert(Iter != Stack.end() && "Event not in the Stack");
187
188 // Only include sections longer or equal to TimeTraceGranularity msec.
189 if (duration_cast<microseconds>(d: Duration).count() >= TimeTraceGranularity) {
190 Entries.emplace_back(Args&: E);
191 for (auto &IE : Iter->get()->InstantEvents) {
192 Entries.emplace_back(Args&: IE);
193 }
194 }
195
196 // Track total time taken by each "name", but only the topmost levels of
197 // them; e.g. if there's a template instantiation that instantiates other
198 // templates from within, we only want to add the topmost one. "topmost"
199 // happens to be the ones that don't have any currently open entries above
200 // itself.
201 if (llvm::none_of(Range: llvm::drop_begin(RangeOrContainer: llvm::reverse(C&: Stack)),
202 P: [&](const std::unique_ptr<InProgressEntry> &Val) {
203 return Val->Event.Name == E.Name;
204 })) {
205 auto &CountAndTotal = CountAndTotalPerName[E.Name];
206 CountAndTotal.first++;
207 CountAndTotal.second += Duration;
208 };
209
210 Stack.erase(CI: Iter);
211 }
212
213 // Write events from this TimeTraceProfilerInstance and
214 // ThreadTimeTraceProfilerInstances.
215 void write(raw_pwrite_stream &OS) {
216 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
217 auto &Instances = getTimeTraceProfilerInstances();
218 std::lock_guard<std::mutex> Lock(Instances.Lock);
219 assert(Stack.empty() &&
220 "All profiler sections should be ended when calling write");
221 assert(llvm::all_of(Instances.List,
222 [](const auto &TTP) { return TTP->Stack.empty(); }) &&
223 "All profiler sections should be ended when calling write");
224
225 json::OStream J(OS);
226 J.objectBegin();
227 J.attributeBegin(Key: "traceEvents");
228 J.arrayBegin();
229
230 // Emit all events for the main flame graph.
231 auto writeEvent = [&](const auto &E, uint64_t Tid) {
232 auto StartUs = E.getFlameGraphStartUs(StartTime);
233 auto DurUs = E.getFlameGraphDurUs();
234
235 J.object(Contents: [&] {
236 J.attribute(Key: "pid", Contents: Pid);
237 J.attribute(Key: "tid", Contents: int64_t(Tid));
238 J.attribute(Key: "ts", Contents: StartUs);
239 if (E.EventType == TimeTraceEventType::AsyncEvent) {
240 J.attribute(Key: "cat", Contents: E.Name);
241 J.attribute(Key: "ph", Contents: "b");
242 J.attribute(Key: "id", Contents: 0);
243 } else if (E.EventType == TimeTraceEventType::CompleteEvent) {
244 J.attribute(Key: "ph", Contents: "X");
245 J.attribute(Key: "dur", Contents: DurUs);
246 } else { // instant event
247 assert(E.EventType == TimeTraceEventType::InstantEvent &&
248 "InstantEvent expected");
249 J.attribute(Key: "ph", Contents: "i");
250 }
251 J.attribute(Key: "name", Contents: E.Name);
252 if (!E.Metadata.isEmpty()) {
253 J.attributeObject(Key: "args", Contents: [&] {
254 if (!E.Metadata.Detail.empty())
255 J.attribute(Key: "detail", Contents: E.Metadata.Detail);
256 if (!E.Metadata.File.empty())
257 J.attribute(Key: "file", Contents: E.Metadata.File);
258 if (E.Metadata.Line > 0)
259 J.attribute(Key: "line", Contents: E.Metadata.Line);
260 });
261 }
262 });
263
264 if (E.EventType == TimeTraceEventType::AsyncEvent) {
265 J.object(Contents: [&] {
266 J.attribute(Key: "pid", Contents: Pid);
267 J.attribute(Key: "tid", Contents: int64_t(Tid));
268 J.attribute(Key: "ts", Contents: StartUs + DurUs);
269 J.attribute(Key: "cat", Contents: E.Name);
270 J.attribute(Key: "ph", Contents: "e");
271 J.attribute(Key: "id", Contents: 0);
272 J.attribute(Key: "name", Contents: E.Name);
273 });
274 }
275 };
276 for (const TimeTraceProfilerEntry &E : Entries)
277 writeEvent(E, this->Tid);
278 for (const TimeTraceProfiler *TTP : Instances.List)
279 for (const TimeTraceProfilerEntry &E : TTP->Entries)
280 writeEvent(E, TTP->Tid);
281
282 // Emit totals by section name as additional "thread" events, sorted from
283 // longest one.
284 // Find highest used thread id.
285 uint64_t MaxTid = this->Tid;
286 for (const TimeTraceProfiler *TTP : Instances.List)
287 MaxTid = std::max(a: MaxTid, b: TTP->Tid);
288
289 // Combine all CountAndTotalPerName from threads into one.
290 StringMap<CountAndDurationType> AllCountAndTotalPerName;
291 auto combineStat = [&](const auto &Stat) {
292 StringRef Key = Stat.getKey();
293 auto Value = Stat.getValue();
294 auto &CountAndTotal = AllCountAndTotalPerName[Key];
295 CountAndTotal.first += Value.first;
296 CountAndTotal.second += Value.second;
297 };
298 for (const auto &Stat : CountAndTotalPerName)
299 combineStat(Stat);
300 for (const TimeTraceProfiler *TTP : Instances.List)
301 for (const auto &Stat : TTP->CountAndTotalPerName)
302 combineStat(Stat);
303
304 std::vector<NameAndCountAndDurationType> SortedTotals;
305 SortedTotals.reserve(n: AllCountAndTotalPerName.size());
306 for (const auto &Total : AllCountAndTotalPerName)
307 SortedTotals.emplace_back(args: std::string(Total.getKey()), args: Total.getValue());
308
309 llvm::sort(C&: SortedTotals, Comp: [](const NameAndCountAndDurationType &A,
310 const NameAndCountAndDurationType &B) {
311 return A.second.second > B.second.second;
312 });
313
314 // Report totals on separate threads of tracing file.
315 uint64_t TotalTid = MaxTid + 1;
316 for (const NameAndCountAndDurationType &Total : SortedTotals) {
317 auto DurUs = duration_cast<microseconds>(d: Total.second.second).count();
318 auto Count = AllCountAndTotalPerName[Total.first].first;
319
320 J.object(Contents: [&] {
321 J.attribute(Key: "pid", Contents: Pid);
322 J.attribute(Key: "tid", Contents: int64_t(TotalTid));
323 J.attribute(Key: "ph", Contents: "X");
324 J.attribute(Key: "ts", Contents: 0);
325 J.attribute(Key: "dur", Contents: DurUs);
326 J.attribute(Key: "name", Contents: "Total " + Total.first);
327 J.attributeObject(Key: "args", Contents: [&] {
328 J.attribute(Key: "count", Contents: int64_t(Count));
329 J.attribute(Key: "avg ms", Contents: int64_t(DurUs / Count / 1000));
330 });
331 });
332
333 ++TotalTid;
334 }
335
336 auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
337 StringRef arg) {
338 J.object(Contents: [&] {
339 J.attribute(Key: "cat", Contents: "");
340 J.attribute(Key: "pid", Contents: Pid);
341 J.attribute(Key: "tid", Contents: int64_t(Tid));
342 J.attribute(Key: "ts", Contents: 0);
343 J.attribute(Key: "ph", Contents: "M");
344 J.attribute(Key: "name", Contents: Name);
345 J.attributeObject(Key: "args", Contents: [&] { J.attribute(Key: "name", Contents: arg); });
346 });
347 };
348
349 writeMetadataEvent("process_name", Tid, ProcName);
350 writeMetadataEvent("thread_name", Tid, ThreadName);
351 for (const TimeTraceProfiler *TTP : Instances.List)
352 writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
353
354 J.arrayEnd();
355 J.attributeEnd();
356
357 // Emit the absolute time when this TimeProfiler started.
358 // This can be used to combine the profiling data from
359 // multiple processes and preserve actual time intervals.
360 J.attribute(Key: "beginningOfTime",
361 Contents: time_point_cast<microseconds>(t: BeginningOfTime)
362 .time_since_epoch()
363 .count());
364
365 J.objectEnd();
366 }
367
368 SmallVector<std::unique_ptr<InProgressEntry>, 16> Stack;
369 SmallVector<TimeTraceProfilerEntry, 128> Entries;
370 StringMap<CountAndDurationType> CountAndTotalPerName;
371 // System clock time when the session was begun.
372 const time_point<system_clock> BeginningOfTime;
373 // Profiling clock time when the session was begun.
374 const TimePointType StartTime;
375 const std::string ProcName;
376 const sys::Process::Pid Pid;
377 SmallString<0> ThreadName;
378 const uint64_t Tid;
379
380 // Minimum time granularity (in microseconds)
381 const unsigned TimeTraceGranularity;
382
383 // Make time trace capture verbose event details (e.g. source filenames). This
384 // can increase the size of the output by 2-3 times.
385 const bool TimeTraceVerbose;
386};
387
388bool llvm::isTimeTraceVerbose() {
389 return getTimeTraceProfilerInstance() &&
390 getTimeTraceProfilerInstance()->TimeTraceVerbose;
391}
392
393void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
394 StringRef ProcName,
395 bool TimeTraceVerbose) {
396 assert(TimeTraceProfilerInstance == nullptr &&
397 "Profiler should not be initialized");
398 TimeTraceProfilerInstance = new TimeTraceProfiler(
399 TimeTraceGranularity, llvm::sys::path::filename(path: ProcName),
400 TimeTraceVerbose);
401}
402
403// Removes all TimeTraceProfilerInstances.
404// Called from main thread.
405void llvm::timeTraceProfilerCleanup() {
406 delete TimeTraceProfilerInstance;
407 TimeTraceProfilerInstance = nullptr;
408
409 auto &Instances = getTimeTraceProfilerInstances();
410 std::lock_guard<std::mutex> Lock(Instances.Lock);
411 for (auto *TTP : Instances.List)
412 delete TTP;
413 Instances.List.clear();
414}
415
416// Finish TimeTraceProfilerInstance on a worker thread.
417// This doesn't remove the instance, just moves the pointer to global vector.
418void llvm::timeTraceProfilerFinishThread() {
419 auto &Instances = getTimeTraceProfilerInstances();
420 std::lock_guard<std::mutex> Lock(Instances.Lock);
421 Instances.List.push_back(x: TimeTraceProfilerInstance);
422 TimeTraceProfilerInstance = nullptr;
423}
424
425void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
426 assert(TimeTraceProfilerInstance != nullptr &&
427 "Profiler object can't be null");
428 TimeTraceProfilerInstance->write(OS);
429}
430
431Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
432 StringRef FallbackFileName) {
433 assert(TimeTraceProfilerInstance != nullptr &&
434 "Profiler object can't be null");
435
436 std::string Path = PreferredFileName.str();
437 if (Path.empty()) {
438 Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
439 Path += TimeTraceFileExtension;
440 }
441
442 std::error_code EC;
443 raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
444 if (EC)
445 return createStringError(EC, S: "Could not open " + Path);
446
447 timeTraceProfilerWrite(OS);
448 return Error::success();
449}
450
451TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name,
452 StringRef Detail) {
453 if (TimeTraceProfilerInstance != nullptr)
454 return TimeTraceProfilerInstance->begin(
455 Name: std::string(Name), Detail: [&]() { return std::string(Detail); },
456 EventType: TimeTraceEventType::CompleteEvent);
457 return nullptr;
458}
459
460TimeTraceProfilerEntry *
461llvm::timeTraceProfilerBegin(StringRef Name,
462 llvm::function_ref<std::string()> Detail) {
463 if (TimeTraceProfilerInstance != nullptr)
464 return TimeTraceProfilerInstance->begin(Name: std::string(Name), Detail,
465 EventType: TimeTraceEventType::CompleteEvent);
466 return nullptr;
467}
468
469TimeTraceProfilerEntry *
470llvm::timeTraceProfilerBegin(StringRef Name,
471 llvm::function_ref<TimeTraceMetadata()> Metadata) {
472 if (TimeTraceProfilerInstance != nullptr)
473 return TimeTraceProfilerInstance->begin(Name: std::string(Name), Metadata,
474 EventType: TimeTraceEventType::CompleteEvent);
475 return nullptr;
476}
477
478TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name,
479 StringRef Detail) {
480 if (TimeTraceProfilerInstance != nullptr)
481 return TimeTraceProfilerInstance->begin(
482 Name: std::string(Name), Detail: [&]() { return std::string(Detail); },
483 EventType: TimeTraceEventType::AsyncEvent);
484 return nullptr;
485}
486
487void llvm::timeTraceAddInstantEvent(StringRef Name,
488 llvm::function_ref<std::string()> Detail) {
489 if (TimeTraceProfilerInstance != nullptr)
490 TimeTraceProfilerInstance->insert(Name: std::string(Name), Detail);
491}
492
493void llvm::timeTraceProfilerEnd() {
494 if (TimeTraceProfilerInstance != nullptr)
495 TimeTraceProfilerInstance->end();
496}
497
498void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) {
499 if (TimeTraceProfilerInstance != nullptr)
500 TimeTraceProfilerInstance->end(E&: *E);
501}
502