1//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements hierarchical time profiler.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Support/TimeProfiler.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/STLFunctionalExtras.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/StringMap.h"
18#include "llvm/Support/JSON.h"
19#include "llvm/Support/Path.h"
20#include "llvm/Support/Process.h"
21#include "llvm/Support/Threading.h"
22#include <algorithm>
23#include <cassert>
24#include <chrono>
25#include <memory>
26#include <mutex>
27#include <string>
28#include <vector>
29
30using namespace llvm;
31
32namespace {
33
34using std::chrono::duration;
35using std::chrono::duration_cast;
36using std::chrono::microseconds;
37using std::chrono::steady_clock;
38using std::chrono::system_clock;
39using std::chrono::time_point;
40using std::chrono::time_point_cast;
41
42struct TimeTraceProfilerInstances {
43 std::mutex Lock;
44 std::vector<TimeTraceProfiler *> List;
45};
46
47TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
48 static TimeTraceProfilerInstances Instances;
49 return Instances;
50}
51
52} // anonymous namespace
53
54// Per Thread instance
55static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
56
57TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
58 return TimeTraceProfilerInstance;
59}
60
61namespace {
62
63using ClockType = steady_clock;
64using TimePointType = time_point<ClockType>;
65using DurationType = duration<ClockType::rep, ClockType::period>;
66using CountAndDurationType = std::pair<size_t, DurationType>;
67using NameAndCountAndDurationType =
68 std::pair<std::string, CountAndDurationType>;
69
70} // anonymous namespace
71
72/// Represents an open or completed time section entry to be captured.
73struct llvm::TimeTraceProfilerEntry {
74 const TimePointType Start;
75 TimePointType End;
76 const std::string Name;
77 TimeTraceMetadata Metadata;
78
79 const TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent;
80 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
81 std::string &&Dt, TimeTraceEventType Et)
82 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(),
83 EventType(Et) {
84 Metadata.Detail = std::move(Dt);
85 }
86
87 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
88 TimeTraceMetadata &&Mt, TimeTraceEventType Et)
89 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
90 Metadata(std::move(Mt)), EventType(Et) {}
91
92 // Calculate timings for FlameGraph. Cast time points to microsecond precision
93 // rather than casting duration. This avoids truncation issues causing inner
94 // scopes overruning outer scopes.
95 ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const {
96 return (time_point_cast<microseconds>(t: Start) -
97 time_point_cast<microseconds>(t: StartTime))
98 .count();
99 }
100
101 ClockType::rep getFlameGraphDurUs() const {
102 return (time_point_cast<microseconds>(t: End) -
103 time_point_cast<microseconds>(t: Start))
104 .count();
105 }
106};
107
108// Represents a currently open (in-progress) time trace entry. InstantEvents
109// that happen during an open event are associated with the duration of this
110// parent event and they are dropped if parent duration is shorter than
111// the granularity.
112struct InProgressEntry {
113 TimeTraceProfilerEntry Event;
114 std::vector<TimeTraceProfilerEntry> InstantEvents;
115
116 InProgressEntry(TimePointType S, TimePointType E, std::string N,
117 std::string Dt, TimeTraceEventType Et)
118 : Event(std::move(S), std::move(E), std::move(N), std::move(Dt), Et),
119 InstantEvents() {}
120
121 InProgressEntry(TimePointType S, TimePointType E, std::string N,
122 TimeTraceMetadata Mt, TimeTraceEventType Et)
123 : Event(std::move(S), std::move(E), std::move(N), std::move(Mt), Et),
124 InstantEvents() {}
125};
126
127struct llvm::TimeTraceProfiler {
128 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "",
129 bool TimeTraceVerbose = false)
130 : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
131 ProcName(ProcName), Pid(sys::Process::getProcessId()),
132 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity),
133 TimeTraceVerbose(TimeTraceVerbose) {
134 llvm::get_thread_name(Name&: ThreadName);
135 }
136
137 TimeTraceProfilerEntry *
138 begin(std::string Name, llvm::function_ref<std::string()> Detail,
139 TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) {
140 assert(EventType != TimeTraceEventType::InstantEvent &&
141 "Instant Events don't have begin and end.");
142 Stack.emplace_back(Args: std::make_unique<InProgressEntry>(
143 args: ClockType::now(), args: TimePointType(), args: std::move(Name), args: Detail(),
144 args&: EventType));
145 return &Stack.back()->Event;
146 }
147
148 TimeTraceProfilerEntry *
149 begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata,
150 TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) {
151 assert(EventType != TimeTraceEventType::InstantEvent &&
152 "Instant Events don't have begin and end.");
153 Stack.emplace_back(Args: std::make_unique<InProgressEntry>(
154 args: ClockType::now(), args: TimePointType(), args: std::move(Name), args: Metadata(),
155 args&: EventType));
156 return &Stack.back()->Event;
157 }
158
159 void insert(std::string Name, llvm::function_ref<std::string()> Detail) {
160 if (Stack.empty())
161 return;
162
163 Stack.back()->InstantEvents.emplace_back(args: TimeTraceProfilerEntry(
164 ClockType::now(), TimePointType(), std::move(Name), Detail(),
165 TimeTraceEventType::InstantEvent));
166 }
167
168 void end() {
169 assert(!Stack.empty() && "Must call begin() first");
170 end(E&: Stack.back()->Event);
171 }
172
173 void end(TimeTraceProfilerEntry &E) {
174 assert(!Stack.empty() && "Must call begin() first");
175 E.End = ClockType::now();
176
177 // Calculate duration at full precision for overall counts.
178 DurationType Duration = E.End - E.Start;
179
180 const auto *Iter =
181 llvm::find_if(Range&: Stack, P: [&](const std::unique_ptr<InProgressEntry> &Val) {
182 return &Val->Event == &E;
183 });
184 assert(Iter != Stack.end() && "Event not in the Stack");
185
186 // Only include sections longer or equal to TimeTraceGranularity msec.
187 if (duration_cast<microseconds>(d: Duration).count() >= TimeTraceGranularity) {
188 Entries.emplace_back(Args&: E);
189 for (auto &IE : Iter->get()->InstantEvents) {
190 Entries.emplace_back(Args&: IE);
191 }
192 }
193
194 // Track total time taken by each "name", but only the topmost levels of
195 // them; e.g. if there's a template instantiation that instantiates other
196 // templates from within, we only want to add the topmost one. "topmost"
197 // happens to be the ones that don't have any currently open entries above
198 // itself.
199 if (llvm::none_of(Range: llvm::drop_begin(RangeOrContainer: llvm::reverse(C&: Stack)),
200 P: [&](const std::unique_ptr<InProgressEntry> &Val) {
201 return Val->Event.Name == E.Name;
202 })) {
203 auto &CountAndTotal = CountAndTotalPerName[E.Name];
204 CountAndTotal.first++;
205 CountAndTotal.second += Duration;
206 };
207
208 Stack.erase(CI: Iter);
209 }
210
211 // Write events from this TimeTraceProfilerInstance and
212 // ThreadTimeTraceProfilerInstances.
213 void write(raw_pwrite_stream &OS) {
214 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
215 auto &Instances = getTimeTraceProfilerInstances();
216 std::lock_guard<std::mutex> Lock(Instances.Lock);
217 assert(Stack.empty() &&
218 "All profiler sections should be ended when calling write");
219 assert(llvm::all_of(Instances.List,
220 [](const auto &TTP) { return TTP->Stack.empty(); }) &&
221 "All profiler sections should be ended when calling write");
222
223 json::OStream J(OS);
224 J.objectBegin();
225 J.attributeBegin(Key: "traceEvents");
226 J.arrayBegin();
227
228 // Emit all events for the main flame graph.
229 auto writeEvent = [&](const auto &E, uint64_t Tid) {
230 auto StartUs = E.getFlameGraphStartUs(StartTime);
231 auto DurUs = E.getFlameGraphDurUs();
232
233 J.object(Contents: [&] {
234 J.attribute(Key: "pid", Contents: Pid);
235 J.attribute(Key: "tid", Contents: int64_t(Tid));
236 J.attribute(Key: "ts", Contents: StartUs);
237 if (E.EventType == TimeTraceEventType::AsyncEvent) {
238 J.attribute(Key: "cat", Contents: E.Name);
239 J.attribute(Key: "ph", Contents: "b");
240 J.attribute(Key: "id", Contents: 0);
241 } else if (E.EventType == TimeTraceEventType::CompleteEvent) {
242 J.attribute(Key: "ph", Contents: "X");
243 J.attribute(Key: "dur", Contents: DurUs);
244 } else { // instant event
245 assert(E.EventType == TimeTraceEventType::InstantEvent &&
246 "InstantEvent expected");
247 J.attribute(Key: "ph", Contents: "i");
248 }
249 J.attribute(Key: "name", Contents: E.Name);
250 if (!E.Metadata.isEmpty()) {
251 J.attributeObject(Key: "args", Contents: [&] {
252 if (!E.Metadata.Detail.empty())
253 J.attribute(Key: "detail", Contents: E.Metadata.Detail);
254 if (!E.Metadata.File.empty())
255 J.attribute(Key: "file", Contents: E.Metadata.File);
256 if (E.Metadata.Line > 0)
257 J.attribute(Key: "line", Contents: E.Metadata.Line);
258 });
259 }
260 });
261
262 if (E.EventType == TimeTraceEventType::AsyncEvent) {
263 J.object(Contents: [&] {
264 J.attribute(Key: "pid", Contents: Pid);
265 J.attribute(Key: "tid", Contents: int64_t(Tid));
266 J.attribute(Key: "ts", Contents: StartUs + DurUs);
267 J.attribute(Key: "cat", Contents: E.Name);
268 J.attribute(Key: "ph", Contents: "e");
269 J.attribute(Key: "id", Contents: 0);
270 J.attribute(Key: "name", Contents: E.Name);
271 });
272 }
273 };
274 for (const TimeTraceProfilerEntry &E : Entries)
275 writeEvent(E, this->Tid);
276 for (const TimeTraceProfiler *TTP : Instances.List)
277 for (const TimeTraceProfilerEntry &E : TTP->Entries)
278 writeEvent(E, TTP->Tid);
279
280 // Emit totals by section name as additional "thread" events, sorted from
281 // longest one.
282 // Find highest used thread id.
283 uint64_t MaxTid = this->Tid;
284 for (const TimeTraceProfiler *TTP : Instances.List)
285 MaxTid = std::max(a: MaxTid, b: TTP->Tid);
286
287 // Combine all CountAndTotalPerName from threads into one.
288 StringMap<CountAndDurationType> AllCountAndTotalPerName;
289 auto combineStat = [&](const auto &Stat) {
290 StringRef Key = Stat.getKey();
291 auto Value = Stat.getValue();
292 auto &CountAndTotal = AllCountAndTotalPerName[Key];
293 CountAndTotal.first += Value.first;
294 CountAndTotal.second += Value.second;
295 };
296 for (const auto &Stat : CountAndTotalPerName)
297 combineStat(Stat);
298 for (const TimeTraceProfiler *TTP : Instances.List)
299 for (const auto &Stat : TTP->CountAndTotalPerName)
300 combineStat(Stat);
301
302 std::vector<NameAndCountAndDurationType> SortedTotals;
303 SortedTotals.reserve(n: AllCountAndTotalPerName.size());
304 for (const auto &Total : AllCountAndTotalPerName)
305 SortedTotals.emplace_back(args: std::string(Total.getKey()), args: Total.getValue());
306
307 llvm::sort(C&: SortedTotals, Comp: [](const NameAndCountAndDurationType &A,
308 const NameAndCountAndDurationType &B) {
309 return A.second.second > B.second.second;
310 });
311
312 // Report totals on separate threads of tracing file.
313 uint64_t TotalTid = MaxTid + 1;
314 for (const NameAndCountAndDurationType &Total : SortedTotals) {
315 auto DurUs = duration_cast<microseconds>(d: Total.second.second).count();
316 auto Count = AllCountAndTotalPerName[Total.first].first;
317
318 J.object(Contents: [&] {
319 J.attribute(Key: "pid", Contents: Pid);
320 J.attribute(Key: "tid", Contents: int64_t(TotalTid));
321 J.attribute(Key: "ph", Contents: "X");
322 J.attribute(Key: "ts", Contents: 0);
323 J.attribute(Key: "dur", Contents: DurUs);
324 J.attribute(Key: "name", Contents: "Total " + Total.first);
325 J.attributeObject(Key: "args", Contents: [&] {
326 J.attribute(Key: "count", Contents: int64_t(Count));
327 J.attribute(Key: "avg ms", Contents: int64_t(DurUs / Count / 1000));
328 });
329 });
330
331 ++TotalTid;
332 }
333
334 auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
335 StringRef arg) {
336 J.object(Contents: [&] {
337 J.attribute(Key: "cat", Contents: "");
338 J.attribute(Key: "pid", Contents: Pid);
339 J.attribute(Key: "tid", Contents: int64_t(Tid));
340 J.attribute(Key: "ts", Contents: 0);
341 J.attribute(Key: "ph", Contents: "M");
342 J.attribute(Key: "name", Contents: Name);
343 J.attributeObject(Key: "args", Contents: [&] { J.attribute(Key: "name", Contents: arg); });
344 });
345 };
346
347 writeMetadataEvent("process_name", Tid, ProcName);
348 writeMetadataEvent("thread_name", Tid, ThreadName);
349 for (const TimeTraceProfiler *TTP : Instances.List)
350 writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
351
352 J.arrayEnd();
353 J.attributeEnd();
354
355 // Emit the absolute time when this TimeProfiler started.
356 // This can be used to combine the profiling data from
357 // multiple processes and preserve actual time intervals.
358 J.attribute(Key: "beginningOfTime",
359 Contents: time_point_cast<microseconds>(t: BeginningOfTime)
360 .time_since_epoch()
361 .count());
362
363 J.objectEnd();
364 }
365
366 SmallVector<std::unique_ptr<InProgressEntry>, 16> Stack;
367 SmallVector<TimeTraceProfilerEntry, 128> Entries;
368 StringMap<CountAndDurationType> CountAndTotalPerName;
369 // System clock time when the session was begun.
370 const time_point<system_clock> BeginningOfTime;
371 // Profiling clock time when the session was begun.
372 const TimePointType StartTime;
373 const std::string ProcName;
374 const sys::Process::Pid Pid;
375 SmallString<0> ThreadName;
376 const uint64_t Tid;
377
378 // Minimum time granularity (in microseconds)
379 const unsigned TimeTraceGranularity;
380
381 // Make time trace capture verbose event details (e.g. source filenames). This
382 // can increase the size of the output by 2-3 times.
383 const bool TimeTraceVerbose;
384};
385
386bool llvm::isTimeTraceVerbose() {
387 return getTimeTraceProfilerInstance() &&
388 getTimeTraceProfilerInstance()->TimeTraceVerbose;
389}
390
391void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
392 StringRef ProcName,
393 bool TimeTraceVerbose) {
394 assert(TimeTraceProfilerInstance == nullptr &&
395 "Profiler should not be initialized");
396 TimeTraceProfilerInstance = new TimeTraceProfiler(
397 TimeTraceGranularity, llvm::sys::path::filename(path: ProcName),
398 TimeTraceVerbose);
399}
400
401// Removes all TimeTraceProfilerInstances.
402// Called from main thread.
403void llvm::timeTraceProfilerCleanup() {
404 delete TimeTraceProfilerInstance;
405 TimeTraceProfilerInstance = nullptr;
406
407 auto &Instances = getTimeTraceProfilerInstances();
408 std::lock_guard<std::mutex> Lock(Instances.Lock);
409 for (auto *TTP : Instances.List)
410 delete TTP;
411 Instances.List.clear();
412}
413
414// Finish TimeTraceProfilerInstance on a worker thread.
415// This doesn't remove the instance, just moves the pointer to global vector.
416void llvm::timeTraceProfilerFinishThread() {
417 auto &Instances = getTimeTraceProfilerInstances();
418 std::lock_guard<std::mutex> Lock(Instances.Lock);
419 Instances.List.push_back(x: TimeTraceProfilerInstance);
420 TimeTraceProfilerInstance = nullptr;
421}
422
423void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
424 assert(TimeTraceProfilerInstance != nullptr &&
425 "Profiler object can't be null");
426 TimeTraceProfilerInstance->write(OS);
427}
428
429Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
430 StringRef FallbackFileName) {
431 assert(TimeTraceProfilerInstance != nullptr &&
432 "Profiler object can't be null");
433
434 std::string Path = PreferredFileName.str();
435 if (Path.empty()) {
436 Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
437 Path += ".time-trace";
438 }
439
440 std::error_code EC;
441 raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
442 if (EC)
443 return createStringError(EC, S: "Could not open " + Path);
444
445 timeTraceProfilerWrite(OS);
446 return Error::success();
447}
448
449TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name,
450 StringRef Detail) {
451 if (TimeTraceProfilerInstance != nullptr)
452 return TimeTraceProfilerInstance->begin(
453 Name: std::string(Name), Detail: [&]() { return std::string(Detail); },
454 EventType: TimeTraceEventType::CompleteEvent);
455 return nullptr;
456}
457
458TimeTraceProfilerEntry *
459llvm::timeTraceProfilerBegin(StringRef Name,
460 llvm::function_ref<std::string()> Detail) {
461 if (TimeTraceProfilerInstance != nullptr)
462 return TimeTraceProfilerInstance->begin(Name: std::string(Name), Detail,
463 EventType: TimeTraceEventType::CompleteEvent);
464 return nullptr;
465}
466
467TimeTraceProfilerEntry *
468llvm::timeTraceProfilerBegin(StringRef Name,
469 llvm::function_ref<TimeTraceMetadata()> Metadata) {
470 if (TimeTraceProfilerInstance != nullptr)
471 return TimeTraceProfilerInstance->begin(Name: std::string(Name), Metadata,
472 EventType: TimeTraceEventType::CompleteEvent);
473 return nullptr;
474}
475
476TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name,
477 StringRef Detail) {
478 if (TimeTraceProfilerInstance != nullptr)
479 return TimeTraceProfilerInstance->begin(
480 Name: std::string(Name), Detail: [&]() { return std::string(Detail); },
481 EventType: TimeTraceEventType::AsyncEvent);
482 return nullptr;
483}
484
485void llvm::timeTraceAddInstantEvent(StringRef Name,
486 llvm::function_ref<std::string()> Detail) {
487 if (TimeTraceProfilerInstance != nullptr)
488 TimeTraceProfilerInstance->insert(Name: std::string(Name), Detail);
489}
490
491void llvm::timeTraceProfilerEnd() {
492 if (TimeTraceProfilerInstance != nullptr)
493 TimeTraceProfilerInstance->end();
494}
495
496void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) {
497 if (TimeTraceProfilerInstance != nullptr)
498 TimeTraceProfilerInstance->end(E&: *E);
499}
500