//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This is the implementation of a profiling handler.
//
//===----------------------------------------------------------------------===//
#include <cstddef> // for std::byte
#include <memory>
#include <time.h>

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"
#include <pthread.h>

namespace __xray {

namespace {

static atomic_sint32_t ProfilerLogFlushStatus = {
    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

static atomic_sint32_t ProfilerLogStatus = {
    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

static SpinMutex ProfilerOptionsMutex;

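// The atomics in ProfilingData encode the state of this thread's lazily
// initialized objects: 0 means "not yet initialized", the sentinel value 1
// means "initialization in progress" (claimed through a compare-exchange in
// getThreadLocalData below), and any other value is the address of the
// fully-constructed object in thread-local storage.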
struct ProfilingData {
  atomic_uintptr_t Allocators;
  atomic_uintptr_t FCT;
};

static pthread_key_t ProfilingKey;

// We use a global buffer queue, which gets initialized once and gets reset
// when profiling is "done".
alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)];
static BufferQueue *BQ = nullptr;

thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
alignas(FunctionCallTrie::Allocators) thread_local std::byte
    AllocatorsStorage[sizeof(FunctionCallTrie::Allocators)];
alignas(FunctionCallTrie) thread_local std::byte
    FunctionCallTrieStorage[sizeof(FunctionCallTrie)];
thread_local ProfilingData TLD{{0}, {0}};
thread_local atomic_uint8_t ReentranceGuard{0};

// We use a separate guard to ensure that if this thread is already cleaning
// up, any signal handlers that fire do not attempt to clean up or initialize
// the thread-local data.
thread_local atomic_uint8_t TLDInitGuard{0};

// We also use a separate latch to signal that the thread is exiting, so that
// non-essential work (like recording events) can be ignored.
thread_local atomic_uint8_t ThreadExitingLatch{0};

static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
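  // One-time, per-thread registration: the thread_local lambda result is
  // evaluated exactly once per thread, storing the address of TLD under
  // ProfilingKey so the key's destructor can find this thread's data at
  // thread-exit time.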
  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
    pthread_setspecific(ProfilingKey, &TLD);
    return false;
  }();
  (void)ThreadOnce;

  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return nullptr;

  if (atomic_load_relaxed(&ThreadExitingLatch))
    return nullptr;

  uptr Allocators = 0;
  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
                                     memory_order_acq_rel)) {
    bool Success = false;
    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        atomic_store(&TLD.Allocators, 0, memory_order_release);
    });

    // Acquire a set of buffers for this thread.
    if (BQ == nullptr)
      return nullptr;

    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;

    Success = true;
    new (&AllocatorsStorage) FunctionCallTrie::Allocators(
        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
    Allocators = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
    atomic_store(&TLD.Allocators, Allocators, memory_order_release);
  }

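  // Whether or not the compare-exchange above succeeded, `Allocators` now
  // holds the current value of TLD.Allocators; a value of 1 means another
  // activation on this thread (e.g. a signal handler interrupting
  // initialization) still owns the sentinel, so bail out rather than touch a
  // half-built object.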
  if (Allocators == 1)
    return nullptr;

  uptr FCT = 0;
  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
    new (&FunctionCallTrieStorage)
        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
            atomic_load_relaxed(&TLD.Allocators)));
    FCT = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
    atomic_store(&TLD.FCT, FCT, memory_order_release);
  }

  if (FCT == 1)
    return nullptr;

  return &TLD;
}

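// Destroys this thread's FunctionCallTrie and Allocators, but only when the
// corresponding atomic actually points at the fully-constructed storage
// (i.e. it holds neither 0 nor the in-progress sentinel). Both atomics are
// reset to 0.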
static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
                 &FunctionCallTrieStorage)))
    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();

  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
  if (Allocators ==
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
}

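// Moves the current thread's trie, allocators, and buffers into the profile
// collector service, then resets ThreadBuffers to a default state so the
// thread can acquire fresh buffers if it records events again.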
static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return;

  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
  if (P != reinterpret_cast<uptr>(
               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
  DCHECK_NE(FCT, nullptr);

  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
  if (A !=
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    return;

  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
  DCHECK_NE(Allocators, nullptr);

  // Always move the data into the profile collector.
  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
                                std::move(ThreadBuffers), GetTid());

  // Re-initialize the ThreadBuffers object to a known "default" state.
  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}

} // namespace

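// Returns the compiler-provided default options baked into the runtime at
// build time through the XRAY_PROFILER_DEFAULT_OPTIONS macro. As a
// hypothetical example, building the runtime with
// -DXRAY_PROFILER_DEFAULT_OPTIONS=func_duration_threshold_us=5 would have
// this function return the string "func_duration_threshold_us=5".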
const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
#else
  return "";
#endif
}

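// Writes the serialized profile out through a LogWriter, but only once
// profiling has been finalized (and unless the no_flush option is set). On
// completion, the profile collector is reset and the profiler returns to the
// uninitialized state.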
XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    if (Verbosity())
      Report("Not flushing profiles, profiling has not been finalized.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  RecursionGuard SignalGuard(ReentranceGuard);
  if (!SignalGuard) {
    if (Verbosity())
      Report("Cannot finalize properly inside a signal handler!\n");
    atomic_store(&ProfilerLogFlushStatus,
                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
                 memory_order_release);
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
                                 memory_order_acq_rel);
  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
    if (Verbosity())
      Report("Not flushing profiles, implementation still flushing.\n");
    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
  }

  // At this point, we'll create the file that will contain the profile, but
  // only if the options say so.
  if (!profilingFlags()->no_flush) {
    // First check whether we have data in the profile collector service
    // before we try and write anything down.
    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
    if (B.Data == nullptr) {
      if (Verbosity())
        Report("profiling: No data to flush.\n");
    } else {
      LogWriter *LW = LogWriter::Open();
      if (LW == nullptr) {
        if (Verbosity())
          Report("profiling: Failed to flush to file, dropping data.\n");
      } else {
        // Now for each of the buffers, write out the profile data as we would
        // see it in memory, verbatim.
        while (B.Data != nullptr && B.Size != 0) {
          LW->WriteAll(reinterpret_cast<const char *>(B.Data),
                       reinterpret_cast<const char *>(B.Data) + B.Size);
          B = profileCollectorService::nextBuffer(B);
        }
        LogWriter::Close(LW);
      }
    }
  }

  profileCollectorService::reset();

  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
               memory_order_release);
  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
               memory_order_release);

  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}

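// Handles entry/exit sled events for the profiling mode. The TSC is read
// before taking the recursion guard so the timestamp is captured as close to
// the instrumented entry/exit point as possible.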
void profilingHandleArg0(int32_t FuncId,
                         XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
  unsigned char CPU;
  auto TSC = readTSC(CPU);
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return;

  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
    return;

  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
    postCurrentThreadFCT(TLD);
    return;
  }

  auto T = getThreadLocalData();
  if (T == nullptr)
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    FCT->enterFunction(FuncId, TSC, CPU);
    break;
  case XRayEntryType::EXIT:
  case XRayEntryType::TAIL:
    FCT->exitFunction(FuncId, TSC, CPU);
    break;
  default:
    // FIXME: Handle bugs.
    break;
  }
}

void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                         uint64_t) XRAY_NEVER_INSTRUMENT {
  return profilingHandleArg0(FuncId, Entry);
}

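// Transitions the profiler from INITIALIZED to FINALIZED: finalizes the
// buffer queue, sleeps for the configured grace period so in-flight handlers
// can observe the state change, posts this thread's data, and serializes
// everything collected so far.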
XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot finalize profile, profiling is not initialized.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Mark then finalize the current generation of buffers. This lets threads
  // currently holding onto buffers keep using them, while the last reference
  // performs the memory cleanup.
  DCHECK_NE(BQ, nullptr);
  BQ->finalize();

  // Wait a grace period to allow threads to see that we're finalizing.
  SleepForMillis(profilingFlags()->grace_period_ms);

  // If we are for some reason entering this function from an instrumented
  // handler, bail out.
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return static_cast<XRayLogInitStatus>(CurrentStatus);

  // Post the current thread's data if we have any.
  postCurrentThreadFCT(TLD);

  // Then force serialization of the log data.
  profileCollectorService::serialize();

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
               memory_order_release);
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}

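// Initializes the profiling implementation: parses the option sources (the
// compiler-defined defaults, the XRAY_PROFILING_OPTIONS environment variable,
// then the caller-provided Options string, in increasing precedence), sets up
// the global buffer queue, and installs the event handlers and exit hooks.
// A minimal usage sketch from client code, assuming the mode was registered
// by the dynamic initializer below:
//
//   __xray_log_select_mode("xray-profiling");
//   __xray_log_init_mode("xray-profiling", "func_duration_threshold_us=5");
//   __xray_patch();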
XRayLogInitStatus
profilingLoggingInit(size_t, size_t, void *Options,
                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      memory_order_acq_rel)) {
    if (Verbosity())
      Report("Cannot initialize an already initialized profiling "
             "implementation.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    FlagParser ConfigParser;
    ProfilerFlags Flags;
    Flags.setDefaults();
    registerProfilerFlags(&ConfigParser, &Flags);
    ConfigParser.ParseString(profilingCompilerDefinedFlags());
    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
    if (Env == nullptr)
      Env = "";
    ConfigParser.ParseString(Env);

    // Then parse the configuration string provided.
    ConfigParser.ParseString(static_cast<const char *>(Options));
    if (Verbosity())
      ReportUnrecognizedFlags();
    *profilingFlags() = Flags;
  }

  // We need to reset the profile data collection implementation now.
  profileCollectorService::reset();

  // Then also reset the buffer queue implementation.
  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->per_thread_allocator_max,
                    profilingFlags()->buffers_max, Success);
    if (!Success) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers!\n");
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    // If we've succeeded, set the global pointer to the initialized storage.
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();
    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
                               profilingFlags()->buffers_max);

    if (InitStatus != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers; error: %s\n",
               BufferQueue::getErrorString(InitStatus));
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    DCHECK(!BQ->finalizing());
  }

  // We need to set up the exit handlers.
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(
      &Once, +[] {
        pthread_key_create(
            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
              if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
                return;

              if (P == nullptr)
                return;

              auto T = reinterpret_cast<ProfilingData *>(P);
              if (atomic_load_relaxed(&T->Allocators) == 0)
                return;

              {
                // If we're somehow executing this while inside a
                // non-reentrant-friendly context, we skip attempting to post
                // the current thread's data.
                RecursionGuard G(ReentranceGuard);
                if (!G)
                  return;

                postCurrentThreadFCT(*T);
              }
            });

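        // For any given thread, the key destructor above and the atexit
        // handler below coordinate through the thread-local
        // ThreadExitingLatch: whichever runs first claims the latch, so a
        // thread's data is posted at most once.
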
        // We also need to set up an exit handler, so that we can get the
        // profile information at exit time. We use the C API to do this, to
        // not rely on C++ ABI functions for registering exit handlers.
        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
            return;

          auto Cleanup =
              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });

          // Finalize and flush.
          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
              profilingFlush() != XRAY_LOG_FLUSHED)
            return;

          if (Verbosity())
            Report("XRay Profile flushed at exit.\n");
        });
      });

  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
  __xray_set_handler(profilingHandleArg0);
  __xray_set_handler_arg1(profilingHandleArg1);

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               memory_order_release);
  if (Verbosity())
    Report("XRay Profiling init successful.\n");

  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}

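// Runs as a dynamic initializer for this translation unit: seeds the flag
// defaults, registers the "xray-profiling" mode with the XRay framework, and
// selects it when the xray_mode flag asks for it. For example (a sketch;
// exact flags depend on the runtime configuration), a user would typically
// enable the mode through the environment:
//
//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-profiling" ./a.out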
bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
  // Set up the flag defaults from the static defaults and the
  // compiler-provided defaults.
  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    auto *F = profilingFlags();
    F->setDefaults();
    FlagParser ProfilingParser;
    registerProfilerFlags(&ProfilingParser, F);
    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
  }

  XRayLogImpl Impl{
      profilingLoggingInit,
      profilingFinalize,
      profilingHandleArg0,
      profilingFlush,
  };
  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    if (Verbosity())
      Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
             "%d\n",
             RegistrationResult);
    return false;
  }

  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
    __xray_log_select_mode("xray-profiling");
  return true;
}

} // namespace __xray

static auto UNUSED Unused = __xray::profilingDynamicInitializer();