1//===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of XRay, a dynamic runtime instrumentation system.
10//
11// Here we implement the Flight Data Recorder mode for XRay, where we use
12// compact structures to store records in memory as well as when writing out the
13// data to files.
14//
15//===----------------------------------------------------------------------===//
16#include "xray_fdr_logging.h"
17#include <cassert>
18#include <cstddef>
19#include <errno.h>
20#include <limits>
21#include <memory>
22#include <pthread.h>
23#include <sys/time.h>
24#include <time.h>
25#include <unistd.h>
26
27#include "sanitizer_common/sanitizer_allocator_internal.h"
28#include "sanitizer_common/sanitizer_atomic.h"
29#include "sanitizer_common/sanitizer_common.h"
30#include "xray/xray_interface.h"
31#include "xray/xray_records.h"
32#include "xray_allocator.h"
33#include "xray_buffer_queue.h"
34#include "xray_defs.h"
35#include "xray_fdr_controller.h"
36#include "xray_fdr_flags.h"
37#include "xray_fdr_log_writer.h"
38#include "xray_flags.h"
39#include "xray_recursion_guard.h"
40#include "xray_tsc.h"
41#include "xray_utils.h"
42
43namespace __xray {
44
45static atomic_sint32_t LoggingStatus = {
46 .val_dont_use: XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
47
48namespace {
49
50// Group together thread-local-data in a struct, then hide it behind a function
51// call so that it can be initialized on first use instead of as a global. We
52// force the alignment to 64-bytes for x86 cache line alignment, as this
53// structure is used in the hot path of implementation.
54struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
55 BufferQueue::Buffer Buffer{};
56 BufferQueue *BQ = nullptr;
57
58 using LogWriterStorage = std::byte[sizeof(FDRLogWriter)];
59 alignas(FDRLogWriter) LogWriterStorage LWStorage;
60 FDRLogWriter *Writer = nullptr;
61
62 using ControllerStorage = std::byte[sizeof(FDRController<>)];
63 alignas(FDRController<>) ControllerStorage CStorage;
64 FDRController<> *Controller = nullptr;
65};
66
67} // namespace
68
69static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
70 "ThreadLocalData must be trivially destructible");
71
72// Use a global pthread key to identify thread-local data for logging.
73static pthread_key_t Key;
74
75// Global BufferQueue.
76static std::byte BufferQueueStorage[sizeof(BufferQueue)];
77static BufferQueue *BQ = nullptr;
78
79// Global thresholds for function durations.
80static atomic_uint64_t ThresholdTicks{.val_dont_use: 0};
81
82// Global for ticks per second.
83static atomic_uint64_t TicksPerSec{.val_dont_use: 0};
84
85static atomic_sint32_t LogFlushStatus = {
86 .val_dont_use: XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
87
88// This function will initialize the thread-local data structure used by the FDR
89// logging implementation and return a reference to it. The implementation
90// details require a bit of care to maintain.
91//
92// First, some requirements on the implementation in general:
93//
94// - XRay handlers should not call any memory allocation routines that may
95// delegate to an instrumented implementation. This means functions like
96// malloc() and free() should not be called while instrumenting.
97//
98// - We would like to use some thread-local data initialized on first-use of
99// the XRay instrumentation. These allow us to implement unsynchronized
100// routines that access resources associated with the thread.
101//
102// The implementation here uses a few mechanisms that allow us to provide both
103// the requirements listed above. We do this by:
104//
105// 1. Using a thread-local aligned storage buffer for representing the
106// ThreadLocalData struct. This data will be uninitialized memory by
107// design.
108//
109// 2. Not requiring a thread exit handler/implementation, keeping the
110// thread-local as purely a collection of references/data that do not
111// require cleanup.
112//
113// We're doing this to avoid using a `thread_local` object that has a
114// non-trivial destructor, because the C++ runtime might call std::malloc(...)
115// to register calls to destructors. Deadlocks may arise when, for example, an
116// externally provided malloc implementation is XRay instrumented, and
117// initializing the thread-locals involves calling into malloc. A malloc
118// implementation that does global synchronization might be holding a lock for a
119// critical section, calling a function that might be XRay instrumented (and
120// thus in turn calling into malloc by virtue of registration of the
121// thread_local's destructor).
122#if XRAY_HAS_TLS_ALIGNAS
123static_assert(alignof(ThreadLocalData) >= 64,
124 "ThreadLocalData must be cache line aligned.");
125#endif
126static ThreadLocalData &getThreadLocalData() {
127 alignas(ThreadLocalData) thread_local std::byte
128 TLDStorage[sizeof(ThreadLocalData)];
129
130 if (pthread_getspecific(key: Key) == NULL) {
131 new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
132 pthread_setspecific(key: Key, pointer: &TLDStorage);
133 }
134
135 return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
136}
137
138static XRayFileHeader &fdrCommonHeaderInfo() {
139 alignas(XRayFileHeader) static std::byte HStorage[sizeof(XRayFileHeader)];
140 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
141 static bool TSCSupported = true;
142 static uint64_t CycleFrequency = NanosecondsPerSecond;
143 pthread_once(
144 once_control: &OnceInit, init_routine: +[] {
145 XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
146 // Version 2 of the log writes the extents of the buffer, instead of
147 // relying on an end-of-buffer record.
148 // Version 3 includes PID metadata record.
149 // Version 4 includes CPU data in the custom event records.
150 // Version 5 uses relative deltas for custom and typed event records,
151 // and removes the CPU data in custom event records (similar to how
152 // function records use deltas instead of full TSCs and rely on other
153 // metadata records for TSC wraparound and CPU migration).
154 H.Version = 5;
155 H.Type = FileTypes::FDR_LOG;
156
157 // Test for required CPU features and cache the cycle frequency
158 TSCSupported = probeRequiredCPUFeatures();
159 if (TSCSupported)
160 CycleFrequency = getTSCFrequency();
161 H.CycleFrequency = CycleFrequency;
162
163 // FIXME: Actually check whether we have 'constant_tsc' and
164 // 'nonstop_tsc' before setting the values in the header.
165 H.ConstantTSC = 1;
166 H.NonstopTSC = 1;
167 });
168 return reinterpret_cast<XRayFileHeader &>(HStorage);
169}
170
171// This is the iterator implementation, which knows how to handle FDR-mode
172// specific buffers. This is used as an implementation of the iterator function
173// needed by __xray_set_buffer_iterator(...). It maintains a global state of the
174// buffer iteration for the currently installed FDR mode buffers. In particular:
175//
176// - If the argument represents the initial state of XRayBuffer ({nullptr, 0})
177// then the iterator returns the header information.
178// - If the argument represents the header information ({address of header
179// info, size of the header info}) then it returns the first FDR buffer's
180// address and extents.
181// - It will keep returning the next buffer and extents as there are more
182// buffers to process. When the input represents the last buffer, it will
183// return the initial state to signal completion ({nullptr, 0}).
184//
185// See xray/xray_log_interface.h for more details on the requirements for the
186// implementations of __xray_set_buffer_iterator(...) and
187// __xray_log_process_buffers(...).
188XRayBuffer fdrIterator(const XRayBuffer B) {
189 DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0);
190 DCHECK(BQ->finalizing());
191
192 if (BQ == nullptr || !BQ->finalizing()) {
193 if (Verbosity())
194 Report(
195 format: "XRay FDR: Failed global buffer queue is null or not finalizing!\n");
196 return {.Data: nullptr, .Size: 0};
197 }
198
199 // We use a global scratch-pad for the header information, which only gets
200 // initialized the first time this function is called. We'll update one part
201 // of this information with some relevant data (in particular the number of
202 // buffers to expect).
203 alignas(
204 XRayFileHeader) static std::byte HeaderStorage[sizeof(XRayFileHeader)];
205 static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
206 pthread_once(
207 once_control: &HeaderOnce, init_routine: +[] {
208 reinterpret_cast<XRayFileHeader &>(HeaderStorage) =
209 fdrCommonHeaderInfo();
210 });
211
212 // We use a convenience alias for code referring to Header from here on out.
213 auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
214 if (B.Data == nullptr && B.Size == 0) {
215 Header.FdrData = FdrAdditionalHeaderData{.ThreadBufferSize: BQ->ConfiguredBufferSize()};
216 return XRayBuffer{.Data: static_cast<void *>(&Header), .Size: sizeof(Header)};
217 }
218
219 static BufferQueue::const_iterator It{};
220 static BufferQueue::const_iterator End{};
221 static uint8_t *CurrentBuffer{nullptr};
222 static size_t SerializedBufferSize = 0;
223 if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
224 // From this point on, we provide raw access to the raw buffer we're getting
225 // from the BufferQueue. We're relying on the iterators from the current
226 // Buffer queue.
227 It = BQ->cbegin();
228 End = BQ->cend();
229 }
230
231 if (CurrentBuffer != nullptr) {
232 deallocateBuffer(B: CurrentBuffer, S: SerializedBufferSize);
233 CurrentBuffer = nullptr;
234 }
235
236 if (It == End)
237 return {.Data: nullptr, .Size: 0};
238
239 // Set up the current buffer to contain the extents like we would when writing
240 // out to disk. The difference here would be that we still write "empty"
241 // buffers, or at least go through the iterators faithfully to let the
242 // handlers see the empty buffers in the queue.
243 //
244 // We need this atomic fence here to ensure that writes happening to the
245 // buffer have been committed before we load the extents atomically. Because
246 // the buffer is not explicitly synchronised across threads, we rely on the
247 // fence ordering to ensure that writes we expect to have been completed
248 // before the fence are fully committed before we read the extents.
249 atomic_thread_fence(mo: memory_order_acquire);
250 auto BufferSize = atomic_load(a: It->Extents, mo: memory_order_acquire);
251 SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
252 CurrentBuffer = allocateBuffer(S: SerializedBufferSize);
253 if (CurrentBuffer == nullptr)
254 return {.Data: nullptr, .Size: 0};
255
256 // Write out the extents as a Metadata Record into the CurrentBuffer.
257 MetadataRecord ExtentsRecord;
258 ExtentsRecord.Type = uint8_t(RecordType::Metadata);
259 ExtentsRecord.RecordKind =
260 uint8_t(MetadataRecord::RecordKinds::BufferExtents);
261 internal_memcpy(dest: ExtentsRecord.Data, src: &BufferSize, n: sizeof(BufferSize));
262 auto AfterExtents =
263 static_cast<char *>(internal_memcpy(dest: CurrentBuffer, src: &ExtentsRecord,
264 n: sizeof(MetadataRecord))) +
265 sizeof(MetadataRecord);
266 internal_memcpy(dest: AfterExtents, src: It->Data, n: BufferSize);
267
268 XRayBuffer Result;
269 Result.Data = CurrentBuffer;
270 Result.Size = SerializedBufferSize;
271 ++It;
272 return Result;
273}
274
275// Must finalize before flushing.
276XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
277 if (atomic_load(a: &LoggingStatus, mo: memory_order_acquire) !=
278 XRayLogInitStatus::XRAY_LOG_FINALIZED) {
279 if (Verbosity())
280 Report(format: "Not flushing log, implementation is not finalized.\n");
281 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
282 }
283
284 if (atomic_exchange(a: &LogFlushStatus, v: XRayLogFlushStatus::XRAY_LOG_FLUSHING,
285 mo: memory_order_release) ==
286 XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
287 if (Verbosity())
288 Report(format: "Not flushing log, implementation is still flushing.\n");
289 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
290 }
291
292 if (BQ == nullptr) {
293 if (Verbosity())
294 Report(format: "Cannot flush when global buffer queue is null.\n");
295 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
296 }
297
298 // We wait a number of milliseconds to allow threads to see that we've
299 // finalised before attempting to flush the log.
300 SleepForMillis(millis: fdrFlags()->grace_period_ms);
301
302 // At this point, we're going to uninstall the iterator implementation, before
303 // we decide to do anything further with the global buffer queue.
304 __xray_log_remove_buffer_iterator();
305
306 // Once flushed, we should set the global status of the logging implementation
307 // to "uninitialized" to allow for FDR-logging multiple runs.
308 auto ResetToUnitialized = at_scope_exit(fn: [] {
309 atomic_store(a: &LoggingStatus, v: XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
310 mo: memory_order_release);
311 });
312
313 auto CleanupBuffers = at_scope_exit(fn: [] {
314 auto &TLD = getThreadLocalData();
315 if (TLD.Controller != nullptr)
316 TLD.Controller->flush();
317 });
318
319 if (fdrFlags()->no_file_flush) {
320 if (Verbosity())
321 Report(format: "XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
322
323 atomic_store(a: &LogFlushStatus, v: XRayLogFlushStatus::XRAY_LOG_FLUSHED,
324 mo: memory_order_release);
325 return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
326 }
327
328 // We write out the file in the following format:
329 //
330 // 1) We write down the XRay file header with version 1, type FDR_LOG.
331 // 2) Then we use the 'apply' member of the BufferQueue that's live, to
332 // ensure that at this point in time we write down the buffers that have
333 // been released (and marked "used") -- we dump the full buffer for now
334 // (fixed-sized) and let the tools reading the buffers deal with the data
335 // afterwards.
336 //
337 LogWriter *LW = LogWriter::Open();
338 if (LW == nullptr) {
339 auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
340 atomic_store(a: &LogFlushStatus, v: Result, mo: memory_order_release);
341 return Result;
342 }
343
344 XRayFileHeader Header = fdrCommonHeaderInfo();
345 Header.FdrData = FdrAdditionalHeaderData{.ThreadBufferSize: BQ->ConfiguredBufferSize()};
346 LW->WriteAll(Begin: reinterpret_cast<char *>(&Header),
347 End: reinterpret_cast<char *>(&Header) + sizeof(Header));
348
349 // Release the current thread's buffer before we attempt to write out all the
350 // buffers. This ensures that in case we had only a single thread going, that
351 // we are able to capture the data nonetheless.
352 auto &TLD = getThreadLocalData();
353 if (TLD.Controller != nullptr)
354 TLD.Controller->flush();
355
356 BQ->apply(Fn: [&](const BufferQueue::Buffer &B) {
357 // Starting at version 2 of the FDR logging implementation, we only write
358 // the records identified by the extents of the buffer. We use the Extents
359 // from the Buffer and write that out as the first record in the buffer. We
360 // still use a Metadata record, but fill in the extents instead for the
361 // data.
362 MetadataRecord ExtentsRecord;
363 auto BufferExtents = atomic_load(a: B.Extents, mo: memory_order_acquire);
364 DCHECK(BufferExtents <= B.Size);
365 ExtentsRecord.Type = uint8_t(RecordType::Metadata);
366 ExtentsRecord.RecordKind =
367 uint8_t(MetadataRecord::RecordKinds::BufferExtents);
368 internal_memcpy(dest: ExtentsRecord.Data, src: &BufferExtents, n: sizeof(BufferExtents));
369 if (BufferExtents > 0) {
370 LW->WriteAll(Begin: reinterpret_cast<char *>(&ExtentsRecord),
371 End: reinterpret_cast<char *>(&ExtentsRecord) +
372 sizeof(MetadataRecord));
373 LW->WriteAll(Begin: reinterpret_cast<char *>(B.Data),
374 End: reinterpret_cast<char *>(B.Data) + BufferExtents);
375 }
376 });
377
378 atomic_store(a: &LogFlushStatus, v: XRayLogFlushStatus::XRAY_LOG_FLUSHED,
379 mo: memory_order_release);
380 return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
381}
382
383XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
384 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
385 if (!atomic_compare_exchange_strong(a: &LoggingStatus, cmp: &CurrentStatus,
386 xchg: XRayLogInitStatus::XRAY_LOG_FINALIZING,
387 mo: memory_order_release)) {
388 if (Verbosity())
389 Report(format: "Cannot finalize log, implementation not initialized.\n");
390 return static_cast<XRayLogInitStatus>(CurrentStatus);
391 }
392
393 // Do special things to make the log finalize itself, and not allow any more
394 // operations to be performed until re-initialized.
395 if (BQ == nullptr) {
396 if (Verbosity())
397 Report(format: "Attempting to finalize an uninitialized global buffer!\n");
398 } else {
399 BQ->finalize();
400 }
401
402 atomic_store(a: &LoggingStatus, v: XRayLogInitStatus::XRAY_LOG_FINALIZED,
403 mo: memory_order_release);
404 return XRayLogInitStatus::XRAY_LOG_FINALIZED;
405}
406
407struct TSCAndCPU {
408 uint64_t TSC = 0;
409 unsigned char CPU = 0;
410};
411
412static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
413 // We want to get the TSC as early as possible, so that we can check whether
414 // we've seen this CPU before. We also do it before we load anything else,
415 // to allow for forward progress with the scheduling.
416 TSCAndCPU Result;
417
418 // Test once for required CPU features
419 static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
420 static bool TSCSupported = true;
421 pthread_once(
422 once_control: &OnceProbe, init_routine: +[] { TSCSupported = probeRequiredCPUFeatures(); });
423
424 if (TSCSupported) {
425 Result.TSC = __xray::readTSC(CPU&: Result.CPU);
426 } else {
427 // FIXME: This code needs refactoring as it appears in multiple locations
428 timespec TS;
429 int result = clock_gettime(CLOCK_REALTIME, tp: &TS);
430 if (result != 0) {
431 Report(format: "clock_gettime(2) return %d, errno=%d", result, int(errno));
432 TS = {.tv_sec: 0, .tv_nsec: 0};
433 }
434 Result.CPU = 0;
435 Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
436 }
437 return Result;
438}
439
440thread_local atomic_uint8_t Running{.val_dont_use: 0};
441
442static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT {
443 // Check if we're finalizing, before proceeding.
444 {
445 auto Status = atomic_load(a: &LoggingStatus, mo: memory_order_acquire);
446 if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
447 Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) {
448 if (TLD.Controller != nullptr) {
449 TLD.Controller->flush();
450 TLD.Controller = nullptr;
451 }
452 return false;
453 }
454 }
455
456 if (UNLIKELY(TLD.Controller == nullptr)) {
457 // Set up the TLD buffer queue.
458 if (UNLIKELY(BQ == nullptr))
459 return false;
460 TLD.BQ = BQ;
461
462 // Check that we have a valid buffer.
463 if (TLD.Buffer.Generation != BQ->generation() &&
464 TLD.BQ->releaseBuffer(Buf&: TLD.Buffer) != BufferQueue::ErrorCode::Ok)
465 return false;
466
467 // Set up a buffer, before setting up the log writer. Bail out on failure.
468 if (TLD.BQ->getBuffer(Buf&: TLD.Buffer) != BufferQueue::ErrorCode::Ok)
469 return false;
470
471 // Set up the Log Writer for this thread.
472 if (UNLIKELY(TLD.Writer == nullptr)) {
473 auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage);
474 new (LWStorage) FDRLogWriter(TLD.Buffer);
475 TLD.Writer = LWStorage;
476 } else {
477 TLD.Writer->resetRecord();
478 }
479
480 auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage);
481 new (CStorage)
482 FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime,
483 atomic_load_relaxed(a: &ThresholdTicks));
484 TLD.Controller = CStorage;
485 }
486
487 DCHECK_NE(TLD.Controller, nullptr);
488 return true;
489}
490
491void fdrLoggingHandleArg0(int32_t FuncId,
492 XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
493 auto TC = getTimestamp();
494 auto &TSC = TC.TSC;
495 auto &CPU = TC.CPU;
496 RecursionGuard Guard{Running};
497 if (!Guard)
498 return;
499
500 auto &TLD = getThreadLocalData();
501 if (!setupTLD(TLD))
502 return;
503
504 switch (Entry) {
505 case XRayEntryType::ENTRY:
506 case XRayEntryType::LOG_ARGS_ENTRY:
507 TLD.Controller->functionEnter(FuncId, TSC, CPU);
508 return;
509 case XRayEntryType::EXIT:
510 TLD.Controller->functionExit(FuncId, TSC, CPU);
511 return;
512 case XRayEntryType::TAIL:
513 TLD.Controller->functionTailExit(FuncId, TSC, CPU);
514 return;
515 case XRayEntryType::CUSTOM_EVENT:
516 case XRayEntryType::TYPED_EVENT:
517 break;
518 }
519}
520
521void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
522 uint64_t Arg) XRAY_NEVER_INSTRUMENT {
523 auto TC = getTimestamp();
524 auto &TSC = TC.TSC;
525 auto &CPU = TC.CPU;
526 RecursionGuard Guard{Running};
527 if (!Guard)
528 return;
529
530 auto &TLD = getThreadLocalData();
531 if (!setupTLD(TLD))
532 return;
533
534 switch (Entry) {
535 case XRayEntryType::ENTRY:
536 case XRayEntryType::LOG_ARGS_ENTRY:
537 TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg);
538 return;
539 case XRayEntryType::EXIT:
540 TLD.Controller->functionExit(FuncId, TSC, CPU);
541 return;
542 case XRayEntryType::TAIL:
543 TLD.Controller->functionTailExit(FuncId, TSC, CPU);
544 return;
545 case XRayEntryType::CUSTOM_EVENT:
546 case XRayEntryType::TYPED_EVENT:
547 break;
548 }
549}
550
551void fdrLoggingHandleCustomEvent(void *Event,
552 std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
553 auto TC = getTimestamp();
554 auto &TSC = TC.TSC;
555 auto &CPU = TC.CPU;
556 RecursionGuard Guard{Running};
557 if (!Guard)
558 return;
559
560 // Complain when we ever get at least one custom event that's larger than what
561 // we can possibly support.
562 if (EventSize >
563 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
564 static pthread_once_t Once = PTHREAD_ONCE_INIT;
565 pthread_once(
566 once_control: &Once, init_routine: +[] {
567 Report(format: "Custom event size too large; truncating to %d.\n",
568 std::numeric_limits<int32_t>::max());
569 });
570 }
571
572 auto &TLD = getThreadLocalData();
573 if (!setupTLD(TLD))
574 return;
575
576 int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
577 TLD.Controller->customEvent(TSC, CPU, Event, EventSize: ReducedEventSize);
578}
579
580void fdrLoggingHandleTypedEvent(size_t EventType, const void *Event,
581 size_t EventSize) noexcept
582 XRAY_NEVER_INSTRUMENT {
583 auto TC = getTimestamp();
584 auto &TSC = TC.TSC;
585 auto &CPU = TC.CPU;
586 RecursionGuard Guard{Running};
587 if (!Guard)
588 return;
589
590 // Complain when we ever get at least one typed event that's larger than what
591 // we can possibly support.
592 if (EventSize >
593 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
594 static pthread_once_t Once = PTHREAD_ONCE_INIT;
595 pthread_once(
596 once_control: &Once, init_routine: +[] {
597 Report(format: "Typed event size too large; truncating to %d.\n",
598 std::numeric_limits<int32_t>::max());
599 });
600 }
601
602 auto &TLD = getThreadLocalData();
603 if (!setupTLD(TLD))
604 return;
605
606 int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
607 TLD.Controller->typedEvent(TSC, CPU, EventType: static_cast<uint16_t>(EventType), Event,
608 EventSize: ReducedEventSize);
609}
610
611XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
612 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
613 if (Options == nullptr)
614 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
615
616 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
617 if (!atomic_compare_exchange_strong(a: &LoggingStatus, cmp: &CurrentStatus,
618 xchg: XRayLogInitStatus::XRAY_LOG_INITIALIZING,
619 mo: memory_order_release)) {
620 if (Verbosity())
621 Report(format: "Cannot initialize already initialized implementation.\n");
622 return static_cast<XRayLogInitStatus>(CurrentStatus);
623 }
624
625 if (Verbosity())
626 Report(format: "Initializing FDR mode with options: %s\n",
627 static_cast<const char *>(Options));
628
629 // TODO: Factor out the flags specific to the FDR mode implementation. For
630 // now, use the global/single definition of the flags, since the FDR mode
631 // flags are already defined there.
632 FlagParser FDRParser;
633 FDRFlags FDRFlags;
634 registerXRayFDRFlags(P: &FDRParser, F: &FDRFlags);
635 FDRFlags.setDefaults();
636
637 // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
638 // options until we migrate everyone to use the XRAY_FDR_OPTIONS
639 // compiler-provided options.
640 FDRParser.ParseString(s: useCompilerDefinedFlags());
641 FDRParser.ParseString(s: useCompilerDefinedFDRFlags());
642 auto *EnvOpts = GetEnv(name: "XRAY_FDR_OPTIONS");
643 if (EnvOpts == nullptr)
644 EnvOpts = "";
645 FDRParser.ParseString(s: EnvOpts);
646
647 // FIXME: Remove this when we fully remove the deprecated flags.
648 if (internal_strlen(s: EnvOpts) == 0) {
649 FDRFlags.func_duration_threshold_us =
650 flags()->xray_fdr_log_func_duration_threshold_us;
651 FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
652 }
653
654 // The provided options should always override the compiler-provided and
655 // environment-variable defined options.
656 FDRParser.ParseString(s: static_cast<const char *>(Options));
657 *fdrFlags() = FDRFlags;
658 auto BufferSize = FDRFlags.buffer_size;
659 auto BufferMax = FDRFlags.buffer_max;
660
661 if (BQ == nullptr) {
662 bool Success = false;
663 BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
664 new (BQ) BufferQueue(BufferSize, BufferMax, Success);
665 if (!Success) {
666 Report(format: "BufferQueue init failed.\n");
667 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
668 }
669 } else {
670 if (BQ->init(BS: BufferSize, BC: BufferMax) != BufferQueue::ErrorCode::Ok) {
671 if (Verbosity())
672 Report(format: "Failed to re-initialize global buffer queue. Init failed.\n");
673 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
674 }
675 }
676
677 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
678 pthread_once(
679 once_control: &OnceInit, init_routine: +[] {
680 atomic_store(a: &TicksPerSec,
681 v: probeRequiredCPUFeatures() ? getTSCFrequency()
682 : __xray::NanosecondsPerSecond,
683 mo: memory_order_release);
684 pthread_key_create(
685 key: &Key, destr_function: +[](void *TLDPtr) {
686 if (TLDPtr == nullptr)
687 return;
688 auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
689 if (TLD.BQ == nullptr)
690 return;
691 if (TLD.Buffer.Data == nullptr)
692 return;
693 auto EC = TLD.BQ->releaseBuffer(Buf&: TLD.Buffer);
694 if (EC != BufferQueue::ErrorCode::Ok)
695 Report(format: "At thread exit, failed to release buffer at %p; "
696 "error=%s\n",
697 TLD.Buffer.Data, BufferQueue::getErrorString(E: EC));
698 });
699 });
700
701 atomic_store(a: &ThresholdTicks,
702 v: atomic_load_relaxed(a: &TicksPerSec) *
703 fdrFlags()->func_duration_threshold_us / 1000000,
704 mo: memory_order_release);
705 // Arg1 handler should go in first to avoid concurrent code accidentally
706 // falling back to arg0 when it should have ran arg1.
707 __xray_set_handler_arg1(entry: fdrLoggingHandleArg1);
708 // Install the actual handleArg0 handler after initialising the buffers.
709 __xray_set_handler(entry: fdrLoggingHandleArg0);
710 __xray_set_customevent_handler(entry: fdrLoggingHandleCustomEvent);
711 __xray_set_typedevent_handler(entry: fdrLoggingHandleTypedEvent);
712
713 // Install the buffer iterator implementation.
714 __xray_log_set_buffer_iterator(Iterator: fdrIterator);
715
716 atomic_store(a: &LoggingStatus, v: XRayLogInitStatus::XRAY_LOG_INITIALIZED,
717 mo: memory_order_release);
718
719 if (Verbosity())
720 Report(format: "XRay FDR init successful.\n");
721 return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
722}
723
724bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
725 XRayLogImpl Impl{
726 .log_init: fdrLoggingInit,
727 .log_finalize: fdrLoggingFinalize,
728 .handle_arg0: fdrLoggingHandleArg0,
729 .flush_log: fdrLoggingFlush,
730 };
731 auto RegistrationResult = __xray_log_register_mode(Mode: "xray-fdr", Impl);
732 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
733 Verbosity()) {
734 Report(format: "Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
735 RegistrationResult);
736 return false;
737 }
738
739 if (flags()->xray_fdr_log ||
740 !internal_strcmp(s1: flags()->xray_mode, s2: "xray-fdr")) {
741 auto SelectResult = __xray_log_select_mode(Mode: "xray-fdr");
742 if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
743 Verbosity()) {
744 Report(format: "Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
745 SelectResult);
746 return false;
747 }
748 }
749 return true;
750}
751
752} // namespace __xray
753
754static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
755