1 | //===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file is a part of XRay, a dynamic runtime instrumentation system. |
10 | // |
11 | // Here we implement the Flight Data Recorder mode for XRay, where we use |
12 | // compact structures to store records in memory as well as when writing out the |
13 | // data to files. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | #include "xray_fdr_logging.h" |
17 | #include <cassert> |
18 | #include <cstddef> |
19 | #include <errno.h> |
20 | #include <limits> |
21 | #include <memory> |
22 | #include <pthread.h> |
23 | #include <sys/time.h> |
24 | #include <time.h> |
25 | #include <unistd.h> |
26 | |
27 | #include "sanitizer_common/sanitizer_allocator_internal.h" |
28 | #include "sanitizer_common/sanitizer_atomic.h" |
29 | #include "sanitizer_common/sanitizer_common.h" |
30 | #include "xray/xray_interface.h" |
31 | #include "xray/xray_records.h" |
32 | #include "xray_allocator.h" |
33 | #include "xray_buffer_queue.h" |
34 | #include "xray_defs.h" |
35 | #include "xray_fdr_controller.h" |
36 | #include "xray_fdr_flags.h" |
37 | #include "xray_fdr_log_writer.h" |
38 | #include "xray_flags.h" |
39 | #include "xray_recursion_guard.h" |
40 | #include "xray_tsc.h" |
41 | #include "xray_utils.h" |
42 | |
43 | namespace __xray { |
44 | |
// Tracks the lifecycle of the FDR implementation (uninitialized ->
// initializing -> initialized -> finalizing -> finalized). Transitions are
// made with atomic compare-exchange in fdrLoggingInit()/fdrLoggingFinalize(),
// and handlers load it (acquire) before touching any shared state.
static atomic_sint32_t LoggingStatus = {
    .val_dont_use: XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
47 | |
namespace {

// Group together thread-local-data in a struct, then hide it behind a function
// call so that it can be initialized on first use instead of as a global. We
// force the alignment to 64-bytes for x86 cache line alignment, as this
// structure is used in the hot path of implementation.
struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
  // The buffer currently owned by this thread, handed out by BQ below.
  BufferQueue::Buffer Buffer{};
  // Non-owning pointer to the global buffer queue this thread draws from.
  BufferQueue *BQ = nullptr;

  // Raw storage for the per-thread log writer; constructed lazily with
  // placement-new in setupTLD() so no ctors/dtors run at thread start/exit.
  using LogWriterStorage = std::byte[sizeof(FDRLogWriter)];
  alignas(FDRLogWriter) LogWriterStorage LWStorage;
  FDRLogWriter *Writer = nullptr; // Points into LWStorage once initialized.

  // Raw storage for the controller, managed the same way as LWStorage.
  using ControllerStorage = std::byte[sizeof(FDRController<>)];
  alignas(FDRController<>) ControllerStorage CStorage;
  FDRController<> *Controller = nullptr; // Points into CStorage once set up.
};

} // namespace
68 | |
// Trivial destructibility is required so no destructor needs to be registered
// for the thread_local storage in getThreadLocalData() (see the long comment
// above that function for why that matters).
static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
              "ThreadLocalData must be trivially destructible" );

// Use a global pthread key to identify thread-local data for logging.
static pthread_key_t Key;

// Global BufferQueue.
// Backed by raw static storage and placement-new'd in fdrLoggingInit() so no
// dynamic allocation or static destructor is involved.
static std::byte BufferQueueStorage[sizeof(BufferQueue)];
static BufferQueue *BQ = nullptr;

// Global thresholds for function durations.
// In TSC ticks; functions shorter than this may be elided by the controller.
static atomic_uint64_t ThresholdTicks{.val_dont_use: 0};

// Global for ticks per second.
static atomic_uint64_t TicksPerSec{.val_dont_use: 0};

// Guards against concurrent flushes; exchanged in fdrLoggingFlush().
static atomic_sint32_t LogFlushStatus = {
    .val_dont_use: XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
87 | |
88 | // This function will initialize the thread-local data structure used by the FDR |
89 | // logging implementation and return a reference to it. The implementation |
90 | // details require a bit of care to maintain. |
91 | // |
92 | // First, some requirements on the implementation in general: |
93 | // |
94 | // - XRay handlers should not call any memory allocation routines that may |
95 | // delegate to an instrumented implementation. This means functions like |
96 | // malloc() and free() should not be called while instrumenting. |
97 | // |
98 | // - We would like to use some thread-local data initialized on first-use of |
99 | // the XRay instrumentation. These allow us to implement unsynchronized |
100 | // routines that access resources associated with the thread. |
101 | // |
102 | // The implementation here uses a few mechanisms that allow us to provide both |
103 | // the requirements listed above. We do this by: |
104 | // |
105 | // 1. Using a thread-local aligned storage buffer for representing the |
106 | // ThreadLocalData struct. This data will be uninitialized memory by |
107 | // design. |
108 | // |
109 | // 2. Not requiring a thread exit handler/implementation, keeping the |
110 | // thread-local as purely a collection of references/data that do not |
111 | // require cleanup. |
112 | // |
113 | // We're doing this to avoid using a `thread_local` object that has a |
114 | // non-trivial destructor, because the C++ runtime might call std::malloc(...) |
115 | // to register calls to destructors. Deadlocks may arise when, for example, an |
116 | // externally provided malloc implementation is XRay instrumented, and |
117 | // initializing the thread-locals involves calling into malloc. A malloc |
118 | // implementation that does global synchronization might be holding a lock for a |
119 | // critical section, calling a function that might be XRay instrumented (and |
120 | // thus in turn calling into malloc by virtue of registration of the |
121 | // thread_local's destructor). |
#if XRAY_HAS_TLS_ALIGNAS
static_assert(alignof(ThreadLocalData) >= 64,
              "ThreadLocalData must be cache line aligned." );
#endif
// Returns this thread's ThreadLocalData, constructing it in raw thread_local
// storage on first use. The pthread key doubles as a "constructed" flag and
// hooks the destructor registered in fdrLoggingInit() for buffer release at
// thread exit.
static ThreadLocalData &getThreadLocalData() {
  alignas(ThreadLocalData) thread_local std::byte
      TLDStorage[sizeof(ThreadLocalData)];

  // A null key value means this thread has not constructed its TLD yet;
  // placement-new it and publish the storage address for the exit handler.
  if (pthread_getspecific(key: Key) == NULL) {
    new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
    pthread_setspecific(key: Key, pointer: &TLDStorage);
  }

  return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
}
137 | |
138 | static XRayFileHeader &() { |
139 | alignas(XRayFileHeader) static std::byte HStorage[sizeof(XRayFileHeader)]; |
140 | static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; |
141 | static bool TSCSupported = true; |
142 | static uint64_t CycleFrequency = NanosecondsPerSecond; |
143 | pthread_once( |
144 | once_control: &OnceInit, init_routine: +[] { |
145 | XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage); |
146 | // Version 2 of the log writes the extents of the buffer, instead of |
147 | // relying on an end-of-buffer record. |
148 | // Version 3 includes PID metadata record. |
149 | // Version 4 includes CPU data in the custom event records. |
150 | // Version 5 uses relative deltas for custom and typed event records, |
151 | // and removes the CPU data in custom event records (similar to how |
152 | // function records use deltas instead of full TSCs and rely on other |
153 | // metadata records for TSC wraparound and CPU migration). |
154 | H.Version = 5; |
155 | H.Type = FileTypes::FDR_LOG; |
156 | |
157 | // Test for required CPU features and cache the cycle frequency |
158 | TSCSupported = probeRequiredCPUFeatures(); |
159 | if (TSCSupported) |
160 | CycleFrequency = getTSCFrequency(); |
161 | H.CycleFrequency = CycleFrequency; |
162 | |
163 | // FIXME: Actually check whether we have 'constant_tsc' and |
164 | // 'nonstop_tsc' before setting the values in the header. |
165 | H.ConstantTSC = 1; |
166 | H.NonstopTSC = 1; |
167 | }); |
168 | return reinterpret_cast<XRayFileHeader &>(HStorage); |
169 | } |
170 | |
171 | // This is the iterator implementation, which knows how to handle FDR-mode |
172 | // specific buffers. This is used as an implementation of the iterator function |
173 | // needed by __xray_set_buffer_iterator(...). It maintains a global state of the |
174 | // buffer iteration for the currently installed FDR mode buffers. In particular: |
175 | // |
176 | // - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) |
177 | // then the iterator returns the header information. |
178 | // - If the argument represents the header information ({address of header |
179 | // info, size of the header info}) then it returns the first FDR buffer's |
180 | // address and extents. |
181 | // - It will keep returning the next buffer and extents as there are more |
182 | // buffers to process. When the input represents the last buffer, it will |
183 | // return the initial state to signal completion ({nullptr, 0}). |
184 | // |
185 | // See xray/xray_log_interface.h for more details on the requirements for the |
186 | // implementations of __xray_set_buffer_iterator(...) and |
187 | // __xray_log_process_buffers(...). |
188 | XRayBuffer fdrIterator(const XRayBuffer B) { |
189 | DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr" ) == 0); |
190 | DCHECK(BQ->finalizing()); |
191 | |
192 | if (BQ == nullptr || !BQ->finalizing()) { |
193 | if (Verbosity()) |
194 | Report( |
195 | format: "XRay FDR: Failed global buffer queue is null or not finalizing!\n" ); |
196 | return {.Data: nullptr, .Size: 0}; |
197 | } |
198 | |
199 | // We use a global scratch-pad for the header information, which only gets |
200 | // initialized the first time this function is called. We'll update one part |
201 | // of this information with some relevant data (in particular the number of |
202 | // buffers to expect). |
203 | alignas( |
204 | XRayFileHeader) static std::byte [sizeof(XRayFileHeader)]; |
205 | static pthread_once_t = PTHREAD_ONCE_INIT; |
206 | pthread_once( |
207 | once_control: &HeaderOnce, init_routine: +[] { |
208 | reinterpret_cast<XRayFileHeader &>(HeaderStorage) = |
209 | fdrCommonHeaderInfo(); |
210 | }); |
211 | |
212 | // We use a convenience alias for code referring to Header from here on out. |
213 | auto & = reinterpret_cast<XRayFileHeader &>(HeaderStorage); |
214 | if (B.Data == nullptr && B.Size == 0) { |
215 | Header.FdrData = FdrAdditionalHeaderData{.ThreadBufferSize: BQ->ConfiguredBufferSize()}; |
216 | return XRayBuffer{.Data: static_cast<void *>(&Header), .Size: sizeof(Header)}; |
217 | } |
218 | |
219 | static BufferQueue::const_iterator It{}; |
220 | static BufferQueue::const_iterator End{}; |
221 | static uint8_t *CurrentBuffer{nullptr}; |
222 | static size_t SerializedBufferSize = 0; |
223 | if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) { |
224 | // From this point on, we provide raw access to the raw buffer we're getting |
225 | // from the BufferQueue. We're relying on the iterators from the current |
226 | // Buffer queue. |
227 | It = BQ->cbegin(); |
228 | End = BQ->cend(); |
229 | } |
230 | |
231 | if (CurrentBuffer != nullptr) { |
232 | deallocateBuffer(B: CurrentBuffer, S: SerializedBufferSize); |
233 | CurrentBuffer = nullptr; |
234 | } |
235 | |
236 | if (It == End) |
237 | return {.Data: nullptr, .Size: 0}; |
238 | |
239 | // Set up the current buffer to contain the extents like we would when writing |
240 | // out to disk. The difference here would be that we still write "empty" |
241 | // buffers, or at least go through the iterators faithfully to let the |
242 | // handlers see the empty buffers in the queue. |
243 | // |
244 | // We need this atomic fence here to ensure that writes happening to the |
245 | // buffer have been committed before we load the extents atomically. Because |
246 | // the buffer is not explicitly synchronised across threads, we rely on the |
247 | // fence ordering to ensure that writes we expect to have been completed |
248 | // before the fence are fully committed before we read the extents. |
249 | atomic_thread_fence(mo: memory_order_acquire); |
250 | auto BufferSize = atomic_load(a: It->Extents, mo: memory_order_acquire); |
251 | SerializedBufferSize = BufferSize + sizeof(MetadataRecord); |
252 | CurrentBuffer = allocateBuffer(S: SerializedBufferSize); |
253 | if (CurrentBuffer == nullptr) |
254 | return {.Data: nullptr, .Size: 0}; |
255 | |
256 | // Write out the extents as a Metadata Record into the CurrentBuffer. |
257 | MetadataRecord ExtentsRecord; |
258 | ExtentsRecord.Type = uint8_t(RecordType::Metadata); |
259 | ExtentsRecord.RecordKind = |
260 | uint8_t(MetadataRecord::RecordKinds::BufferExtents); |
261 | internal_memcpy(dest: ExtentsRecord.Data, src: &BufferSize, n: sizeof(BufferSize)); |
262 | auto AfterExtents = |
263 | static_cast<char *>(internal_memcpy(dest: CurrentBuffer, src: &ExtentsRecord, |
264 | n: sizeof(MetadataRecord))) + |
265 | sizeof(MetadataRecord); |
266 | internal_memcpy(dest: AfterExtents, src: It->Data, n: BufferSize); |
267 | |
268 | XRayBuffer Result; |
269 | Result.Data = CurrentBuffer; |
270 | Result.Size = SerializedBufferSize; |
271 | ++It; |
272 | return Result; |
273 | } |
274 | |
275 | // Must finalize before flushing. |
276 | XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { |
277 | if (atomic_load(a: &LoggingStatus, mo: memory_order_acquire) != |
278 | XRayLogInitStatus::XRAY_LOG_FINALIZED) { |
279 | if (Verbosity()) |
280 | Report(format: "Not flushing log, implementation is not finalized.\n" ); |
281 | return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; |
282 | } |
283 | |
284 | if (atomic_exchange(a: &LogFlushStatus, v: XRayLogFlushStatus::XRAY_LOG_FLUSHING, |
285 | mo: memory_order_release) == |
286 | XRayLogFlushStatus::XRAY_LOG_FLUSHING) { |
287 | if (Verbosity()) |
288 | Report(format: "Not flushing log, implementation is still flushing.\n" ); |
289 | return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; |
290 | } |
291 | |
292 | if (BQ == nullptr) { |
293 | if (Verbosity()) |
294 | Report(format: "Cannot flush when global buffer queue is null.\n" ); |
295 | return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; |
296 | } |
297 | |
298 | // We wait a number of milliseconds to allow threads to see that we've |
299 | // finalised before attempting to flush the log. |
300 | SleepForMillis(millis: fdrFlags()->grace_period_ms); |
301 | |
302 | // At this point, we're going to uninstall the iterator implementation, before |
303 | // we decide to do anything further with the global buffer queue. |
304 | __xray_log_remove_buffer_iterator(); |
305 | |
306 | // Once flushed, we should set the global status of the logging implementation |
307 | // to "uninitialized" to allow for FDR-logging multiple runs. |
308 | auto ResetToUnitialized = at_scope_exit(fn: [] { |
309 | atomic_store(a: &LoggingStatus, v: XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, |
310 | mo: memory_order_release); |
311 | }); |
312 | |
313 | auto CleanupBuffers = at_scope_exit(fn: [] { |
314 | auto &TLD = getThreadLocalData(); |
315 | if (TLD.Controller != nullptr) |
316 | TLD.Controller->flush(); |
317 | }); |
318 | |
319 | if (fdrFlags()->no_file_flush) { |
320 | if (Verbosity()) |
321 | Report(format: "XRay FDR: Not flushing to file, 'no_file_flush=true'.\n" ); |
322 | |
323 | atomic_store(a: &LogFlushStatus, v: XRayLogFlushStatus::XRAY_LOG_FLUSHED, |
324 | mo: memory_order_release); |
325 | return XRayLogFlushStatus::XRAY_LOG_FLUSHED; |
326 | } |
327 | |
328 | // We write out the file in the following format: |
329 | // |
330 | // 1) We write down the XRay file header with version 1, type FDR_LOG. |
331 | // 2) Then we use the 'apply' member of the BufferQueue that's live, to |
332 | // ensure that at this point in time we write down the buffers that have |
333 | // been released (and marked "used") -- we dump the full buffer for now |
334 | // (fixed-sized) and let the tools reading the buffers deal with the data |
335 | // afterwards. |
336 | // |
337 | LogWriter *LW = LogWriter::Open(); |
338 | if (LW == nullptr) { |
339 | auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; |
340 | atomic_store(a: &LogFlushStatus, v: Result, mo: memory_order_release); |
341 | return Result; |
342 | } |
343 | |
344 | XRayFileHeader = fdrCommonHeaderInfo(); |
345 | Header.FdrData = FdrAdditionalHeaderData{.ThreadBufferSize: BQ->ConfiguredBufferSize()}; |
346 | LW->WriteAll(Begin: reinterpret_cast<char *>(&Header), |
347 | End: reinterpret_cast<char *>(&Header) + sizeof(Header)); |
348 | |
349 | // Release the current thread's buffer before we attempt to write out all the |
350 | // buffers. This ensures that in case we had only a single thread going, that |
351 | // we are able to capture the data nonetheless. |
352 | auto &TLD = getThreadLocalData(); |
353 | if (TLD.Controller != nullptr) |
354 | TLD.Controller->flush(); |
355 | |
356 | BQ->apply(Fn: [&](const BufferQueue::Buffer &B) { |
357 | // Starting at version 2 of the FDR logging implementation, we only write |
358 | // the records identified by the extents of the buffer. We use the Extents |
359 | // from the Buffer and write that out as the first record in the buffer. We |
360 | // still use a Metadata record, but fill in the extents instead for the |
361 | // data. |
362 | MetadataRecord ExtentsRecord; |
363 | auto BufferExtents = atomic_load(a: B.Extents, mo: memory_order_acquire); |
364 | DCHECK(BufferExtents <= B.Size); |
365 | ExtentsRecord.Type = uint8_t(RecordType::Metadata); |
366 | ExtentsRecord.RecordKind = |
367 | uint8_t(MetadataRecord::RecordKinds::BufferExtents); |
368 | internal_memcpy(dest: ExtentsRecord.Data, src: &BufferExtents, n: sizeof(BufferExtents)); |
369 | if (BufferExtents > 0) { |
370 | LW->WriteAll(Begin: reinterpret_cast<char *>(&ExtentsRecord), |
371 | End: reinterpret_cast<char *>(&ExtentsRecord) + |
372 | sizeof(MetadataRecord)); |
373 | LW->WriteAll(Begin: reinterpret_cast<char *>(B.Data), |
374 | End: reinterpret_cast<char *>(B.Data) + BufferExtents); |
375 | } |
376 | }); |
377 | |
378 | atomic_store(a: &LogFlushStatus, v: XRayLogFlushStatus::XRAY_LOG_FLUSHED, |
379 | mo: memory_order_release); |
380 | return XRayLogFlushStatus::XRAY_LOG_FLUSHED; |
381 | } |
382 | |
383 | XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { |
384 | s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; |
385 | if (!atomic_compare_exchange_strong(a: &LoggingStatus, cmp: &CurrentStatus, |
386 | xchg: XRayLogInitStatus::XRAY_LOG_FINALIZING, |
387 | mo: memory_order_release)) { |
388 | if (Verbosity()) |
389 | Report(format: "Cannot finalize log, implementation not initialized.\n" ); |
390 | return static_cast<XRayLogInitStatus>(CurrentStatus); |
391 | } |
392 | |
393 | // Do special things to make the log finalize itself, and not allow any more |
394 | // operations to be performed until re-initialized. |
395 | if (BQ == nullptr) { |
396 | if (Verbosity()) |
397 | Report(format: "Attempting to finalize an uninitialized global buffer!\n" ); |
398 | } else { |
399 | BQ->finalize(); |
400 | } |
401 | |
402 | atomic_store(a: &LoggingStatus, v: XRayLogInitStatus::XRAY_LOG_FINALIZED, |
403 | mo: memory_order_release); |
404 | return XRayLogInitStatus::XRAY_LOG_FINALIZED; |
405 | } |
406 | |
// A TSC reading paired with the CPU it was taken on; produced by
// getTimestamp() below and consumed by the logging handlers.
struct TSCAndCPU {
  uint64_t TSC = 0;
  unsigned char CPU = 0;
};
411 | |
// Returns a (TSC, CPU) pair for the current moment. When the CPU features
// required for reading the TSC are unavailable, falls back to
// clock_gettime(CLOCK_REALTIME) scaled to nanoseconds, reporting CPU as 0.
static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
  // We want to get the TSC as early as possible, so that we can check whether
  // we've seen this CPU before. We also do it before we load anything else,
  // to allow for forward progress with the scheduling.
  TSCAndCPU Result;

  // Test once for required CPU features
  static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
  static bool TSCSupported = true;
  pthread_once(
      once_control: &OnceProbe, init_routine: +[] { TSCSupported = probeRequiredCPUFeatures(); });

  if (TSCSupported) {
    Result.TSC = __xray::readTSC(CPU&: Result.CPU);
  } else {
    // FIXME: This code needs refactoring as it appears in multiple locations
    timespec TS;
    int result = clock_gettime(CLOCK_REALTIME, tp: &TS);
    if (result != 0) {
      // On failure, fall back to a zero timestamp rather than propagating an
      // error out of the hot path.
      Report(format: "clock_gettime(2) return %d, errno=%d" , result, int(errno));
      TS = {.tv_sec: 0, .tv_nsec: 0};
    }
    Result.CPU = 0;
    Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
  }
  return Result;
}
439 | |
// Per-thread recursion flag used by RecursionGuard in the handlers below, so
// a handler re-entered on the same thread bails out instead of recursing.
thread_local atomic_uint8_t Running{.val_dont_use: 0};
441 | |
// Ensures the given thread-local data has a live buffer, writer and
// controller, lazily constructing them in the TLD's raw storage. Returns
// false when logging is shutting down or a buffer cannot be obtained, in
// which case the caller must not record anything.
static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT {
  // Check if we're finalizing, before proceeding.
  {
    auto Status = atomic_load(a: &LoggingStatus, mo: memory_order_acquire);
    if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
        Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) {
      // Flush whatever this thread still holds, then drop the controller so a
      // later re-initialization rebuilds it from scratch.
      if (TLD.Controller != nullptr) {
        TLD.Controller->flush();
        TLD.Controller = nullptr;
      }
      return false;
    }
  }

  if (UNLIKELY(TLD.Controller == nullptr)) {
    // Set up the TLD buffer queue.
    if (UNLIKELY(BQ == nullptr))
      return false;
    TLD.BQ = BQ;

    // Check that we have a valid buffer.
    // A stale generation means the buffer belongs to a previous init of the
    // queue; release it before acquiring a fresh one.
    if (TLD.Buffer.Generation != BQ->generation() &&
        TLD.BQ->releaseBuffer(Buf&: TLD.Buffer) != BufferQueue::ErrorCode::Ok)
      return false;

    // Set up a buffer, before setting up the log writer. Bail out on failure.
    if (TLD.BQ->getBuffer(Buf&: TLD.Buffer) != BufferQueue::ErrorCode::Ok)
      return false;

    // Set up the Log Writer for this thread.
    // Placement-new into the TLD's raw storage (see ThreadLocalData) so no
    // heap allocation happens on this path.
    if (UNLIKELY(TLD.Writer == nullptr)) {
      auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage);
      new (LWStorage) FDRLogWriter(TLD.Buffer);
      TLD.Writer = LWStorage;
    } else {
      TLD.Writer->resetRecord();
    }

    // The controller is (re)constructed unconditionally so it always refers
    // to the freshly acquired buffer.
    auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage);
    new (CStorage)
        FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime,
                        atomic_load_relaxed(a: &ThresholdTicks));
    TLD.Controller = CStorage;
  }

  DCHECK_NE(TLD.Controller, nullptr);
  return true;
}
490 | |
491 | void fdrLoggingHandleArg0(int32_t FuncId, |
492 | XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { |
493 | auto TC = getTimestamp(); |
494 | auto &TSC = TC.TSC; |
495 | auto &CPU = TC.CPU; |
496 | RecursionGuard Guard{Running}; |
497 | if (!Guard) |
498 | return; |
499 | |
500 | auto &TLD = getThreadLocalData(); |
501 | if (!setupTLD(TLD)) |
502 | return; |
503 | |
504 | switch (Entry) { |
505 | case XRayEntryType::ENTRY: |
506 | case XRayEntryType::LOG_ARGS_ENTRY: |
507 | TLD.Controller->functionEnter(FuncId, TSC, CPU); |
508 | return; |
509 | case XRayEntryType::EXIT: |
510 | TLD.Controller->functionExit(FuncId, TSC, CPU); |
511 | return; |
512 | case XRayEntryType::TAIL: |
513 | TLD.Controller->functionTailExit(FuncId, TSC, CPU); |
514 | return; |
515 | case XRayEntryType::CUSTOM_EVENT: |
516 | case XRayEntryType::TYPED_EVENT: |
517 | break; |
518 | } |
519 | } |
520 | |
521 | void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, |
522 | uint64_t Arg) XRAY_NEVER_INSTRUMENT { |
523 | auto TC = getTimestamp(); |
524 | auto &TSC = TC.TSC; |
525 | auto &CPU = TC.CPU; |
526 | RecursionGuard Guard{Running}; |
527 | if (!Guard) |
528 | return; |
529 | |
530 | auto &TLD = getThreadLocalData(); |
531 | if (!setupTLD(TLD)) |
532 | return; |
533 | |
534 | switch (Entry) { |
535 | case XRayEntryType::ENTRY: |
536 | case XRayEntryType::LOG_ARGS_ENTRY: |
537 | TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg); |
538 | return; |
539 | case XRayEntryType::EXIT: |
540 | TLD.Controller->functionExit(FuncId, TSC, CPU); |
541 | return; |
542 | case XRayEntryType::TAIL: |
543 | TLD.Controller->functionTailExit(FuncId, TSC, CPU); |
544 | return; |
545 | case XRayEntryType::CUSTOM_EVENT: |
546 | case XRayEntryType::TYPED_EVENT: |
547 | break; |
548 | } |
549 | } |
550 | |
551 | void fdrLoggingHandleCustomEvent(void *Event, |
552 | std::size_t EventSize) XRAY_NEVER_INSTRUMENT { |
553 | auto TC = getTimestamp(); |
554 | auto &TSC = TC.TSC; |
555 | auto &CPU = TC.CPU; |
556 | RecursionGuard Guard{Running}; |
557 | if (!Guard) |
558 | return; |
559 | |
560 | // Complain when we ever get at least one custom event that's larger than what |
561 | // we can possibly support. |
562 | if (EventSize > |
563 | static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { |
564 | static pthread_once_t Once = PTHREAD_ONCE_INIT; |
565 | pthread_once( |
566 | once_control: &Once, init_routine: +[] { |
567 | Report(format: "Custom event size too large; truncating to %d.\n" , |
568 | std::numeric_limits<int32_t>::max()); |
569 | }); |
570 | } |
571 | |
572 | auto &TLD = getThreadLocalData(); |
573 | if (!setupTLD(TLD)) |
574 | return; |
575 | |
576 | int32_t ReducedEventSize = static_cast<int32_t>(EventSize); |
577 | TLD.Controller->customEvent(TSC, CPU, Event, EventSize: ReducedEventSize); |
578 | } |
579 | |
580 | void fdrLoggingHandleTypedEvent(size_t EventType, const void *Event, |
581 | size_t EventSize) noexcept |
582 | XRAY_NEVER_INSTRUMENT { |
583 | auto TC = getTimestamp(); |
584 | auto &TSC = TC.TSC; |
585 | auto &CPU = TC.CPU; |
586 | RecursionGuard Guard{Running}; |
587 | if (!Guard) |
588 | return; |
589 | |
590 | // Complain when we ever get at least one typed event that's larger than what |
591 | // we can possibly support. |
592 | if (EventSize > |
593 | static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { |
594 | static pthread_once_t Once = PTHREAD_ONCE_INIT; |
595 | pthread_once( |
596 | once_control: &Once, init_routine: +[] { |
597 | Report(format: "Typed event size too large; truncating to %d.\n" , |
598 | std::numeric_limits<int32_t>::max()); |
599 | }); |
600 | } |
601 | |
602 | auto &TLD = getThreadLocalData(); |
603 | if (!setupTLD(TLD)) |
604 | return; |
605 | |
606 | int32_t ReducedEventSize = static_cast<int32_t>(EventSize); |
607 | TLD.Controller->typedEvent(TSC, CPU, EventType: static_cast<uint16_t>(EventType), Event, |
608 | EventSize: ReducedEventSize); |
609 | } |
610 | |
// Initializes FDR mode: parses flags (compiler defaults, XRAY_FDR_OPTIONS
// environment variable, then the caller-provided Options string, in
// increasing precedence), (re)initializes the global buffer queue, installs
// the XRay handlers and buffer iterator, and marks the implementation
// INITIALIZED. The first two size_t parameters and OptionsSize are unused
// (kept for the XRayLogImpl log_init signature).
XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
                                 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  if (Options == nullptr)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  // Only one initializer may win the UNINITIALIZED -> INITIALIZING race.
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(a: &LoggingStatus, cmp: &CurrentStatus,
                                      xchg: XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      mo: memory_order_release)) {
    if (Verbosity())
      Report(format: "Cannot initialize already initialized implementation.\n" );
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  if (Verbosity())
    Report(format: "Initializing FDR mode with options: %s\n" ,
           static_cast<const char *>(Options));

  // TODO: Factor out the flags specific to the FDR mode implementation. For
  // now, use the global/single definition of the flags, since the FDR mode
  // flags are already defined there.
  FlagParser FDRParser;
  FDRFlags FDRFlags;
  registerXRayFDRFlags(P: &FDRParser, F: &FDRFlags);
  FDRFlags.setDefaults();

  // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
  // options until we migrate everyone to use the XRAY_FDR_OPTIONS
  // compiler-provided options.
  FDRParser.ParseString(s: useCompilerDefinedFlags());
  FDRParser.ParseString(s: useCompilerDefinedFDRFlags());
  auto *EnvOpts = GetEnv(name: "XRAY_FDR_OPTIONS" );
  if (EnvOpts == nullptr)
    EnvOpts = "" ;
  FDRParser.ParseString(s: EnvOpts);

  // FIXME: Remove this when we fully remove the deprecated flags.
  if (internal_strlen(s: EnvOpts) == 0) {
    FDRFlags.func_duration_threshold_us =
        flags()->xray_fdr_log_func_duration_threshold_us;
    FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
  }

  // The provided options should always override the compiler-provided and
  // environment-variable defined options.
  FDRParser.ParseString(s: static_cast<const char *>(Options));
  *fdrFlags() = FDRFlags;
  auto BufferSize = FDRFlags.buffer_size;
  auto BufferMax = FDRFlags.buffer_max;

  // First init: placement-new the queue into its static storage. Re-init:
  // reuse the existing queue object (bumps its generation).
  if (BQ == nullptr) {
    bool Success = false;
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
    new (BQ) BufferQueue(BufferSize, BufferMax, Success);
    if (!Success) {
      Report(format: "BufferQueue init failed.\n" );
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }
  } else {
    if (BQ->init(BS: BufferSize, BC: BufferMax) != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report(format: "Failed to re-initialize global buffer queue. Init failed.\n" );
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }
  }

  // One-time setup across all inits: cache the tick rate and create the
  // pthread key whose destructor releases a thread's buffer at thread exit.
  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
  pthread_once(
      once_control: &OnceInit, init_routine: +[] {
        atomic_store(a: &TicksPerSec,
                     v: probeRequiredCPUFeatures() ? getTSCFrequency()
                                                 : __xray::NanosecondsPerSecond,
                     mo: memory_order_release);
        pthread_key_create(
            key: &Key, destr_function: +[](void *TLDPtr) {
              if (TLDPtr == nullptr)
                return;
              auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
              if (TLD.BQ == nullptr)
                return;
              if (TLD.Buffer.Data == nullptr)
                return;
              auto EC = TLD.BQ->releaseBuffer(Buf&: TLD.Buffer);
              if (EC != BufferQueue::ErrorCode::Ok)
                Report(format: "At thread exit, failed to release buffer at %p; "
                       "error=%s\n" ,
                       TLD.Buffer.Data, BufferQueue::getErrorString(E: EC));
            });
      });

  // Convert the microsecond threshold flag into TSC ticks.
  atomic_store(a: &ThresholdTicks,
               v: atomic_load_relaxed(a: &TicksPerSec) *
                   fdrFlags()->func_duration_threshold_us / 1000000,
               mo: memory_order_release);
  // Arg1 handler should go in first to avoid concurrent code accidentally
  // falling back to arg0 when it should have ran arg1.
  __xray_set_handler_arg1(entry: fdrLoggingHandleArg1);
  // Install the actual handleArg0 handler after initialising the buffers.
  __xray_set_handler(entry: fdrLoggingHandleArg0);
  __xray_set_customevent_handler(entry: fdrLoggingHandleCustomEvent);
  __xray_set_typedevent_handler(entry: fdrLoggingHandleTypedEvent);

  // Install the buffer iterator implementation.
  __xray_log_set_buffer_iterator(Iterator: fdrIterator);

  atomic_store(a: &LoggingStatus, v: XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               mo: memory_order_release);

  if (Verbosity())
    Report(format: "XRay FDR init successful.\n" );
  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}
723 | |
724 | bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { |
725 | XRayLogImpl Impl{ |
726 | .log_init: fdrLoggingInit, |
727 | .log_finalize: fdrLoggingFinalize, |
728 | .handle_arg0: fdrLoggingHandleArg0, |
729 | .flush_log: fdrLoggingFlush, |
730 | }; |
731 | auto RegistrationResult = __xray_log_register_mode(Mode: "xray-fdr" , Impl); |
732 | if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && |
733 | Verbosity()) { |
734 | Report(format: "Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n" , |
735 | RegistrationResult); |
736 | return false; |
737 | } |
738 | |
739 | if (flags()->xray_fdr_log || |
740 | !internal_strcmp(s1: flags()->xray_mode, s2: "xray-fdr" )) { |
741 | auto SelectResult = __xray_log_select_mode(Mode: "xray-fdr" ); |
742 | if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && |
743 | Verbosity()) { |
744 | Report(format: "Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n" , |
745 | SelectResult); |
746 | return false; |
747 | } |
748 | } |
749 | return true; |
750 | } |
751 | |
752 | } // namespace __xray |
753 | |
// Run registration at dynamic-initialization time; the value itself is
// intentionally unused — only the side effect of registering the mode matters.
static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
755 | |