1//===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of XRay, a dynamic runtime instrumentation system.
10//
11// Implementation of a simple in-memory log of XRay events. This defines a
12// logging function that's compatible with the XRay handler interface, and
13// routines for exporting data to files.
14//
15//===----------------------------------------------------------------------===//
16
17#include <errno.h>
18#include <fcntl.h>
19#include <pthread.h>
20#include <sys/stat.h>
21#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
22#include <sys/syscall.h>
23#endif
24#include <sys/types.h>
25#include <time.h>
26#include <unistd.h>
27
28#include "sanitizer_common/sanitizer_allocator_internal.h"
29#include "sanitizer_common/sanitizer_libc.h"
30#include "xray/xray_records.h"
31#include "xray_recursion_guard.h"
32#include "xray_basic_flags.h"
33#include "xray_basic_logging.h"
34#include "xray_defs.h"
35#include "xray_flags.h"
36#include "xray_interface_internal.h"
37#include "xray_tsc.h"
38#include "xray_utils.h"
39
40namespace __xray {
41
42static SpinMutex LogMutex;
43
44namespace {
45// We use elements of this type to record the entry TSC of every function ID we
46// see as we're tracing a particular thread's execution.
47struct alignas(16) StackEntry {
48 int32_t FuncId;
49 uint16_t Type;
50 uint8_t CPU;
51 uint8_t Padding;
52 uint64_t TSC;
53};
54
55static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry");
56
57struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
58 void *InMemoryBuffer = nullptr;
59 size_t BufferSize = 0;
60 size_t BufferOffset = 0;
61 void *ShadowStack = nullptr;
62 size_t StackSize = 0;
63 size_t StackEntries = 0;
64 __xray::LogWriter *LogWriter = nullptr;
65};
66
67struct BasicLoggingOptions {
68 int DurationFilterMicros = 0;
69 size_t MaxStackDepth = 0;
70 size_t ThreadBufferSize = 0;
71};
72} // namespace
73
74static pthread_key_t PThreadKey;
75
76static atomic_uint8_t BasicInitialized{.val_dont_use: 0};
77
78struct BasicLoggingOptions GlobalOptions;
79
80thread_local atomic_uint8_t Guard{.val_dont_use: 0};
81
82static atomic_uint8_t UseRealTSC{.val_dont_use: 0};
83static atomic_uint64_t ThresholdTicks{.val_dont_use: 0};
84static atomic_uint64_t TicksPerSec{.val_dont_use: 0};
85static atomic_uint64_t CycleFrequency{.val_dont_use: NanosecondsPerSecond};
86
87static LogWriter *getLog() XRAY_NEVER_INSTRUMENT {
88 LogWriter* LW = LogWriter::Open();
89 if (LW == nullptr)
90 return LW;
91
92 static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT;
93 pthread_once(once_control: &DetectOnce, init_routine: +[] {
94 if (atomic_load(a: &UseRealTSC, mo: memory_order_acquire))
95 atomic_store(a: &CycleFrequency, v: getTSCFrequency(), mo: memory_order_release);
96 });
97
98 // Since we're here, we get to write the header. We set it up so that the
99 // header will only be written once, at the start, and let the threads
100 // logging do writes which just append.
101 XRayFileHeader Header;
102 // Version 2 includes tail exit records.
103 // Version 3 includes pid inside records.
104 Header.Version = 3;
105 Header.Type = FileTypes::NAIVE_LOG;
106 Header.CycleFrequency = atomic_load(a: &CycleFrequency, mo: memory_order_acquire);
107
108 // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
109 // before setting the values in the header.
110 Header.ConstantTSC = 1;
111 Header.NonstopTSC = 1;
112 LW->WriteAll(Begin: reinterpret_cast<char *>(&Header),
113 End: reinterpret_cast<char *>(&Header) + sizeof(Header));
114 return LW;
115}
116
117static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT {
118 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
119 static LogWriter *LW = nullptr;
120 pthread_once(once_control: &OnceInit, init_routine: +[] { LW = getLog(); });
121 return LW;
122}
123
124static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
125 thread_local ThreadLocalData TLD;
126 thread_local bool UNUSED TOnce = [] {
127 if (GlobalOptions.ThreadBufferSize == 0) {
128 if (Verbosity())
129 Report(format: "Not initializing TLD since ThreadBufferSize == 0.\n");
130 return false;
131 }
132 pthread_setspecific(key: PThreadKey, pointer: &TLD);
133 TLD.LogWriter = getGlobalLog();
134 TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>(
135 InternalAlloc(size: sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize,
136 cache: nullptr, alignment: alignof(XRayRecord)));
137 TLD.BufferSize = GlobalOptions.ThreadBufferSize;
138 TLD.BufferOffset = 0;
139 if (GlobalOptions.MaxStackDepth == 0) {
140 if (Verbosity())
141 Report(format: "Not initializing the ShadowStack since MaxStackDepth == 0.\n");
142 TLD.StackSize = 0;
143 TLD.StackEntries = 0;
144 TLD.ShadowStack = nullptr;
145 return false;
146 }
147 TLD.ShadowStack = reinterpret_cast<StackEntry *>(
148 InternalAlloc(size: sizeof(StackEntry) * GlobalOptions.MaxStackDepth, cache: nullptr,
149 alignment: alignof(StackEntry)));
150 TLD.StackSize = GlobalOptions.MaxStackDepth;
151 TLD.StackEntries = 0;
152 return false;
153 }();
154 return TLD;
155}
156
157template <class RDTSC>
158void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
159 RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
160 auto &TLD = getThreadLocalData();
161 LogWriter *LW = getGlobalLog();
162 if (LW == nullptr)
163 return;
164
165 // Use a simple recursion guard, to handle cases where we're already logging
166 // and for one reason or another, this function gets called again in the same
167 // thread.
168 RecursionGuard G(Guard);
169 if (!G)
170 return;
171
172 uint8_t CPU = 0;
173 uint64_t TSC = ReadTSC(CPU);
174
175 switch (Type) {
176 case XRayEntryType::ENTRY:
177 case XRayEntryType::LOG_ARGS_ENTRY: {
178 // Short circuit if we've reached the maximum depth of the stack.
179 if (TLD.StackEntries++ >= TLD.StackSize)
180 return;
181
182 // When we encounter an entry event, we keep track of the TSC and the CPU,
183 // and put it in the stack.
184 StackEntry E;
185 E.FuncId = FuncId;
186 E.CPU = CPU;
187 E.Type = Type;
188 E.TSC = TSC;
189 auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
190 (sizeof(StackEntry) * (TLD.StackEntries - 1));
191 internal_memcpy(dest: StackEntryPtr, src: &E, n: sizeof(StackEntry));
192 break;
193 }
194 case XRayEntryType::EXIT:
195 case XRayEntryType::TAIL: {
196 if (TLD.StackEntries == 0)
197 break;
198
199 if (--TLD.StackEntries >= TLD.StackSize)
200 return;
201
202 // When we encounter an exit event, we check whether all the following are
203 // true:
204 //
205 // - The Function ID is the same as the most recent entry in the stack.
206 // - The CPU is the same as the most recent entry in the stack.
207 // - The Delta of the TSCs is less than the threshold amount of time we're
208 // looking to record.
209 //
210 // If all of these conditions are true, we pop the stack and don't write a
211 // record and move the record offset back.
212 StackEntry StackTop;
213 auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
214 (sizeof(StackEntry) * TLD.StackEntries);
215 internal_memcpy(dest: &StackTop, src: StackEntryPtr, n: sizeof(StackEntry));
216 if (StackTop.FuncId == FuncId && StackTop.CPU == CPU &&
217 StackTop.TSC < TSC) {
218 auto Delta = TSC - StackTop.TSC;
219 if (Delta < atomic_load(a: &ThresholdTicks, mo: memory_order_relaxed)) {
220 DCHECK(TLD.BufferOffset > 0);
221 TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2;
222 return;
223 }
224 }
225 break;
226 }
227 default:
228 // Should be unreachable.
229 DCHECK(false && "Unsupported XRayEntryType encountered.");
230 break;
231 }
232
233 // First determine whether the delta between the function's enter record and
234 // the exit record is higher than the threshold.
235 XRayRecord R;
236 R.RecordType = RecordTypes::NORMAL;
237 R.CPU = CPU;
238 R.TSC = TSC;
239 R.TId = GetTid();
240 R.PId = internal_getpid();
241 R.Type = Type;
242 R.FuncId = FuncId;
243 auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer);
244 internal_memcpy(dest: FirstEntry + TLD.BufferOffset, src: &R, n: sizeof(R));
245 if (++TLD.BufferOffset == TLD.BufferSize) {
246 SpinMutexLock Lock(&LogMutex);
247 LW->WriteAll(Begin: reinterpret_cast<char *>(FirstEntry),
248 End: reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
249 TLD.BufferOffset = 0;
250 TLD.StackEntries = 0;
251 }
252}
253
254template <class RDTSC>
255void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1,
256 RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
257 auto &TLD = getThreadLocalData();
258 auto FirstEntry =
259 reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer);
260 const auto &BuffLen = TLD.BufferSize;
261 LogWriter *LW = getGlobalLog();
262 if (LW == nullptr)
263 return;
264
265 // First we check whether there's enough space to write the data consecutively
266 // in the thread-local buffer. If not, we first flush the buffer before
267 // attempting to write the two records that must be consecutive.
268 if (TLD.BufferOffset + 2 > BuffLen) {
269 SpinMutexLock Lock(&LogMutex);
270 LW->WriteAll(Begin: reinterpret_cast<char *>(FirstEntry),
271 End: reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
272 TLD.BufferOffset = 0;
273 TLD.StackEntries = 0;
274 }
275
276 // Then we write the "we have an argument" record.
277 InMemoryRawLog(FuncId, Type, ReadTSC);
278
279 RecursionGuard G(Guard);
280 if (!G)
281 return;
282
283 // And, from here on write the arg payload.
284 XRayArgPayload R;
285 R.RecordType = RecordTypes::ARG_PAYLOAD;
286 R.FuncId = FuncId;
287 R.TId = GetTid();
288 R.PId = internal_getpid();
289 R.Arg = Arg1;
290 internal_memcpy(dest: FirstEntry + TLD.BufferOffset, src: &R, n: sizeof(R));
291 if (++TLD.BufferOffset == BuffLen) {
292 SpinMutexLock Lock(&LogMutex);
293 LW->WriteAll(Begin: reinterpret_cast<char *>(FirstEntry),
294 End: reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
295 TLD.BufferOffset = 0;
296 TLD.StackEntries = 0;
297 }
298}
299
300void basicLoggingHandleArg0RealTSC(int32_t FuncId,
301 XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
302 InMemoryRawLog(FuncId, Type, ReadTSC: readTSC);
303}
304
305void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type)
306 XRAY_NEVER_INSTRUMENT {
307 InMemoryRawLog(FuncId, Type, ReadTSC: [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
308 timespec TS;
309 int result = clock_gettime(CLOCK_REALTIME, tp: &TS);
310 if (result != 0) {
311 Report(format: "clock_gettimg(2) return %d, errno=%d.", result, int(errno));
312 TS = {.tv_sec: 0, .tv_nsec: 0};
313 }
314 CPU = 0;
315 return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
316 });
317}
318
319void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type,
320 uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
321 InMemoryRawLogWithArg(FuncId, Type, Arg1, ReadTSC: readTSC);
322}
323
324void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type,
325 uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
326 InMemoryRawLogWithArg(
327 FuncId, Type, Arg1, ReadTSC: [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
328 timespec TS;
329 int result = clock_gettime(CLOCK_REALTIME, tp: &TS);
330 if (result != 0) {
331 Report(format: "clock_gettimg(2) return %d, errno=%d.", result, int(errno));
332 TS = {.tv_sec: 0, .tv_nsec: 0};
333 }
334 CPU = 0;
335 return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
336 });
337}
338
339static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT {
340 ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P);
341 auto ExitGuard = at_scope_exit(fn: [&TLD] {
342 // Clean up dynamic resources.
343 if (TLD.InMemoryBuffer)
344 InternalFree(p: TLD.InMemoryBuffer);
345 if (TLD.ShadowStack)
346 InternalFree(p: TLD.ShadowStack);
347 if (Verbosity())
348 Report(format: "Cleaned up log for TID: %llu\n", GetTid());
349 });
350
351 if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) {
352 if (Verbosity())
353 Report(format: "Skipping buffer for TID: %llu; Offset = %zu\n", GetTid(),
354 TLD.BufferOffset);
355 return;
356 }
357
358 {
359 SpinMutexLock L(&LogMutex);
360 TLD.LogWriter->WriteAll(Begin: reinterpret_cast<char *>(TLD.InMemoryBuffer),
361 End: reinterpret_cast<char *>(TLD.InMemoryBuffer) +
362 (sizeof(XRayRecord) * TLD.BufferOffset));
363 }
364
365 // Because this thread's exit could be the last one trying to write to
366 // the file and that we're not able to close out the file properly, we
367 // sync instead and hope that the pending writes are flushed as the
368 // thread exits.
369 TLD.LogWriter->Flush();
370}
371
372XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize,
373 UNUSED size_t BufferMax, void *Options,
374 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
375 uint8_t Expected = 0;
376 if (!atomic_compare_exchange_strong(a: &BasicInitialized, cmp: &Expected, xchg: 1,
377 mo: memory_order_acq_rel)) {
378 if (Verbosity())
379 Report(format: "Basic logging already initialized.\n");
380 return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
381 }
382
383 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
384 pthread_once(once_control: &OnceInit, init_routine: +[] {
385 pthread_key_create(key: &PThreadKey, destr_function: TLDDestructor);
386 atomic_store(a: &UseRealTSC, v: probeRequiredCPUFeatures(), mo: memory_order_release);
387 // Initialize the global TicksPerSec value.
388 atomic_store(a: &TicksPerSec,
389 v: probeRequiredCPUFeatures() ? getTSCFrequency()
390 : NanosecondsPerSecond,
391 mo: memory_order_release);
392 if (!atomic_load(a: &UseRealTSC, mo: memory_order_relaxed) && Verbosity())
393 Report(format: "WARNING: Required CPU features missing for XRay instrumentation, "
394 "using emulation instead.\n");
395 });
396
397 FlagParser P;
398 BasicFlags F;
399 F.setDefaults();
400 registerXRayBasicFlags(P: &P, F: &F);
401 P.ParseString(s: useCompilerDefinedBasicFlags());
402 auto *EnvOpts = GetEnv(name: "XRAY_BASIC_OPTIONS");
403 if (EnvOpts == nullptr)
404 EnvOpts = "";
405
406 P.ParseString(s: EnvOpts);
407
408 // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options
409 // set through XRAY_OPTIONS instead.
410 if (internal_strlen(s: EnvOpts) == 0) {
411 F.func_duration_threshold_us =
412 flags()->xray_naive_log_func_duration_threshold_us;
413 F.max_stack_depth = flags()->xray_naive_log_max_stack_depth;
414 F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size;
415 }
416
417 P.ParseString(s: static_cast<const char *>(Options));
418 GlobalOptions.ThreadBufferSize = F.thread_buffer_size;
419 GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us;
420 GlobalOptions.MaxStackDepth = F.max_stack_depth;
421 *basicFlags() = F;
422
423 atomic_store(a: &ThresholdTicks,
424 v: atomic_load(a: &TicksPerSec, mo: memory_order_acquire) *
425 GlobalOptions.DurationFilterMicros / 1000000,
426 mo: memory_order_release);
427 __xray_set_handler_arg1(entry: atomic_load(a: &UseRealTSC, mo: memory_order_acquire)
428 ? basicLoggingHandleArg1RealTSC
429 : basicLoggingHandleArg1EmulateTSC);
430 __xray_set_handler(entry: atomic_load(a: &UseRealTSC, mo: memory_order_acquire)
431 ? basicLoggingHandleArg0RealTSC
432 : basicLoggingHandleArg0EmulateTSC);
433
434 // TODO: Implement custom event and typed event handling support in Basic
435 // Mode.
436 __xray_remove_customevent_handler();
437 __xray_remove_typedevent_handler();
438
439 return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
440}
441
442XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
443 uint8_t Expected = 0;
444 if (!atomic_compare_exchange_strong(a: &BasicInitialized, cmp: &Expected, xchg: 0,
445 mo: memory_order_acq_rel) &&
446 Verbosity())
447 Report(format: "Basic logging already finalized.\n");
448
449 // Nothing really to do aside from marking state of the global to be
450 // uninitialized.
451
452 return XRayLogInitStatus::XRAY_LOG_FINALIZED;
453}
454
455XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT {
456 // This really does nothing, since flushing the logs happen at the end of a
457 // thread's lifetime, or when the buffers are full.
458 return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
459}
460
461// This is a handler that, effectively, does nothing.
462void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT {
463}
464
465bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
466 XRayLogImpl Impl{
467 .log_init: basicLoggingInit,
468 .log_finalize: basicLoggingFinalize,
469 .handle_arg0: basicLoggingHandleArg0Empty,
470 .flush_log: basicLoggingFlush,
471 };
472 auto RegistrationResult = __xray_log_register_mode(Mode: "xray-basic", Impl);
473 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
474 Verbosity())
475 Report(format: "Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n",
476 RegistrationResult);
477 if (flags()->xray_naive_log ||
478 !internal_strcmp(s1: flags()->xray_mode, s2: "xray-basic")) {
479 auto SelectResult = __xray_log_select_mode(Mode: "xray-basic");
480 if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
481 if (Verbosity())
482 Report(format: "Failed selecting XRay Basic Mode; error = %d\n", SelectResult);
483 return false;
484 }
485
486 // We initialize the implementation using the data we get from the
487 // XRAY_BASIC_OPTIONS environment variable, at this point of the
488 // implementation.
489 auto *Env = GetEnv(name: "XRAY_BASIC_OPTIONS");
490 auto InitResult =
491 __xray_log_init_mode(Mode: "xray-basic", Config: Env == nullptr ? "" : Env);
492 if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
493 if (Verbosity())
494 Report(format: "Failed initializing XRay Basic Mode; error = %d\n", InitResult);
495 return false;
496 }
497
498 // At this point we know that we've successfully initialized Basic mode
499 // tracing, and the only chance we're going to get for the current thread to
500 // clean-up may be at thread/program exit. To ensure that we're going to get
501 // the cleanup even without calling the finalization routines, we're
502 // registering a program exit function that will do the cleanup.
503 static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT;
504 pthread_once(once_control: &DynamicOnce, init_routine: +[] {
505 static void *FakeTLD = nullptr;
506 FakeTLD = &getThreadLocalData();
507 Atexit(function: +[] { TLDDestructor(P: FakeTLD); });
508 });
509 }
510 return true;
511}
512
513} // namespace __xray
514
515static auto UNUSED Unused = __xray::basicLogDynamicInitializer();
516