1//===-- tsan_adaptive_delay.h -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of ThreadSanitizer (TSan), a race detector.
10//
11//===----------------------------------------------------------------------===//
12
13#include "tsan_adaptive_delay.h"
14
15#include "interception/interception.h"
16#include "sanitizer_common/sanitizer_allocator_internal.h"
17#include "sanitizer_common/sanitizer_common.h"
18#include "sanitizer_common/sanitizer_errno_codes.h"
19#include "tsan_interface.h"
20#include "tsan_rtl.h"
21
22namespace __tsan {
23
24namespace {
25
26// =============================================================================
27// DelaySpec: Represents a delay configuration parsed from flag strings
28// =============================================================================
29//
30// Delay can be specified as:
31// - "spin=N" : Spin for up to N cycles (very short delays)
32// - "yield" : Call sched_yield() once
33// - "sleep_us=N" : Sleep for up to N microseconds
34
35enum class DelayType { Spin, Yield, SleepUs };
36
37struct DelaySpec {
38 DelayType type;
39 int value; // spin cycles or sleep_us value; ignored for yield
40
41 // Both estimates below are used internally as a very rough estimate for
42 // delay overhead calculation, to cap the overall delay to the
43 // adaptive_delay_aggressiveness option. They're not intended to be 100%
44 // accurate on any or all architectures/operating systems, or for use in any
45 // other contexts.
46 //
47 // Estimated nanoseconds per spin cycle (volatile loop iteration).
48 static constexpr u64 kNsPerSpinCycle = 1;
49 // Estimated nanoseconds for a yield (context switch overhead)
50 static constexpr u64 kNsPerYield = 500;
51
52 static DelaySpec Parse(const char* str) {
53 DelaySpec spec;
54 if (internal_strncmp(s1: str, s2: "spin=", n: 5) == 0) {
55 spec.type = DelayType::Spin;
56 spec.value = internal_atoll(nptr: str + 5);
57 if (spec.value <= 0 || spec.value > 10000) {
58 Printf(
59 format: "FATAL: Invalid TSAN_OPTIONS spin value '%s'; value must be "
60 "between 1 and 10000\n",
61 str);
62 Die();
63 }
64 } else if (internal_strcmp(s1: str, s2: "yield") == 0) {
65 spec.type = DelayType::Yield;
66 spec.value = 0;
67 } else if (internal_strncmp(s1: str, s2: "sleep_us=", n: 9) == 0) {
68 spec.type = DelayType::SleepUs;
69 spec.value = internal_atoll(nptr: str + 9);
70 if (spec.value <= 0) {
71 Printf(
72 format: "FATAL: Invalid TSAN_OPTIONS sleep_us value '%s'; value must be a "
73 "positive integer\n",
74 str);
75 Die();
76 }
77 } else {
78 Printf(format: "FATAL: Unrecognized delay spec '%s', check TSAN_OPTIONS\n", str);
79 Die();
80 }
81 return spec;
82 }
83
84 const char* TypeName() const {
85 switch (type) {
86 case DelayType::Spin:
87 return "spin";
88 case DelayType::Yield:
89 return "yield";
90 case DelayType::SleepUs:
91 return "sleep_us";
92 }
93 return "unknown";
94 }
95};
96
97} // namespace
98
99// =============================================================================
100// AdaptiveDelayImpl: Time-budget aware delay injection for race exposure
101// =============================================================================
102//
103// This implementation injects delays to expose data races while maintaining a
104// configurable overhead target. It uses several strategies:
105//
106// 1. Time-Budget Controller: Tracks cumulative delays vs wall-clock time
107// and adjusts delay probability to maintain target overhead.
108//
109// 2. Tiered Delays: Different delay strategies for different op types:
110// - Relaxed atomics: Very rare sampling, tiny spin delays
111// - Sync atomics (acq/rel/seq_cst): Moderate sampling, small usleep
112// - Mutex/CV ops: Higher sampling, larger delays
113// - Thread create/join: Always delay (rare but high value)
114//
115// 3. Address-based Sampling: Exponential backoff per address to avoid
116// repeatedly delaying hot atomics.
117
118struct AdaptiveDelayImpl {
119 ALWAYS_INLINE static AdaptiveDelayState* TLS() {
120 return &cur_thread()->adaptive_delay_state;
121 }
122 ALWAYS_INLINE static unsigned int* GetRandomSeed() {
123 return &TLS()->tls_random_seed_;
124 }
125 ALWAYS_INLINE static void SetRandomSeed(unsigned int seed) {
126 TLS()->tls_random_seed_ = seed;
127 }
128
129 // The public facing option is adaptive_delay_aggressiveness, which is an
130 // opaque value for the user to tune the amount of delay injected into the
131 // program. Internally, the implementation maps the aggressiveness to a target
132 // percent delay for the overall program runtime. It's not easy to implement
133 // a true wall clock delay target (e.g., 25% program wall time slowdown)
134 // because 1) spin loops and yield are hard to calculate actual wall time
135 // slowness and 2) usleep(N) is often slower than advertised. Thus, we keep
136 // the user facing parameter opaque to not under deliver on a promise of
137 // percent wall time slowdown.
138 struct TimeBudget {
139 int target_overhead_pct_;
140 Percent target_low_;
141 Percent target_high_;
142
143 void Init(int target_pct) {
144 target_overhead_pct_ = target_pct;
145 target_low_ = Percent::FromPct(
146 pct: target_overhead_pct_ >= 5 ? target_overhead_pct_ - 5 : 0);
147 target_high_ = Percent::FromPct(pct: target_overhead_pct_ + 5);
148 }
149
150 static constexpr u64 BucketDurationNs = 30'000'000'000ULL;
151
152 void RecordDelay(u64 delay_ns) {
153 u64 now = NanoTime();
154 u64 elapsed_ns = now - TLS()->bucket_start_ns_;
155
156 if (elapsed_ns >= BucketDurationNs) {
157 // Shift: old bucket is discarded, new becomes old, start fresh new
158 TLS()->delay_buckets_ns_[0] = TLS()->delay_buckets_ns_[1];
159 TLS()->delay_buckets_ns_[1] = 0;
160 TLS()->bucket_start_ns_ = now;
161 TLS()->bucket0_window_ns = BucketDurationNs;
162 }
163
164 TLS()->delay_buckets_ns_[1] += delay_ns;
165 }
166
167 Percent GetOverheadPercent() {
168 u64 now = NanoTime();
169 u64 elapsed_ns = now - TLS()->bucket_start_ns_;
170
171 // Need at least 1ms to calculate
172 if (elapsed_ns < 1'000'000ULL)
173 return Percent::FromPct(pct: 0);
174
175 if (elapsed_ns > BucketDurationNs * 2) {
176 // Both buckets are stale
177 return Percent::FromPct(pct: 0);
178 } else if (elapsed_ns > BucketDurationNs) {
179 // bucket[0] is stale, use only bucket[1] (current bucket)
180 u64 total_delay_ns = TLS()->delay_buckets_ns_[1];
181 return Percent::FromRatio(numerator: total_delay_ns, denominator: elapsed_ns);
182 } else {
183 u64 total_delay_ns =
184 TLS()->delay_buckets_ns_[0] + TLS()->delay_buckets_ns_[1];
185 u64 window_ns = TLS()->bucket0_window_ns + elapsed_ns;
186 return Percent::FromRatio(numerator: total_delay_ns, denominator: window_ns);
187 }
188 }
189
190 bool ShouldDelay() {
191 Percent ratio = GetOverheadPercent();
192
193 if (ratio < target_low_)
194 return true;
195 if (ratio > target_high_)
196 return false;
197
198 // Linear interpolation: at target_low -> 100%, at target_high -> 0%
199 Percent prob = (target_high_ - ratio) / (target_high_ - target_low_);
200 return prob.RandomCheck(seed: GetRandomSeed());
201 }
202 };
203
204 // Address Sampler with Exponential Backoff
205 struct AddressSampler {
206 static constexpr u64 TABLE_SIZE = 2048;
207 struct Entry {
208 atomic_uintptr_t addr_;
209 atomic_uint32_t count_;
210 };
211 Entry table_[TABLE_SIZE];
212 static constexpr u32 ExponentialBackoffCap = 64;
213
214 void Init() {
215 for (u64 i = 0; i < TABLE_SIZE; ++i) {
216 atomic_store(a: &table_[i].addr_, v: 0, mo: memory_order_relaxed);
217 atomic_store(a: &table_[i].count_, v: 0, mo: memory_order_relaxed);
218 }
219 }
220
221 static ALWAYS_INLINE u64 splitmix64(u64 x) {
222 x = (x ^ (x >> 30)) * 0xBF58476D1CE4E5B9ULL;
223 x = (x ^ (x >> 27)) * 0x94D049BB133111EBULL;
224 x = x ^ (x >> 31);
225 return x;
226 }
227
228 // Uses exponential backoff: delay on 1st, 2nd, 4th, 8th, 16th, ...
229 bool ShouldDelayAddr(uptr addr) {
230 u64 idx = splitmix64(x: addr >> 3) & (TABLE_SIZE - 1);
231 Entry& e = table_[idx];
232
233 // This function is not thread safe.
234 // If two threads access the same hashed entry in parallel,
235 // worst case, we may end up returning true too often. This is
236 // acceptable...instead of full locking.
237
238 uptr stored_addr = atomic_load(a: &e.addr_, mo: memory_order_relaxed);
239 if (stored_addr != addr) {
240 // Hash Collision - reset
241 atomic_store(a: &e.addr_, v: addr, mo: memory_order_relaxed);
242 atomic_store(a: &e.count_, v: 1, mo: memory_order_relaxed);
243 return true;
244 }
245
246 u32 count = atomic_fetch_add(a: &e.count_, v: 1, mo: memory_order_relaxed) + 1;
247
248 if ((count & (count - 1)) == 0 && count <= ExponentialBackoffCap)
249 return true;
250 return false;
251 }
252 };
253
254 TimeBudget budget_;
255 AddressSampler sampler_;
256
257 int relaxed_sample_rate_;
258 int sync_atomic_sample_rate_;
259 int mutex_sample_rate_;
260 DelaySpec atomic_delay_;
261 DelaySpec sync_delay_;
262
263 void Init() { InitTls(); }
264
265 void InitTls() {
266 TLS()->bucket_start_ns_ = NanoTime();
267 TLS()->delay_buckets_ns_[0] = 0;
268 TLS()->delay_buckets_ns_[1] = 0;
269 TLS()->bucket0_window_ns = 0;
270
271 SetRandomSeed(NanoTime());
272 TLS()->tls_initialized_ = true;
273 }
274
275 bool IsTlsInitialized() const { return TLS()->tls_initialized_; }
276
277 AdaptiveDelayImpl() {
278 relaxed_sample_rate_ = flags()->adaptive_delay_relaxed_sample_rate;
279 sync_atomic_sample_rate_ = flags()->adaptive_delay_sync_atomic_sample_rate;
280 mutex_sample_rate_ = flags()->adaptive_delay_mutex_sample_rate;
281 atomic_delay_ = DelaySpec::Parse(str: flags()->adaptive_delay_max_atomic);
282 sync_delay_ = DelaySpec::Parse(str: flags()->adaptive_delay_max_sync);
283
284 int delay_aggressiveness = flags()->adaptive_delay_aggressiveness;
285 if (delay_aggressiveness < 1)
286 delay_aggressiveness = 1;
287
288 budget_.Init(target_pct: delay_aggressiveness);
289 sampler_.Init();
290
291 VPrintf(1, "INFO: ThreadSanitizer AdaptiveDelay initialized\n");
292 VPrintf(1, " Delay aggressiveness: %d\n", delay_aggressiveness);
293 VPrintf(1, " Relaxed atomic sample rate: 1/%d\n", relaxed_sample_rate_);
294 VPrintf(1, " Sync atomic sample rate: 1/%d\n", sync_atomic_sample_rate_);
295 VPrintf(1, " Mutex sample rate: 1/%d\n", mutex_sample_rate_);
296 VPrintf(1, " Atomic delay: %s=%d\n", atomic_delay_.TypeName(),
297 atomic_delay_.value);
298 VPrintf(1, " Sync delay: %s=%d\n", sync_delay_.TypeName(),
299 sync_delay_.value);
300 }
301
302 void DoSpinDelay(int iters) {
303 volatile int v = 0;
304 for (int i = 0; i < iters; ++i) v = i;
305 (void)v;
306 budget_.RecordDelay(delay_ns: iters * DelaySpec::kNsPerSpinCycle);
307 }
308
309 void DoYieldDelay() {
310 internal_sched_yield();
311 budget_.RecordDelay(delay_ns: DelaySpec::kNsPerYield);
312 }
313
314 void DoSleepUsDelay(int max_us) {
315 // Use two Rand() calls to get full 32-bit range for larger sleep values
316 u32 rnd = ((u32)Rand(state: GetRandomSeed()) << 16) | Rand(state: GetRandomSeed());
317 int delay_us = 1 + (rnd % max_us);
318 internal_usleep(useconds: delay_us);
319 budget_.RecordDelay(delay_ns: delay_us * 1000ULL);
320 }
321
322 void ExecuteDelay(const DelaySpec& spec) {
323 switch (spec.type) {
324 case DelayType::Spin: {
325 int iters = 1 + (Rand(state: GetRandomSeed()) % spec.value);
326 DoSpinDelay(iters);
327 break;
328 }
329 case DelayType::Yield:
330 DoYieldDelay();
331 break;
332 case DelayType::SleepUs:
333 DoSleepUsDelay(max_us: spec.value);
334 break;
335 }
336 }
337
338 void AtomicRelaxedOpDelay() {
339 if ((Rand(state: GetRandomSeed()) % relaxed_sample_rate_) != 0)
340 return;
341 if (!budget_.ShouldDelay())
342 return;
343
344 int iters = 10 + (Rand(state: GetRandomSeed()) % 10);
345 DoSpinDelay(iters);
346 }
347
348 void AtomicSyncOpDelay(uptr* addr) {
349 if ((Rand(state: GetRandomSeed()) % sync_atomic_sample_rate_) != 0)
350 return;
351 if (!budget_.ShouldDelay())
352 return;
353
354 if (addr && !sampler_.ShouldDelayAddr(addr: *addr))
355 return;
356
357 ExecuteDelay(spec: atomic_delay_);
358 }
359
360 void AtomicOpFence(int mo) {
361 CHECK(IsTlsInitialized());
362
363 if (mo < mo_acquire)
364 AtomicRelaxedOpDelay();
365 else
366 AtomicSyncOpDelay(addr: nullptr);
367 }
368
369 void AtomicOpAddr(uptr addr, int mo) {
370 CHECK(IsTlsInitialized());
371
372 if (mo < mo_acquire)
373 AtomicRelaxedOpDelay();
374 else
375 AtomicSyncOpDelay(addr: &addr);
376 }
377
378 void UnsampledDelay() {
379 CHECK(IsTlsInitialized());
380
381 if (!budget_.ShouldDelay())
382 return;
383
384 ExecuteDelay(spec: sync_delay_);
385 }
386
387 void SyncOp() {
388 CHECK(IsTlsInitialized());
389
390 if ((Rand(state: GetRandomSeed()) % mutex_sample_rate_) != 0)
391 return;
392 if (!budget_.ShouldDelay())
393 return;
394
395 ExecuteDelay(spec: sync_delay_);
396 }
397
398 void BeforeChildThreadRuns() {
399 InitTls();
400 UnsampledDelay();
401 }
402
403 void AfterThreadCreation() { UnsampledDelay(); }
404};
405
406AdaptiveDelayImpl& GetImpl() {
407 static AdaptiveDelayImpl impl;
408 return impl;
409}
410
411bool AdaptiveDelay::is_adaptive_delay_enabled;
412
413void AdaptiveDelay::InitImpl() {
414 AdaptiveDelay::is_adaptive_delay_enabled = flags()->enable_adaptive_delay;
415 if (!AdaptiveDelay::is_adaptive_delay_enabled)
416 return;
417
418 GetImpl().Init();
419}
420
// Out-of-line trampolines from the public AdaptiveDelay interface to the
// lazily-constructed singleton implementation (GetImpl).
// NOTE(review): these presumably run only when is_adaptive_delay_enabled is
// set, gated by inline wrappers in tsan_adaptive_delay.h — confirm there.
void AdaptiveDelay::SyncOpImpl() { GetImpl().SyncOp(); }
// Delay hook for atomic fences; mo is the fence's memory order.
void AdaptiveDelay::AtomicOpFenceImpl(int mo) { GetImpl().AtomicOpFence(mo); }
// Delay hook for atomic operations on a concrete address.
void AdaptiveDelay::AtomicOpAddrImpl(__sanitizer::uptr addr, int mo) {
  GetImpl().AtomicOpAddr(addr, mo);
}
// Called on the creating thread right after it spawns a child thread.
void AdaptiveDelay::AfterThreadCreationImpl() {
  GetImpl().AfterThreadCreation();
}
// Called on the new child thread before it runs user code.
void AdaptiveDelay::BeforeChildThreadRunsImpl() {
  GetImpl().BeforeChildThreadRuns();
}
432
433} // namespace __tsan
434