//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

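// Appends a memory access event to the thread trace. It uses the compact
// EventAccess encoding when the PC delta from the previous event fits into
// EventAccess::kPCBits, and falls back to the larger EventAccessExt record
// otherwise. Returns false if the current trace part is full and the caller
// must call TraceSwitchPart and retry.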
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

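// Same as TryTraceMemoryAccess, but emits a single EventAccessRange event
// covering [addr, addr+size) instead of a per-access event.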
ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

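// Records a mutex lock event. The stack ID does not fit into a single event
// field, so it is split across stack_lo/stack_hi (see EventLock).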
void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in the future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

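// ContainsSameAccess/CheckRaces come in two flavors: a scalar fallback and an
// SSE-vectorized version that processes all kShadowCnt shadow slots of a cell
// at once. LOAD_CURRENT_SHADOW provides the flavor-specific inputs for them.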
#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

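// Scans the shadow slots of one cell and reports a race if the current access
// overlaps a previous access from a different thread slot (sid), the two are
// not both reads/atomics, and the previous access is not ordered before the
// current one according to thr->clock. Otherwise it stores the current access
// into a free or replaceable slot.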
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8-byte writes)
  // and now do smaller reads/writes; these could also be considered the "same
  // access". However, it would make the check more expensive, so it's unclear
  // if it's worth it. But this would conserve trace space, so it's useful
  // besides the potential speedup.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match a read with both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as the rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory accesses,
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}

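// Vectorized variant of CheckRaces: compares the current access against all
// kShadowCnt shadow slots in parallel using 128-bit SSE operations. The fast
// path computes a "no race" mask (no byte overlap, same sid, or both
// reads/atomics); only slots that fail it go through the epoch check in the
// SHARED block below.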
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sids if the access is the same,
  // rw is weaker, and it happens-before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
# define LOAD_EPOCH(idx)                                                      \
  if (LIKELY(race_mask & (1 << (idx * 4)))) {                                 \
    u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                           \
    u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));      \
    thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx);   \
  }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
# undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem)                                 \
  const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));          \
  const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

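// Formats a shadow value into buf for debug logging (used by the DPrintf2
// call in MemoryAccess below).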
char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions allow turning the memory access and
// func entry/exit callbacks into leaf functions with all associated
// performance benefits. These hottest callbacks make only 2 slow path calls:
// reporting a race and trace part switching. Race reporting is easy to turn
// into a tail call: we just always return from the runtime after reporting a
// race. But trace part switching is harder because it needs to happen in the
// middle of callbacks. To turn it into a tail call we immediately return after
// TraceRestart* functions, and the TraceRestart* functions themselves recurse
// into the callback after switching the trace part. As a result the hottest
// callbacks contain only tail calls, which effectively makes them leaf
// functions (can use all registers, no frame setup, etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

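// A 16-byte access spans two adjacent shadow cells. It is traced as a single
// range event and then each of the two cells is checked separately.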
ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

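// An unaligned access of up to 8 bytes may straddle a shadow cell boundary.
// If it does, it is split into two parts (size1 bytes in the first cell and
// the rest in the next one) and each part is checked separately.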
ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

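// Fills the shadow for [p, end) with v: the first slot of every cell gets v
// and the remaining kShadowCnt-1 slots are cleared to Shadow::kEmpty.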
void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LT(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end - 1));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

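// Resets the shadow for an application memory range to val. Small ranges are
// overwritten directly; for large ranges only the beginning and end are set
// explicitly and the middle is re-mmapped so that we do not have to touch a
// lot of shadow memory.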
static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only beginning and end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least first kPageSize/2 to page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  // begin must be < mid1.
  ShadowSet(begin, mid1, val);
  // Reset middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  if (mid2 < end)
    ShadowSet(mid2, end, val);
  else
    DCHECK_EQ(mid2, end);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. It's not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range), and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

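// Checks a single shadow cell for the range access walk below. Returns true
// if a race was detected and reported.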
ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

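// Processes a range access cell by cell: an unaligned head (if any), then
// whole kShadowCell-sized cells, then the remaining tail. The whole range is
// traced with a single range event before the shadow checks.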
template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);
  DCHECK_NE(size, 0);
#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem start: %p\n", (void*)addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem end: %p\n", (void*)(addr + size - 1));
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
    DCHECK(IsShadowMem(shadow_mem));
  }

  uptr rounded_size =
      (RoundUpTo(addr + size, kShadowCell) - RoundDownTo(addr, kShadowCell));
  RawShadow* shadow_mem_end =
      shadow_mem + rounded_size / kShadowCell * kShadowCnt;
  if (!IsShadowMem(shadow_mem_end - 1)) {
    Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end - 1,
           (void*)(addr + size - 1));
    Printf(
        "Shadow start addr (ok): %p (%p); size: 0x%zx; rounded_size: 0x%zx; "
        "kShadowMultiplier: %zx\n",
        shadow_mem, (void*)addr, size, rounded_size, kShadowMultiplier);
    DCHECK(IsShadowMem(shadow_mem_end - 1));
  }
#endif

  // Access to .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once to not check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
# include "tsan_interface.inc"
#endif