//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

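// Appends a memory access event to the thread trace if there is space in the
// current trace part. Uses the compact EventAccess encoding when the PC delta
// from the previous event fits into EventAccess::kPCBits, and falls back to
// EventAccessExt otherwise. Returns false if the trace part is full and the
// caller needs to switch to a new part first.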
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

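// Same as TryTraceMemoryAccess, but emits an EventAccessRange event, which
// stores the full PC and the access size split into low/high parts instead of
// the compact per-access encoding. Returns false if the trace part is full.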
ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (the one that contains
  // kFreeSid) triggers the race, but the second element contains info about
  // the freeing thread, so take that one.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

#if !TSAN_VECTORIZE
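// Scalar fallback used when TSAN_VECTORIZE is disabled: returns true if one of
// the kShadowCnt shadow slots already contains exactly the same access (same
// sid/epoch/access mask), in which case the fast path can return without
// tracing or race checking. For reads it also matches writes from the same
// thread/epoch and treats the Shadow::kRodata marker as "same access".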
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

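// Scalar fallback for the race check: compares the current access against all
// shadow slots and reports a race via DoReportRace if it finds a conflicting
// access from another thread that does not happen-before the current one.
// If no race is found, the current access is stored into an empty slot, a
// compatible slot from the same thread, or a pseudo-randomly evicted slot.
// Returns true iff a race was reported.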
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and have already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
  // and now do smaller reads/writes; these could also be considered "same
  // access". However, it would make the check more expensive, so it's unclear
  // if it's worth it. But this would conserve trace space, so it's useful
  // besides the potential speed up.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match a read against both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as the rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory accesses,
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

273
274NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
275 u32 race_mask, m128 shadow, AccessType typ) {
276 // race_mask points which of the shadow elements raced with the current
277 // access. Extract that element.
278 CHECK_NE(race_mask, 0);
279 u32 old;
280 // Note: _mm_extract_epi32 index must be a constant value.
281 switch (__builtin_ffs(race_mask) / 4) {
282 case 0:
283 old = _mm_extract_epi32(shadow, 0);
284 break;
285 case 1:
286 old = _mm_extract_epi32(shadow, 1);
287 break;
288 case 2:
289 old = _mm_extract_epi32(shadow, 2);
290 break;
291 case 3:
292 old = _mm_extract_epi32(shadow, 3);
293 break;
294 }
295 Shadow prev(static_cast<RawShadow>(old));
296 // For the free shadow markers the first element (that contains kFreeSid)
297 // triggers the race, but the second element contains info about the freeing
298 // thread, take it.
299 if (prev.sid() == kFreeSid)
300 prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
301 DoReportRace(thr, shadow_mem, cur, old: prev, typ);
302}
303
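// Vectorized race check: all kShadowCnt (4) shadow slots are compared against
// the current access at once. Each 32-bit shadow word encodes the access byte
// mask in bits 0-7, the sid in bits 8-15, the epoch in bits 16-29 and the
// is_read/is_atomic flags in bits 30-31 (see the masks used below).
// Returns true iff a race was reported.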
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sid's if the access is the same,
  // rw weaker and happens before. However, just checking the access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
# define LOAD_EPOCH(idx)                                                       \
    if (LIKELY(race_mask & (1 << (idx * 4)))) {                                \
      u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                          \
      u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));     \
      thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx);  \
    }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
# undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem)                            \
    const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));   \
    const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

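// Formats a raw shadow value as a human-readable string for the DPrintf2
// debug output below; buf must be at least 64 bytes.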
char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions allow turning memory access and func
// entry/exit callbacks into leaf functions with all associated performance
// benefits. These hottest callbacks do only 2 slow-path calls: reporting a
// race and trace part switching. Race reporting is easy to turn into a tail
// call: we just always return from the runtime after reporting a race. But
// trace part switching is harder because it needs to happen in the middle of
// callbacks. To turn it into a tail call we immediately return after the
// TraceRestart* functions, and the TraceRestart* functions themselves recurse
// into the callback after switching the trace part. As a result the hottest
// callbacks contain only tail calls, which effectively makes them leaf
// functions (can use all registers, no frame setup, etc).
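//
// For illustration only (a simplified sketch, not the actual interface code,
// which lives in tsan_interface.inc): a compiler-instrumented callback roughly
// of the form
//
//   void __tsan_read4(void* addr) {
//     MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessRead);
//   }
//
// then compiles down to a leaf function whose only calls are the tail calls
// into the slow paths (TraceRestartMemoryAccess and DoReportRace).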
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

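// Handles a 16-byte access as two full 8-byte shadow cells. The range is
// traced as a single EventAccessRange event, but each of the two cells is
// checked for races separately.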
ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

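// Handles an access of up to 8 bytes that may straddle a shadow cell boundary.
// The access is split at the cell boundary: the first part covers the bytes up
// to the end of the current cell, the second part covers the remainder in the
// next cell, and each part is checked against its own shadow cell.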
ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

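// Fills the shadow range [p, end) so that the first slot of every shadow cell
// holds v and the remaining slots are Shadow::kEmpty. Both pointers must be
// aligned to a whole shadow cell (kShadowCnt * kShadowSize bytes).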
void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

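// Sets the shadow for the application range [addr, addr + size) to val
// (one slot per cell, the rest cleared). Small ranges are written directly;
// for large ranges only the beginning and the end are written and the middle
// shadow pages are remapped so the kernel provides fresh zero (Shadow::kEmpty)
// pages lazily.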
static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // We don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only the beginning and the end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least the first kPageSize/2 up to the page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset the middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}

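// Resets the shadow for the given application range to Shadow::kEmpty,
// widening the range to whole shadow cells first.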
void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. This is not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range) and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

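// Checks a single shadow cell of a range access: returns false early if the
// cell already contains the same access, otherwise runs the race check.
// Returns true iff a race was reported.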
ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

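// Processes an arbitrary-size range access: the whole range is traced as one
// EventAccessRange event, then decomposed into an unaligned head (if any),
// whole shadow cells in the middle, and an unaligned tail (if any), each
// checked with MemoryAccessRangeOne.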
template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);

#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem start: %p\n", (void*)addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem end: %p\n", (void*)(addr + size - 1));
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
    DCHECK(IsShadowMem(shadow_mem));
  }

  RawShadow* shadow_mem_end = reinterpret_cast<RawShadow*>(
      reinterpret_cast<uptr>(shadow_mem) + size * kShadowMultiplier - 1);
  if (!IsShadowMem(shadow_mem_end)) {
    Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end,
           (void*)(addr + size - 1));
    Printf(
        "Shadow start addr (ok): %p (%p); size: 0x%zx; kShadowMultiplier: "
        "%zx\n",
        shadow_mem, (void*)addr, size, kShadowMultiplier);
    DCHECK(IsShadowMem(shadow_mem_end));
  }
#endif

  // Access to the .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once so we don't have to check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle the unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle the middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle the ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
# include "tsan_interface.inc"
#endif