//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

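// Appends a memory access event to the thread trace. It uses the compact
// EventAccess encoding when the PC delta from the previous event fits into
// EventAccess::kPCBits, and falls back to the larger EventAccessExt record
// otherwise. Returns false if the current trace part is full and the caller
// must call TraceSwitchPart and retry.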
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

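// Same as TryTraceMemoryAccess, but emits a single EventAccessRange event
// covering [addr, addr+size) instead of a per-access event.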
ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

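// Records a mutex lock event. The stack ID does not fit into a single event
// field, so it is split across stack_lo/stack_hi (see EventLock).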
void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in the future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

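// ContainsSameAccess/CheckRaces come in two flavors: a scalar fallback and an
// SSE-vectorized version that processes all kShadowCnt shadow slots of a cell
// at once. LOAD_CURRENT_SHADOW provides the flavor-specific inputs for them.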
#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

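// Scans the shadow slots of one cell and reports a race if the current access
// overlaps a previous access from a different thread slot (sid), the two are
// not both reads/atomics, and the previous access is not ordered before the
// current one according to thr->clock. Otherwise it stores the current access
// into a free or replaceable slot.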
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8-byte writes)
  // and now do smaller reads/writes; these could also be considered the "same
  // access". However, it would make the check more expensive, so it's unclear
  // if it's worth it. But this would conserve trace space, so it's useful
  // besides the potential speedup.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match a read with both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as the rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory accesses,
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}

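// Vectorized variant of CheckRaces: compares the current access against all
// kShadowCnt shadow slots in parallel using 128-bit SSE operations. The fast
// path computes a "no race" mask (no byte overlap, same sid, or both
// reads/atomics); only slots that fail it go through the epoch check in the
// SHARED block below.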
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sids if the access is the same,
  // rw is weaker, and it happens-before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
# define LOAD_EPOCH(idx)                                                      \
  if (LIKELY(race_mask & (1 << (idx * 4)))) {                                 \
    u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                           \
    u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));      \
    thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx);   \
  }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
# undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem)                                 \
  const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));          \
  const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

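// Formats a shadow value into buf for debug logging (used by the DPrintf2
// call in MemoryAccess below).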
char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions allow turning the memory access and
// func entry/exit callbacks into leaf functions with all associated
// performance benefits. These hottest callbacks make only 2 slow path calls:
// reporting a race and trace part switching. Race reporting is easy to turn
// into a tail call: we just always return from the runtime after reporting a
// race. But trace part switching is harder because it needs to happen in the
// middle of callbacks. To turn it into a tail call we immediately return after
// TraceRestart* functions, and the TraceRestart* functions themselves recurse
// into the callback after switching the trace part. As a result the hottest
// callbacks contain only tail calls, which effectively makes them leaf
// functions (can use all registers, no frame setup, etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

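// A 16-byte access spans two adjacent shadow cells. It is traced as a single
// range event and then each of the two cells is checked separately.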
ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

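// An unaligned access of up to 8 bytes may straddle a shadow cell boundary.
// If it does, it is split into two parts (size1 bytes in the first cell and
// the rest in the next one) and each part is checked separately.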
ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

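// Fills the shadow for [p, end) with v: the first slot of every cell gets v
// and the remaining kShadowCnt-1 slots are cleared to Shadow::kEmpty.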
void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LT(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end - 1));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

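// Resets the shadow for an application memory range to val. Small ranges are
// overwritten directly; for large ranges only the beginning and end are set
// explicitly and the middle is re-mmapped so that we do not have to touch a
// lot of shadow memory.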
static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only beginning and end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least first kPageSize/2 to page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  // begin must be < mid1.
  ShadowSet(begin, mid1, val);
  // Reset middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  if (mid2 < end)
    ShadowSet(mid2, end, val);
  else
    DCHECK_EQ(mid2, end);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. It's not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range), and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

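// Checks a single shadow cell for the range access walk below. Returns true
// if a race was detected and reported.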
ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

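// Processes a range access cell by cell: an unaligned head (if any), then
// whole kShadowCell-sized cells, then the remaining tail. The whole range is
// traced with a single range event before the shadow checks.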
template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);
  DCHECK_NE(size, 0);
#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem start: %p\n", (void*)addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem end: %p\n", (void*)(addr + size - 1));
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
    DCHECK(IsShadowMem(shadow_mem));
  }

  uptr rounded_size =
      (RoundUpTo(addr + size, kShadowCell) - RoundDownTo(addr, kShadowCell));
  RawShadow* shadow_mem_end =
      shadow_mem + rounded_size / kShadowCell * kShadowCnt;
  if (!IsShadowMem(shadow_mem_end - 1)) {
    Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end - 1,
           (void*)(addr + size - 1));
    Printf(
        "Shadow start addr (ok): %p (%p); size: 0x%zx; rounded_size: 0x%zx; "
        "kShadowMultiplier: %zx\n",
        shadow_mem, (void*)addr, size, rounded_size, kShadowMultiplier);
    DCHECK(IsShadowMem(shadow_mem_end - 1));
  }
#endif

  // Access to .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once to not check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
# include "tsan_interface.inc"
#endif