1//===-- tsd_shared.h --------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef SCUDO_TSD_SHARED_H_
10#define SCUDO_TSD_SHARED_H_
11
12#include "tsd.h"
13
14#include "string_utils.h"
15
16#if SCUDO_HAS_PLATFORM_TLS_SLOT
17// This is a platform-provided header that needs to be on the include path when
18// Scudo is compiled. It must declare a function with the prototype:
19// uintptr_t *getPlatformAllocatorTlsSlot()
20// that returns the address of a thread-local word of storage reserved for
21// Scudo, that must be zero-initialized in newly created threads.
22#include "scudo_platform_tls_slot.h"
23#endif
24
25namespace scudo {
26
27template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount>
28struct TSDRegistrySharedT {
29 using ThisT = TSDRegistrySharedT<Allocator, TSDsArraySize, DefaultTSDCount>;
30
31 struct ScopedTSD {
32 ALWAYS_INLINE ScopedTSD(ThisT &TSDRegistry) {
33 CurrentTSD = TSDRegistry.getTSDAndLock();
34 DCHECK_NE(CurrentTSD, nullptr);
35 }
36
37 ~ScopedTSD() { CurrentTSD->unlock(); }
38
39 TSD<Allocator> &operator*() { return *CurrentTSD; }
40
41 TSD<Allocator> *operator->() {
42 CurrentTSD->assertLocked(/*BypassCheck=*/false);
43 return CurrentTSD;
44 }
45
46 private:
47 TSD<Allocator> *CurrentTSD;
48 };
49
50 void init(Allocator *Instance) EXCLUDES(Mutex) {
51 ScopedLock L(Mutex);
52 // If more than one thread is initializing at the exact same moment, the
53 // threads that lose don't need to do anything.
54 if (UNLIKELY(atomic_load_relaxed(&Initialized) != 0))
55 return;
56
57 Instance->init();
58 for (u32 I = 0; I < TSDsArraySize; I++)
59 TSDs[I].init(Instance);
60 const u32 NumberOfCPUs = getNumberOfCPUs();
61 setNumberOfTSDs((NumberOfCPUs == 0) ? DefaultTSDCount
62 : Min(A: NumberOfCPUs, B: DefaultTSDCount));
63 atomic_store_relaxed(A: &Initialized, V: 1);
64 }
65
66 void initOnceMaybe(Allocator *Instance) {
67 if (LIKELY(atomic_load_relaxed(&Initialized) != 0))
68 return;
69 init(Instance); // Sets Initialized.
70 }
71
72 void unmapTestOnly(Allocator *Instance) {
73 ScopedLock L(Mutex);
74 for (u32 I = 0; I < TSDsArraySize; I++) {
75 TSDs[I].commitBack(Instance);
76 TSDs[I] = {};
77 }
78 setCurrentTSD(nullptr);
79 atomic_store_relaxed(A: &Initialized, V: 0);
80 }
81
82 void drainCaches(Allocator *Instance) {
83 u32 TotalTSDs = atomic_load_relaxed(A: &NumberOfTSDs);
84 for (uptr I = 0; I < TotalTSDs; ++I) {
85 TSDs[I].lock();
86 Instance->drainCache(&TSDs[I]);
87 TSDs[I].unlock();
88 }
89 }
90
91 ALWAYS_INLINE void initThreadMaybe(Allocator *Instance,
92 UNUSED bool MinimalInit) {
93 if (LIKELY(getCurrentTSD()))
94 return;
95 initThread(Instance);
96 }
97
98 void disable() NO_THREAD_SAFETY_ANALYSIS {
99 Mutex.lock();
100 for (u32 I = 0; I < TSDsArraySize; I++)
101 TSDs[I].lock();
102 }
103
104 void enable() NO_THREAD_SAFETY_ANALYSIS {
105 for (s32 I = static_cast<s32>(TSDsArraySize - 1); I >= 0; I--)
106 TSDs[I].unlock();
107 Mutex.unlock();
108 }
109
110 bool setOption(Option O, sptr Value) {
111 if (O == Option::MaxTSDsCount) {
112 ScopedLock L(Mutex);
113 return setNumberOfTSDs(static_cast<u32>(Value));
114 }
115 if (O == Option::ThreadDisableMemInit)
116 setDisableMemInit(Value);
117 // Not supported by the TSD Registry, but not an error either.
118 return true;
119 }
120
121 bool getDisableMemInit() const { return *getTlsPtr() & 1; }
122
123 void getStats(ScopedString *Str) {
124 u32 TotalTSDs = atomic_load_relaxed(A: &NumberOfTSDs);
125 Str->append(Format: "Stats: SharedTSDs: %u available; total %u\n", TotalTSDs,
126 TSDsArraySize);
127 for (uptr I = 0; I < TotalTSDs; ++I) {
128 TSDs[I].lock();
129 // Theoretically, we want to mark TSD::lock()/TSD::unlock() with proper
130 // thread annotations. However, given the TSD is only locked on shared
131 // path, do the assertion in a separate path to avoid confusing the
132 // analyzer.
133 TSDs[I].assertLocked(/*BypassCheck=*/true);
134 Str->append(Format: " Shared TSD[%zu]:\n", I);
135 TSDs[I].getSizeClassAllocator().getStats(Str);
136 TSDs[I].unlock();
137 }
138 }
139
140private:
141 ALWAYS_INLINE TSD<Allocator> *getTSDAndLock() NO_THREAD_SAFETY_ANALYSIS {
142 TSD<Allocator> *TSD = getCurrentTSD();
143 DCHECK(TSD);
144 // Try to lock the currently associated context.
145 if (TSD->tryLock())
146 return TSD;
147 // If that fails, go down the slow path.
148 if (TSDsArraySize == 1U) {
149 // Only 1 TSD, not need to go any further.
150 // The compiler will optimize this one way or the other.
151 TSD->lock();
152 return TSD;
153 }
154 return getTSDSlow(CurrentTSD: TSD);
155 }
156
157 ALWAYS_INLINE uptr *getTlsPtr() const {
158#if SCUDO_HAS_PLATFORM_TLS_SLOT
159 return reinterpret_cast<uptr *>(getPlatformAllocatorTlsSlot());
160#else
161 static thread_local uptr ThreadTSD;
162 return &ThreadTSD;
163#endif
164 }
165
166 static_assert(alignof(TSD<Allocator>) >= 2, "");
167
168 ALWAYS_INLINE void setCurrentTSD(TSD<Allocator> *CurrentTSD) {
169 *getTlsPtr() &= 1;
170 *getTlsPtr() |= reinterpret_cast<uptr>(CurrentTSD);
171 }
172
173 ALWAYS_INLINE TSD<Allocator> *getCurrentTSD() {
174 return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr() & ~1ULL);
175 }
176
177 // Requires a lock to avoid multiple threads trying to set the TSDs at once.
178 bool setNumberOfTSDs(u32 N) REQUIRES(Mutex) {
179 const u32 TotalTSDs = atomic_load_relaxed(A: &NumberOfTSDs);
180 // In order to avoid needing locks for these values, the number of TSDs
181 // can never be decreased.
182 if (N < TotalTSDs)
183 return false;
184
185 if (N > TSDsArraySize)
186 N = TSDsArraySize;
187
188 // In order to avoid locks around the CoPrimes, compute all of the
189 // new values and then set them in place after.
190 u32 NewCoPrimes[TSDsArraySize] = {};
191 u32 NewNumberOfCoPrimes = 0;
192
193 // Compute all the coprimes of the TSDs. This will be used to walk the
194 // array of TSDs in a random order. For details, see:
195 // https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/
196 for (u32 I = 0; I < N; I++) {
197 u32 A = I + 1;
198 u32 B = N;
199 // Find the GCD between I + 1 and N. If 1, they are coprimes.
200 while (B != 0) {
201 const u32 T = A;
202 A = B;
203 B = T % B;
204 }
205 if (A == 1)
206 NewCoPrimes[NewNumberOfCoPrimes++] = I + 1;
207 }
208
209 // Set these values in this particular order so that if getTSDSlow
210 // is called while they are being modified, nothing crashes, or encounters
211 // a zero CoPrime value.
212 for (u32 I = 0; I < NewNumberOfCoPrimes; I++) {
213 atomic_store(&CoPrimes[I], NewCoPrimes[I], memory_order_release);
214 }
215 atomic_store(A: &NumberOfCoPrimes, V: NewNumberOfCoPrimes, MO: memory_order_release);
216 atomic_store(A: &NumberOfTSDs, V: N, MO: memory_order_release);
217 return true;
218 }
219
220 void setDisableMemInit(bool B) {
221 *getTlsPtr() &= ~1ULL;
222 *getTlsPtr() |= B;
223 }
224
225 NOINLINE void initThread(Allocator *Instance) {
226 initOnceMaybe(Instance);
227 // Initial context assignment is done in a plain round-robin fashion.
228 const u32 Index = atomic_fetch_add(A: &CurrentIndex, V: 1U, MO: memory_order_relaxed);
229 setCurrentTSD(&TSDs[Index % atomic_load_relaxed(A: &NumberOfTSDs)]);
230 Instance->callPostInitCallback();
231 }
232
233 // TSDs is an array of locks which is not supported for marking thread-safety
234 // capability.
235 NOINLINE TSD<Allocator> *getTSDSlow(TSD<Allocator> *CurrentTSD) {
236 const u32 TotalTSDs = atomic_load_relaxed(A: &NumberOfTSDs);
237 if (UNLIKELY(TotalTSDs <= 1U)) {
238 CurrentTSD->lock();
239 return CurrentTSD;
240 }
241
242 // Use the Precedence of the current TSD as our random seed. Since we are
243 // in the slow path, it means that tryLock failed, and as a result it's
244 // very likely that said Precedence is non-zero.
245 const u32 R = static_cast<u32>(CurrentTSD->getPrecedence());
246 const u32 TotalCoPrimes = atomic_load_relaxed(A: &NumberOfCoPrimes);
247 DCHECK_NE(TotalCoPrimes, 0U);
248 const u32 Inc = atomic_load_relaxed(&CoPrimes[R % TotalCoPrimes]);
249
250 u32 Index = R % TotalTSDs;
251 uptr LowestPrecedence = UINTPTR_MAX;
252 TSD<Allocator> *CandidateTSD = nullptr;
253 // Go randomly through at most 4 contexts and find a candidate.
254 for (u32 I = 0; I < Min(A: 4U, B: TotalTSDs); I++) {
255 if (TSDs[Index].tryLock()) {
256 setCurrentTSD(&TSDs[Index]);
257 return &TSDs[Index];
258 }
259 const uptr Precedence = TSDs[Index].getPrecedence();
260 // A 0 precedence here means another thread just locked this TSD.
261 if (Precedence && Precedence < LowestPrecedence) {
262 CandidateTSD = &TSDs[Index];
263 LowestPrecedence = Precedence;
264 }
265 Index += Inc;
266 if (Index >= TotalTSDs)
267 Index -= TotalTSDs;
268 }
269 if (CandidateTSD) {
270 CandidateTSD->lock();
271 setCurrentTSD(CandidateTSD);
272 return CandidateTSD;
273 }
274 // Last resort, stick with the current one.
275 CurrentTSD->lock();
276 return CurrentTSD;
277 }
278
279 atomic_u32 CurrentIndex = {};
280 atomic_u32 NumberOfTSDs = {};
281 atomic_u32 NumberOfCoPrimes = {};
282 atomic_u32 CoPrimes[TSDsArraySize] = {};
283 atomic_u8 Initialized = {};
284 // Used for global initialization and TSDs access.
285 // Acquiring the global initialization should only lock once in normal
286 // operation, which is why using it for TSDs access should not cause
287 // any interference.
288 HybridMutex Mutex;
289 TSD<Allocator> TSDs[TSDsArraySize];
290};
291
292} // namespace scudo
293
294#endif // SCUDO_TSD_SHARED_H_
295