| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H |
| 10 | #define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H |
| 11 | |
| 12 | /* cxa_guard_impl.h - Implements the C++ runtime support for function local |
| 13 | * static guards. |
| 14 | * The layout of the guard object is the same across ARM and Itanium. |
| 15 | * |
| 16 | * The first "guard byte" (which is checked by the compiler) is set only upon |
| 17 | * the completion of cxa release. |
| 18 | * |
| 19 | * The second "init byte" does the rest of the bookkeeping. It tracks if |
| 20 | * initialization is complete or pending, and if there are waiting threads. |
| 21 | * |
| 22 | * If the guard variable is 64 bits and the platform supplies a 32-bit thread |
| 23 | * identifier, it is used to detect recursive initialization. The thread ID of |
| 24 | * the thread currently performing initialization is stored in the second word. |
| 25 | * |
| 26 | * Guard Object Layout: |
| 27 | * --------------------------------------------------------------------------- |
| 28 | * | a+0: guard byte | a+1: init byte | a+2: unused ... | a+4: thread-id ... | |
| 29 | * --------------------------------------------------------------------------- |
| 30 | * |
| 31 | * Note that we don't do what the ABI docs suggest (put a mutex in the guard |
| 32 | * object which we acquire in cxa_guard_acquire and release in |
| 33 | * cxa_guard_release). Instead we use the init byte to imitate that behaviour, |
| 34 | * but without actually holding anything mutex related between acquire and |
| 35 | * release/abort. |
| 36 | * |
| 37 | * Access Protocol: |
| 38 | * For each implementation the guard byte is checked and set before accessing |
| 39 | * the init byte. |
| 40 | * |
| 41 | * Overall Design: |
| 42 | * The implementation was designed to allow each implementation to be tested |
| 43 | * independent of the C++ runtime or platform support. |
| 44 | * |
| 45 | */ |
| 46 | |
| 47 | #include "__cxxabi_config.h" |
| 48 | #include "include/atomic_support.h" // from libc++ |
| 49 | #if defined(__has_include) |
| 50 | # if __has_include(<sys/futex.h>) |
| 51 | # include <sys/futex.h> |
| 52 | # endif |
| 53 | # if __has_include(<sys/syscall.h>) |
| 54 | # include <sys/syscall.h> |
| 55 | # endif |
| 56 | # if __has_include(<unistd.h>) |
| 57 | # include <unistd.h> |
| 58 | # endif |
| 59 | #endif |
| 60 | |
| 61 | #include <__thread/support.h> |
| 62 | #include <cstdint> |
| 63 | #include <cstring> |
| 64 | #include <limits.h> |
| 65 | #include <stdlib.h> |
| 66 | |
| 67 | #ifndef _LIBCXXABI_HAS_NO_THREADS |
| 68 | # if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB) |
| 69 | # pragma comment(lib, "pthread") |
| 70 | # endif |
| 71 | #endif |
| 72 | |
| 73 | #if defined(__clang__) |
| 74 | # pragma clang diagnostic push |
| 75 | # pragma clang diagnostic ignored "-Wtautological-pointer-compare" |
| 76 | #elif defined(__GNUC__) |
| 77 | # pragma GCC diagnostic push |
| 78 | # pragma GCC diagnostic ignored "-Waddress" |
| 79 | #endif |
| 80 | |
| 81 | // To make testing possible, this header is included from both cxa_guard.cpp |
| 82 | // and a number of tests. |
| 83 | // |
| 84 | // For this reason we place everything in an anonymous namespace -- even though |
| 85 | // we're in a header. We want the actual implementation and the tests to have |
| 86 | // unique definitions of the types in this header (since the tests may depend |
| 87 | // on function local statics). |
| 88 | // |
| 89 | // To enforce this either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be |
| 90 | // defined when including this file. Only `src/cxa_guard.cpp` should define |
| 91 | // the former. |
| 92 | #ifdef BUILDING_CXA_GUARD |
| 93 | # include "abort_message.h" |
| 94 | # define ABORT_WITH_MESSAGE(...) ::__abort_message(__VA_ARGS__) |
| 95 | #elif defined(TESTING_CXA_GUARD) |
| 96 | # define ABORT_WITH_MESSAGE(...) ::abort() |
| 97 | #else |
| 98 | # error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined" |
| 99 | #endif |
| 100 | |
| 101 | #if __has_feature(thread_sanitizer) |
| 102 | extern "C" void __tsan_acquire(void*); |
| 103 | extern "C" void __tsan_release(void*); |
| 104 | #else |
| 105 | # define __tsan_acquire(addr) ((void)0) |
| 106 | # define __tsan_release(addr) ((void)0) |
| 107 | #endif |
| 108 | |
| 109 | namespace __cxxabiv1 { |
| 110 | // Use an anonymous namespace to ensure that the tests and actual implementation |
| 111 | // have unique definitions of these symbols. |
| 112 | namespace { |
| 113 | |
| 114 | //===----------------------------------------------------------------------===// |
| 115 | // Misc Utilities |
| 116 | //===----------------------------------------------------------------------===// |
| 117 | |
| 118 | template <class T, T (*Init)()> |
| 119 | struct LazyValue { |
| 120 | LazyValue() : is_init(false) {} |
| 121 | |
| 122 | T& get() { |
| 123 | if (!is_init) { |
| 124 | value = Init(); |
| 125 | is_init = true; |
| 126 | } |
| 127 | return value; |
| 128 | } |
| 129 | |
| 130 | private: |
| 131 | T value; |
| 132 | bool is_init = false; |
| 133 | }; |
| 134 | |
| 135 | template <class IntType> |
| 136 | class AtomicInt { |
| 137 | public: |
| 138 | using MemoryOrder = std::__libcpp_atomic_order; |
| 139 | |
| 140 | explicit AtomicInt(IntType* b) : b_(b) {} |
| 141 | AtomicInt(AtomicInt const&) = delete; |
| 142 | AtomicInt& operator=(AtomicInt const&) = delete; |
| 143 | |
| 144 | IntType load(MemoryOrder ord) { return std::__libcpp_atomic_load(b_, ord); } |
| 145 | void store(IntType val, MemoryOrder ord) { std::__libcpp_atomic_store(b_, val, ord); } |
| 146 | IntType exchange(IntType new_val, MemoryOrder ord) { return std::__libcpp_atomic_exchange(b_, new_val, ord); } |
| 147 | bool compare_exchange(IntType* expected, IntType desired, MemoryOrder ord_success, MemoryOrder ord_failure) { |
| 148 | return std::__libcpp_atomic_compare_exchange(b_, expected, desired, ord_success, ord_failure); |
| 149 | } |
| 150 | |
| 151 | private: |
| 152 | IntType* b_; |
| 153 | }; |
| 154 | |
| 155 | //===----------------------------------------------------------------------===// |
| 156 | // PlatformGetThreadID |
| 157 | //===----------------------------------------------------------------------===// |
| 158 | |
// PlatformThreadID yields a 32-bit identifier for the calling thread on the
// platforms handled below. Everywhere else it is a null function pointer, which
// the InitByte implementations compare against nullptr before using it.
#if defined(__APPLE__) && _LIBCPP_HAS_THREAD_API_PTHREAD
uint32_t PlatformThreadID() {
  static_assert(sizeof(mach_port_t) == sizeof(uint32_t), "");
  const mach_port_t port = pthread_mach_thread_np(std::__libcpp_thread_get_current_id());
  return static_cast<uint32_t>(port);
}
#elif defined(SYS_gettid) && _LIBCPP_HAS_THREAD_API_PTHREAD
uint32_t PlatformThreadID() {
  static_assert(sizeof(pid_t) == sizeof(uint32_t), "");
  const auto tid = syscall(SYS_gettid);
  return static_cast<uint32_t>(tid);
}
#else
constexpr uint32_t (*PlatformThreadID)() = nullptr;
#endif
| 172 | |
| 173 | //===----------------------------------------------------------------------===// |
| 174 | // GuardByte |
| 175 | //===----------------------------------------------------------------------===// |
| 176 | |
// State values and flag bits stored in the guard byte and the init byte.
static constexpr uint8_t UNSET        = 0x00; // no flag set
static constexpr uint8_t COMPLETE_BIT = 0x01; // bit 0
static constexpr uint8_t PENDING_BIT  = 0x02; // bit 1
static constexpr uint8_t WAITING_BIT  = 0x04; // bit 2
| 181 | |
| 182 | /// Manages reads and writes to the guard byte. |
| 183 | struct GuardByte { |
| 184 | GuardByte() = delete; |
| 185 | GuardByte(GuardByte const&) = delete; |
| 186 | GuardByte& operator=(GuardByte const&) = delete; |
| 187 | |
| 188 | explicit GuardByte(uint8_t* const guard_byte_address) : guard_byte(guard_byte_address) {} |
| 189 | |
| 190 | public: |
| 191 | /// The guard byte portion of cxa_guard_acquire. Returns true if |
| 192 | /// initialization has already been completed. |
| 193 | bool acquire() { |
| 194 | // if guard_byte is non-zero, we have already completed initialization |
| 195 | // (i.e. release has been called) |
| 196 | return guard_byte.load(ord: std::_AO_Acquire) != UNSET; |
| 197 | } |
| 198 | |
| 199 | /// The guard byte portion of cxa_guard_release. |
| 200 | void release() { guard_byte.store(val: COMPLETE_BIT, ord: std::_AO_Release); } |
| 201 | |
| 202 | /// The guard byte portion of cxa_guard_abort. |
| 203 | void abort() {} // Nothing to do |
| 204 | |
| 205 | private: |
| 206 | AtomicInt<uint8_t> guard_byte; |
| 207 | }; |
| 208 | |
| 209 | //===----------------------------------------------------------------------===// |
| 210 | // InitByte Implementations |
| 211 | //===----------------------------------------------------------------------===// |
| 212 | // |
| 213 | // Each initialization byte implementation supports the following methods: |
| 214 | // |
| 215 | // InitByte(uint8_t* _init_byte_address, uint32_t* _thread_id_address) |
| 216 | // Construct the InitByte object, initializing our member variables |
| 217 | // |
| 218 | // bool acquire() |
| 219 | // Called before we start the initialization. Check if someone else has already started, and if |
| 220 | // not to signal our intent to start it ourselves. We determine the current status from the init |
| 221 | // byte, which is one of 4 possible values: |
| 222 | // COMPLETE: Initialization was finished by somebody else. Return true. |
| 223 | // PENDING: Somebody has started the initialization already, set the WAITING bit, |
| 224 | // then wait for the init byte to get updated with a new value. |
| 225 | // (PENDING|WAITING): Somebody has started the initialization already, and we're not the |
| 226 | // first one waiting. Wait for the init byte to get updated. |
| 227 | // UNSET: Initialization hasn't successfully completed, and nobody is currently |
| 228 | // performing the initialization. Set the PENDING bit to indicate our |
| 229 | // intention to start the initialization, and return false. |
| 230 | // The return value indicates whether initialization has already been completed. |
| 231 | // |
| 232 | // void release() |
| 233 | // Called after successfully completing the initialization. Update the init byte to reflect |
| 234 | // that, then if anybody else is waiting, wake them up. |
| 235 | // |
| 236 | // void abort() |
| 237 | // Called after an error is thrown during the initialization. Reset the init byte to UNSET to |
| 238 | // indicate that we're no longer performing the initialization, then if anybody is waiting, wake |
| 239 | // them up so they can try performing the initialization. |
| 240 | // |
| 241 | |
| 242 | //===----------------------------------------------------------------------===// |
| 243 | // Single Threaded Implementation |
| 244 | //===----------------------------------------------------------------------===// |
| 245 | |
| 246 | /// InitByteNoThreads - Doesn't use any inter-thread synchronization when |
| 247 | /// managing reads and writes to the init byte. |
| 248 | struct InitByteNoThreads { |
| 249 | InitByteNoThreads() = delete; |
| 250 | InitByteNoThreads(InitByteNoThreads const&) = delete; |
| 251 | InitByteNoThreads& operator=(InitByteNoThreads const&) = delete; |
| 252 | |
| 253 | explicit InitByteNoThreads(uint8_t* _init_byte_address, uint32_t*) : init_byte_address(_init_byte_address) {} |
| 254 | |
| 255 | /// The init byte portion of cxa_guard_acquire. Returns true if |
| 256 | /// initialization has already been completed. |
| 257 | bool acquire() { |
| 258 | if (*init_byte_address == COMPLETE_BIT) |
| 259 | return true; |
| 260 | if (*init_byte_address & PENDING_BIT) |
| 261 | ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization: do you have a function-local static variable whose initialization depends on that function?" ); |
| 262 | *init_byte_address = PENDING_BIT; |
| 263 | return false; |
| 264 | } |
| 265 | |
| 266 | /// The init byte portion of cxa_guard_release. |
| 267 | void release() { *init_byte_address = COMPLETE_BIT; } |
| 268 | /// The init byte portion of cxa_guard_abort. |
| 269 | void abort() { *init_byte_address = UNSET; } |
| 270 | |
| 271 | private: |
| 272 | /// The address of the byte used during initialization. |
| 273 | uint8_t* const init_byte_address; |
| 274 | }; |
| 275 | |
| 276 | //===----------------------------------------------------------------------===// |
| 277 | // Global Mutex Implementation |
| 278 | //===----------------------------------------------------------------------===// |
| 279 | |
| 280 | struct LibcppMutex; |
| 281 | struct LibcppCondVar; |
| 282 | |
| 283 | #ifndef _LIBCXXABI_HAS_NO_THREADS |
| 284 | struct LibcppMutex { |
| 285 | LibcppMutex() = default; |
| 286 | LibcppMutex(LibcppMutex const&) = delete; |
| 287 | LibcppMutex& operator=(LibcppMutex const&) = delete; |
| 288 | |
| 289 | bool lock() { return std::__libcpp_mutex_lock(m: &mutex); } |
| 290 | bool unlock() { return std::__libcpp_mutex_unlock(m: &mutex); } |
| 291 | |
| 292 | private: |
| 293 | friend struct LibcppCondVar; |
| 294 | std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER; |
| 295 | }; |
| 296 | |
| 297 | struct LibcppCondVar { |
| 298 | LibcppCondVar() = default; |
| 299 | LibcppCondVar(LibcppCondVar const&) = delete; |
| 300 | LibcppCondVar& operator=(LibcppCondVar const&) = delete; |
| 301 | |
| 302 | bool wait(LibcppMutex& mut) { return std::__libcpp_condvar_wait(cv: &cond, m: &mut.mutex); } |
| 303 | bool broadcast() { return std::__libcpp_condvar_broadcast(cv: &cond); } |
| 304 | |
| 305 | private: |
| 306 | std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER; |
| 307 | }; |
| 308 | #else |
| 309 | struct LibcppMutex {}; |
| 310 | struct LibcppCondVar {}; |
| 311 | #endif // !defined(_LIBCXXABI_HAS_NO_THREADS) |
| 312 | |
| 313 | /// InitByteGlobalMutex - Uses a global mutex and condition variable (common to |
| 314 | /// all static local variables) to manage reads and writes to the init byte. |
| 315 | template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond, |
| 316 | uint32_t (*GetThreadID)() = PlatformThreadID> |
| 317 | struct InitByteGlobalMutex { |
| 318 | |
| 319 | explicit InitByteGlobalMutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address) |
| 320 | : init_byte_address(_init_byte_address), thread_id_address(_thread_id_address), |
| 321 | has_thread_id_support(_thread_id_address != nullptr && GetThreadID != nullptr) {} |
| 322 | |
| 323 | public: |
| 324 | /// The init byte portion of cxa_guard_acquire. Returns true if |
| 325 | /// initialization has already been completed. |
| 326 | bool acquire() { |
| 327 | LockGuard g("__cxa_guard_acquire" ); |
| 328 | // Check for possible recursive initialization. |
| 329 | if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) { |
| 330 | if (*thread_id_address == current_thread_id.get()) |
| 331 | ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization: do you have a function-local static variable whose initialization depends on that function?" ); |
| 332 | } |
| 333 | |
| 334 | // Wait until the pending bit is not set. |
| 335 | while (*init_byte_address & PENDING_BIT) { |
| 336 | *init_byte_address |= WAITING_BIT; |
| 337 | global_cond.wait(global_mutex); |
| 338 | } |
| 339 | |
| 340 | if (*init_byte_address == COMPLETE_BIT) |
| 341 | return true; |
| 342 | |
| 343 | if (has_thread_id_support) |
| 344 | *thread_id_address = current_thread_id.get(); |
| 345 | |
| 346 | *init_byte_address = PENDING_BIT; |
| 347 | return false; |
| 348 | } |
| 349 | |
| 350 | /// The init byte portion of cxa_guard_release. |
| 351 | void release() { |
| 352 | bool has_waiting; |
| 353 | { |
| 354 | LockGuard g("__cxa_guard_release" ); |
| 355 | has_waiting = *init_byte_address & WAITING_BIT; |
| 356 | *init_byte_address = COMPLETE_BIT; |
| 357 | } |
| 358 | if (has_waiting) { |
| 359 | if (global_cond.broadcast()) { |
| 360 | ABORT_WITH_MESSAGE("%s failed to broadcast" , "__cxa_guard_release" ); |
| 361 | } |
| 362 | } |
| 363 | } |
| 364 | |
| 365 | /// The init byte portion of cxa_guard_abort. |
| 366 | void abort() { |
| 367 | bool has_waiting; |
| 368 | { |
| 369 | LockGuard g("__cxa_guard_abort" ); |
| 370 | if (has_thread_id_support) |
| 371 | *thread_id_address = 0; |
| 372 | has_waiting = *init_byte_address & WAITING_BIT; |
| 373 | *init_byte_address = UNSET; |
| 374 | } |
| 375 | if (has_waiting) { |
| 376 | if (global_cond.broadcast()) { |
| 377 | ABORT_WITH_MESSAGE("%s failed to broadcast" , "__cxa_guard_abort" ); |
| 378 | } |
| 379 | } |
| 380 | } |
| 381 | |
| 382 | private: |
| 383 | /// The address of the byte used during initialization. |
| 384 | uint8_t* const init_byte_address; |
| 385 | /// An optional address storing an identifier for the thread performing initialization. |
| 386 | /// It's used to detect recursive initialization. |
| 387 | uint32_t* const thread_id_address; |
| 388 | |
| 389 | const bool has_thread_id_support; |
| 390 | LazyValue<uint32_t, GetThreadID> current_thread_id; |
| 391 | |
| 392 | private: |
| 393 | struct LockGuard { |
| 394 | LockGuard() = delete; |
| 395 | LockGuard(LockGuard const&) = delete; |
| 396 | LockGuard& operator=(LockGuard const&) = delete; |
| 397 | |
| 398 | explicit LockGuard(const char* calling_func) : calling_func_(calling_func) { |
| 399 | if (global_mutex.lock()) |
| 400 | ABORT_WITH_MESSAGE("%s failed to acquire mutex" , calling_func_); |
| 401 | } |
| 402 | |
| 403 | ~LockGuard() { |
| 404 | if (global_mutex.unlock()) |
| 405 | ABORT_WITH_MESSAGE("%s failed to release mutex" , calling_func_); |
| 406 | } |
| 407 | |
| 408 | private: |
| 409 | const char* const calling_func_; |
| 410 | }; |
| 411 | }; |
| 412 | |
| 413 | //===----------------------------------------------------------------------===// |
| 414 | // Futex Implementation |
| 415 | //===----------------------------------------------------------------------===// |
| 416 | |
// Platform futex primitives.
//
// PlatformFutexWait(addr, expect) issues a futex wait on `addr` with `expect`
// as the expected value and no timeout. PlatformFutexWake(addr) issues a futex
// wake on `addr` with a wake count of INT_MAX. On platforms without a known
// futex interface both names are null function pointers.
#if defined(__OpenBSD__)
void PlatformFutexWait(int* addr, int expect) {
  constexpr int WAIT = 0; // numeric futex operation code
  futex(reinterpret_cast<volatile uint32_t*>(addr), WAIT, expect, nullptr, nullptr);
  __tsan_acquire(addr);
}
void PlatformFutexWake(int* addr) {
  constexpr int WAKE = 1; // numeric futex operation code
  __tsan_release(addr);
  futex(reinterpret_cast<volatile uint32_t*>(addr), WAKE, INT_MAX, nullptr, nullptr);
}
#elif defined(SYS_futex)
void PlatformFutexWait(int* addr, int expect) {
  constexpr int WAIT = 0; // numeric futex operation code
  // The timeout argument is a pointer. syscall() is variadic, so pass a real
  // null pointer rather than the int literal 0 to guarantee that a full
  // pointer-sized null value is handed over.
  syscall(SYS_futex, addr, WAIT, expect, nullptr);
  __tsan_acquire(addr);
}
void PlatformFutexWake(int* addr) {
  constexpr int WAKE = 1; // numeric futex operation code
  __tsan_release(addr);
  syscall(SYS_futex, addr, WAKE, INT_MAX);
}
#else
constexpr void (*PlatformFutexWait)(int*, int) = nullptr;
constexpr void (*PlatformFutexWake)(int*) = nullptr;
#endif

/// True when a platform futex wait primitive is available. The unary plus
/// turns a function name into a pointer so the same expression works whether
/// PlatformFutexWait is a function or a null function-pointer constant.
constexpr bool PlatformSupportsFutex() { return +PlatformFutexWait != nullptr; }
| 445 | |
| 446 | /// InitByteFutex - Uses a futex to manage reads and writes to the init byte. |
| 447 | template <void (*Wait)(int*, int) = PlatformFutexWait, void (*Wake)(int*) = PlatformFutexWake, |
| 448 | uint32_t (*GetThreadIDArg)() = PlatformThreadID> |
| 449 | struct InitByteFutex { |
| 450 | |
| 451 | explicit InitByteFutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address) |
| 452 | : init_byte(_init_byte_address), |
| 453 | has_thread_id_support(_thread_id_address != nullptr && GetThreadIDArg != nullptr), |
| 454 | thread_id(_thread_id_address), |
| 455 | base_address(reinterpret_cast<int*>(/*_init_byte_address & ~0x3*/ _init_byte_address - 1)) {} |
| 456 | |
| 457 | public: |
| 458 | /// The init byte portion of cxa_guard_acquire. Returns true if |
| 459 | /// initialization has already been completed. |
| 460 | bool acquire() { |
| 461 | while (true) { |
| 462 | uint8_t last_val = UNSET; |
| 463 | if (init_byte.compare_exchange(expected: &last_val, desired: PENDING_BIT, ord_success: std::_AO_Acq_Rel, ord_failure: std::_AO_Acquire)) { |
| 464 | if (has_thread_id_support) { |
| 465 | thread_id.store(val: current_thread_id.get(), ord: std::_AO_Relaxed); |
| 466 | } |
| 467 | return false; |
| 468 | } |
| 469 | |
| 470 | if (last_val == COMPLETE_BIT) |
| 471 | return true; |
| 472 | |
| 473 | if (last_val & PENDING_BIT) { |
| 474 | |
| 475 | // Check for recursive initialization |
| 476 | if (has_thread_id_support && thread_id.load(ord: std::_AO_Relaxed) == current_thread_id.get()) { |
| 477 | ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization: do you have a function-local static variable whose initialization depends on that function?" ); |
| 478 | } |
| 479 | |
| 480 | if ((last_val & WAITING_BIT) == 0) { |
| 481 | // This compare exchange can fail for several reasons |
| 482 | // (1) another thread finished the whole thing before we got here |
| 483 | // (2) another thread set the waiting bit we were trying to thread |
| 484 | // (3) another thread had an exception and failed to finish |
| 485 | if (!init_byte.compare_exchange(expected: &last_val, desired: PENDING_BIT | WAITING_BIT, ord_success: std::_AO_Acq_Rel, ord_failure: std::_AO_Release)) { |
| 486 | // (1) success, via someone else's work! |
| 487 | if (last_val == COMPLETE_BIT) |
| 488 | return true; |
| 489 | |
| 490 | // (3) someone else, bailed on doing the work, retry from the start! |
| 491 | if (last_val == UNSET) |
| 492 | continue; |
| 493 | |
| 494 | // (2) the waiting bit got set, so we are happy to keep waiting |
| 495 | } |
| 496 | } |
| 497 | wait_on_initialization(); |
| 498 | } |
| 499 | } |
| 500 | } |
| 501 | |
| 502 | /// The init byte portion of cxa_guard_release. |
| 503 | void release() { |
| 504 | uint8_t old = init_byte.exchange(new_val: COMPLETE_BIT, ord: std::_AO_Acq_Rel); |
| 505 | if (old & WAITING_BIT) |
| 506 | wake_all(); |
| 507 | } |
| 508 | |
| 509 | /// The init byte portion of cxa_guard_abort. |
| 510 | void abort() { |
| 511 | if (has_thread_id_support) |
| 512 | thread_id.store(val: 0, ord: std::_AO_Relaxed); |
| 513 | |
| 514 | uint8_t old = init_byte.exchange(new_val: UNSET, ord: std::_AO_Acq_Rel); |
| 515 | if (old & WAITING_BIT) |
| 516 | wake_all(); |
| 517 | } |
| 518 | |
| 519 | private: |
| 520 | /// Use the futex to wait on the current guard variable. Futex expects a |
| 521 | /// 32-bit 4-byte aligned address as the first argument, so we use the 4-byte |
| 522 | /// aligned address that encompasses the init byte (i.e. the address of the |
| 523 | /// raw guard object that was passed to __cxa_guard_acquire/release/abort). |
| 524 | void wait_on_initialization() { Wait(base_address, expected_value_for_futex(b: PENDING_BIT | WAITING_BIT)); } |
| 525 | void wake_all() { Wake(base_address); } |
| 526 | |
| 527 | private: |
| 528 | AtomicInt<uint8_t> init_byte; |
| 529 | |
| 530 | const bool has_thread_id_support; |
| 531 | // Unsafe to use unless has_thread_id_support |
| 532 | AtomicInt<uint32_t> thread_id; |
| 533 | LazyValue<uint32_t, GetThreadIDArg> current_thread_id; |
| 534 | |
| 535 | /// the 4-byte-aligned address that encompasses the init byte (i.e. the |
| 536 | /// address of the raw guard object). |
| 537 | int* const base_address; |
| 538 | |
| 539 | /// Create the expected integer value for futex `wait(int* addr, int expected)`. |
| 540 | /// We pass the base address as the first argument, So this function creates |
| 541 | /// an zero-initialized integer with `b` copied at the correct offset. |
| 542 | static int expected_value_for_futex(uint8_t b) { |
| 543 | int dest_val = 0; |
| 544 | std::memcpy(dest: reinterpret_cast<char*>(&dest_val) + 1, src: &b, n: 1); |
| 545 | return dest_val; |
| 546 | } |
| 547 | |
| 548 | static_assert(Wait != nullptr && Wake != nullptr, "" ); |
| 549 | }; |
| 550 | |
| 551 | //===----------------------------------------------------------------------===// |
| 552 | // GuardObject |
| 553 | //===----------------------------------------------------------------------===// |
| 554 | |
/// Outcome of GuardObject::cxa_guard_acquire.
enum class AcquireResult {
  INIT_IS_DONE    = 0, // initialization has already been completed
  INIT_IS_PENDING = 1, // the caller is expected to perform the initialization
};

// Unscoped shorthands for the two enumerators.
constexpr AcquireResult INIT_IS_DONE    = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;
| 561 | |
| 562 | /// Co-ordinates between GuardByte and InitByte. |
| 563 | template <class InitByteT> |
| 564 | struct GuardObject { |
| 565 | GuardObject() = delete; |
| 566 | GuardObject(GuardObject const&) = delete; |
| 567 | GuardObject& operator=(GuardObject const&) = delete; |
| 568 | |
| 569 | private: |
| 570 | GuardByte guard_byte; |
| 571 | InitByteT init_byte; |
| 572 | |
| 573 | public: |
| 574 | /// ARM Constructor |
| 575 | explicit GuardObject(uint32_t* raw_guard_object) |
| 576 | : guard_byte(reinterpret_cast<uint8_t*>(raw_guard_object)), |
| 577 | init_byte(reinterpret_cast<uint8_t*>(raw_guard_object) + 1, nullptr) {} |
| 578 | |
| 579 | /// Itanium Constructor |
| 580 | explicit GuardObject(uint64_t* raw_guard_object) |
| 581 | : guard_byte(reinterpret_cast<uint8_t*>(raw_guard_object)), |
| 582 | init_byte(reinterpret_cast<uint8_t*>(raw_guard_object) + 1, reinterpret_cast<uint32_t*>(raw_guard_object) + 1) { |
| 583 | } |
| 584 | |
| 585 | /// Implements __cxa_guard_acquire. |
| 586 | AcquireResult cxa_guard_acquire() { |
| 587 | // Use short-circuit evaluation to avoid calling init_byte.acquire when |
| 588 | // guard_byte.acquire returns true. (i.e. don't call it when we know from |
| 589 | // the guard byte that initialization has already been completed) |
| 590 | if (guard_byte.acquire() || init_byte.acquire()) |
| 591 | return INIT_IS_DONE; |
| 592 | return INIT_IS_PENDING; |
| 593 | } |
| 594 | |
| 595 | /// Implements __cxa_guard_release. |
| 596 | void cxa_guard_release() { |
| 597 | // Update guard byte first, so if somebody is woken up by init_byte.release |
| 598 | // and comes all the way back around to __cxa_guard_acquire again, they see |
| 599 | // it as having completed initialization. |
| 600 | guard_byte.release(); |
| 601 | init_byte.release(); |
| 602 | } |
| 603 | |
| 604 | /// Implements __cxa_guard_abort. |
| 605 | void cxa_guard_abort() { |
| 606 | guard_byte.abort(); |
| 607 | init_byte.abort(); |
| 608 | } |
| 609 | }; |
| 610 | |
| 611 | //===----------------------------------------------------------------------===// |
| 612 | // Convenience Classes |
| 613 | //===----------------------------------------------------------------------===// |
| 614 | |
| 615 | /// NoThreadsGuard - Manages initialization without performing any inter-thread |
| 616 | /// synchronization. |
| 617 | using NoThreadsGuard = GuardObject<InitByteNoThreads>; |
| 618 | |
| 619 | /// GlobalMutexGuard - Manages initialization using a global mutex and |
| 620 | /// condition variable. |
| 621 | template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond, |
| 622 | uint32_t (*GetThreadID)() = PlatformThreadID> |
| 623 | using GlobalMutexGuard = GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond, GetThreadID>>; |
| 624 | |
| 625 | /// FutexGuard - Manages initialization using atomics and the futex syscall for |
| 626 | /// waiting and waking. |
| 627 | template <void (*Wait)(int*, int) = PlatformFutexWait, void (*Wake)(int*) = PlatformFutexWake, |
| 628 | uint32_t (*GetThreadIDArg)() = PlatformThreadID> |
| 629 | using FutexGuard = GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>>; |
| 630 | |
| 631 | //===----------------------------------------------------------------------===// |
| 632 | // Implementation Selection |
| 633 | //===----------------------------------------------------------------------===// |
| 634 | |
| 635 | template <class T> |
| 636 | struct GlobalStatic { |
| 637 | static T instance; |
| 638 | }; |
| 639 | template <class T> |
| 640 | _LIBCPP_CONSTINIT T GlobalStatic<T>::instance = {}; |
| 641 | |
| 642 | enum class Implementation { NoThreads, GlobalMutex, Futex }; |
| 643 | |
| 644 | template <Implementation Impl> |
| 645 | struct SelectImplementation; |
| 646 | |
| 647 | template <> |
| 648 | struct SelectImplementation<Implementation::NoThreads> { |
| 649 | using type = NoThreadsGuard; |
| 650 | }; |
| 651 | |
| 652 | template <> |
| 653 | struct SelectImplementation<Implementation::GlobalMutex> { |
| 654 | using type = GlobalMutexGuard<LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance, |
| 655 | GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>; |
| 656 | }; |
| 657 | |
| 658 | template <> |
| 659 | struct SelectImplementation<Implementation::Futex> { |
| 660 | using type = FutexGuard<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>; |
| 661 | }; |
| 662 | |
| 663 | // TODO(EricWF): We should prefer the futex implementation when available. But |
| 664 | // it should be done in a separate step from adding the implementation. |
| 665 | constexpr Implementation CurrentImplementation = |
| 666 | #if defined(_LIBCXXABI_HAS_NO_THREADS) |
| 667 | Implementation::NoThreads; |
| 668 | #elif defined(_LIBCXXABI_USE_FUTEX) |
| 669 | Implementation::Futex; |
| 670 | #else |
| 671 | Implementation::GlobalMutex; |
| 672 | #endif |
| 673 | |
| 674 | static_assert(CurrentImplementation != Implementation::Futex || PlatformSupportsFutex(), |
| 675 | "Futex selected but not supported" ); |
| 676 | |
| 677 | using SelectedImplementation = SelectImplementation<CurrentImplementation>::type; |
| 678 | |
| 679 | } // namespace |
| 680 | } // namespace __cxxabiv1 |
| 681 | |
| 682 | #if defined(__clang__) |
| 683 | # pragma clang diagnostic pop |
| 684 | #elif defined(__GNUC__) |
| 685 | # pragma GCC diagnostic pop |
| 686 | #endif |
| 687 | |
| 688 | #endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H |
| 689 | |