1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include <__thread/timed_backoff_policy.h>
10#include <atomic>
11#include <climits>
12#include <cstddef>
13#include <cstdint>
14#include <cstring>
15#include <functional>
16#include <new>
17#include <thread>
18#include <type_traits>
19
20#include "include/apple_availability.h"
21
22#ifdef __linux__
23
24# include <linux/futex.h>
25# include <sys/syscall.h>
26# include <unistd.h>
27
28// libc++ uses SYS_futex as a universal syscall name. However, on 32 bit architectures
29// with a 64 bit time_t, we need to specify SYS_futex_time64.
30# if !defined(SYS_futex) && defined(SYS_futex_time64)
31# define SYS_futex SYS_futex_time64
32# endif
33# define _LIBCPP_FUTEX(...) syscall(SYS_futex, __VA_ARGS__)
34
35#elif defined(__FreeBSD__)
36
37# include <sys/types.h>
38# include <sys/umtx.h>
39
40# define _LIBCPP_FUTEX(...) syscall(SYS_futex, __VA_ARGS__)
41
42#elif defined(__OpenBSD__)
43
44# include <sys/futex.h>
45
46// OpenBSD has no indirect syscalls
47# define _LIBCPP_FUTEX(...) futex(__VA_ARGS__)
48
49#elif defined(_WIN32)
50
51# include <memory>
52# include <windows.h>
53
54#else // <- Add other operating systems here
55
56// Baseline needs no new headers
57
58# define _LIBCPP_FUTEX(...) syscall(SYS_futex, __VA_ARGS__)
59
60#endif
61
62_LIBCPP_BEGIN_NAMESPACE_STD
63
64#ifdef __linux__
65
66template <std::size_t _Size>
67static void __platform_wait_on_address(void const* __ptr, void const* __val, uint64_t __timeout_ns) {
68 static_assert(_Size == 4, "Can only wait on 4 bytes value");
69 char buffer[_Size];
70 std::memcpy(dest: &buffer, src: const_cast<const void*>(__val), n: _Size);
71 static constexpr timespec __default_timeout = {.tv_sec: 2, .tv_nsec: 0};
72 timespec __timeout;
73 if (__timeout_ns == 0) {
74 __timeout = __default_timeout;
75 } else {
76 __timeout.tv_sec = __timeout_ns / 1'000'000'000;
77 __timeout.tv_nsec = __timeout_ns % 1'000'000'000;
78 }
79 _LIBCPP_FUTEX(__ptr, FUTEX_WAIT_PRIVATE, *reinterpret_cast<__cxx_contention_t const*>(&buffer), &__timeout, 0, 0);
80}
81
82template <std::size_t _Size>
83static void __platform_wake_by_address(void const* __ptr, bool __notify_one) {
84 static_assert(_Size == 4, "Can only wake up on 4 bytes value");
85 _LIBCPP_FUTEX(__ptr, FUTEX_WAKE_PRIVATE, __notify_one ? 1 : INT_MAX, 0, 0, 0);
86}
87
88#elif defined(__APPLE__) && defined(_LIBCPP_USE_ULOCK)
89
// Hand-written declarations of the Darwin ulock entry points used below.
extern "C" {
/* timeout is specified in microseconds */
int __ulock_wait(uint32_t operation, void* addr, uint64_t value, uint32_t timeout);
int __ulock_wake(uint32_t operation, void* addr, uint64_t wake_value);
}

// Operation codes and flags, mirrored from
// https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/bsd/sys/ulock.h#L82
#  define UL_COMPARE_AND_WAIT 1
#  define UL_COMPARE_AND_WAIT64 5
#  define ULF_WAKE_ALL 0x00000100
98
99template <std::size_t _Size>
100static void __platform_wait_on_address(void const* __ptr, void const* __val, uint64_t __timeout_ns) {
101 static_assert(_Size == 8 || _Size == 4, "Can only wait on 8 bytes or 4 bytes value");
102 char buffer[_Size];
103 std::memcpy(&buffer, const_cast<const void*>(__val), _Size);
104 auto __timeout_us = __timeout_ns == 0 ? 0 : static_cast<uint32_t>(__timeout_ns / 1000);
105 if constexpr (_Size == 4)
106 __ulock_wait(
107 UL_COMPARE_AND_WAIT, const_cast<void*>(__ptr), *reinterpret_cast<uint32_t const*>(&buffer), __timeout_us);
108 else
109 __ulock_wait(
110 UL_COMPARE_AND_WAIT64, const_cast<void*>(__ptr), *reinterpret_cast<uint64_t const*>(&buffer), __timeout_us);
111}
112
113template <std::size_t _Size>
114static void __platform_wake_by_address(void const* __ptr, bool __notify_one) {
115 static_assert(_Size == 8 || _Size == 4, "Can only wake up on 8 bytes or 4 bytes value");
116
117 if constexpr (_Size == 4)
118 __ulock_wake(UL_COMPARE_AND_WAIT | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<void*>(__ptr), 0);
119 else
120 __ulock_wake(UL_COMPARE_AND_WAIT64 | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<void*>(__ptr), 0);
121}
122
123#elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
124/*
125 * Since __cxx_contention_t is int64_t even on 32bit FreeBSD
126 * platforms, we have to use umtx ops that work on the long type, and
127 * limit its use to architectures where long and int64_t are synonyms.
128 */
129
130template <std::size_t _Size>
131static void __platform_wait_on_address(void const* __ptr, void const* __val, uint64_t __timeout_ns) {
132 static_assert(_Size == 8, "Can only wait on 8 bytes value");
133 char buffer[_Size];
134 std::memcpy(&buffer, const_cast<const void*>(__val), _Size);
135 if (__timeout_ns == 0) {
136 _umtx_op(const_cast<void*>(__ptr), UMTX_OP_WAIT, *reinterpret_cast<__cxx_contention_t*>(&buffer), nullptr, nullptr);
137 } else {
138 _umtx_time ut;
139 ut._timeout.tv_sec = __timeout_ns / 1'000'000'000;
140 ut._timeout.tv_nsec = __timeout_ns % 1'000'000'000;
141 ut._flags = 0; // Relative time (not absolute)
142 ut._clockid = CLOCK_MONOTONIC; // Use monotonic clock
143
144 _umtx_op(const_cast<void*>(__ptr),
145 UMTX_OP_WAIT,
146 *reinterpret_cast<__cxx_contention_t*>(&buffer),
147 reinterpret_cast<void*>(sizeof(ut)), // Pass size as uaddr
148 &ut); // Pass _umtx_time structure as uaddr2
149 }
150}
151
152template <std::size_t _Size>
153static void __platform_wake_by_address(void const* __ptr, bool __notify_one) {
154 static_assert(_Size == 8, "Can only wake up on 8 bytes value");
155 _umtx_op(const_cast<void*>(__ptr), UMTX_OP_WAKE, __notify_one ? 1 : INT_MAX, nullptr, nullptr);
156}
157
158#elif defined(_WIN32)
159
160static void* win32_get_synch_api_function(const char* function_name) {
161 // Attempt to load the API set. Note that as per the Microsoft STL implementation, we assume this API is already
162 // loaded and accessible. While this isn't explicitly guaranteed by publicly available Win32 API documentation, it is
163 // true in practice, and may be guaranteed by internal documentation not released publicly. In any case the fact that
164 // the Microsoft STL made this assumption is reasonable basis to say that we can too. The alternative to this would be
165 // to use LoadLibrary, but then leak the module handle. We can't call FreeLibrary, as this would have to be triggered
166 // by a global static destructor, which would hang off DllMain, and calling FreeLibrary from DllMain is explicitly
167 // mentioned as not being allowed:
168 // https://learn.microsoft.com/en-us/windows/win32/dlls/dllmain
169 // Given the range of bad options here, we have chosen to mirror what Microsoft did, as it seems fair to assume that
170 // Microsoft will guarantee compatibility for us, as we are exposed to the same conditions as all existing Windows
171 // apps using the Microsoft STL VS2015/2017/2019/2022 runtimes, where Windows 7 support has not been excluded at
172 // compile time.
173 static auto module_handle = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
174 if (module_handle == nullptr) {
175 return nullptr;
176 }
177
178 // Attempt to locate the function in the API and return the result to the caller. Note that the NULL return from this
179 // method is documented as being interchangeable with nullptr.
180 // https://devblogs.microsoft.com/oldnewthing/20180307-00/?p=98175
181 return reinterpret_cast<void*>(GetProcAddress(module_handle, function_name));
182}
183
184template <std::size_t _Size>
185static void __platform_wait_on_address(void const* __ptr, void const* __val, uint64_t __timeout_ns) {
186 static_assert(_Size == 8, "Can only wait on 8 bytes value");
187 // WaitOnAddress was added in Windows 8 (build 9200)
188 static auto wait_on_address =
189 reinterpret_cast<BOOL(WINAPI*)(void*, PVOID, SIZE_T, DWORD)>(win32_get_synch_api_function("WaitOnAddress"));
190 if (wait_on_address != nullptr) {
191 wait_on_address(const_cast<void*>(__ptr),
192 const_cast<void*>(__val),
193 _Size,
194 __timeout_ns == 0 ? INFINITE : static_cast<DWORD>(__timeout_ns / 1'000'000));
195 } else {
196 __libcpp_thread_poll_with_backoff(
197 [=]() -> bool { return std::memcmp(const_cast<const void*>(__ptr), __val, _Size) != 0; },
198 __libcpp_timed_backoff_policy(),
199 std::chrono::nanoseconds(__timeout_ns));
200 }
201}
202
203template <std::size_t _Size>
204static void __platform_wake_by_address(void const* __ptr, bool __notify_one) {
205 static_assert(_Size == 8, "Can only wake up on 8 bytes value");
206 if (__notify_one) {
207 // WakeByAddressSingle was added in Windows 8 (build 9200)
208 static auto wake_by_address_single =
209 reinterpret_cast<void(WINAPI*)(PVOID)>(win32_get_synch_api_function("WakeByAddressSingle"));
210 if (wake_by_address_single != nullptr) {
211 wake_by_address_single(const_cast<void*>(__ptr));
212 } else {
213 // The fallback implementation of waking does nothing, as the fallback wait implementation just does polling, so
214 // there's nothing to do here.
215 }
216 } else {
217 // WakeByAddressAll was added in Windows 8 (build 9200)
218 static auto wake_by_address_all =
219 reinterpret_cast<void(WINAPI*)(PVOID)>(win32_get_synch_api_function("WakeByAddressAll"));
220 if (wake_by_address_all != nullptr) {
221 wake_by_address_all(const_cast<void*>(__ptr));
222 } else {
223 // The fallback implementation of waking does nothing, as the fallback wait implementation just does polling, so
224 // there's nothing to do here.
225 }
226 }
227}
228
229#else // <- Add other operating systems here
230
231// Baseline is just a timed backoff
232
233template <std::size_t _Size>
234static void __platform_wait_on_address(void const* __ptr, void const* __val, uint64_t __timeout_ns) {
235 __libcpp_thread_poll_with_backoff(
236 [=]() -> bool { return std::memcmp(const_cast<const void*>(__ptr), __val, _Size) != 0; },
237 __libcpp_timed_backoff_policy(),
238 std::chrono::nanoseconds(__timeout_ns));
239}
240
// Baseline: the matching wait implementation only polls, so there is nothing to wake.
template <std::size_t _Size>
static void __platform_wake_by_address(void const* /*__ptr*/, bool /*__notify_one*/) {
}
243
244#endif // __linux__
245
246// =============================
247// Local hidden helper functions
248// =============================
249
250/* Given an atomic to track contention and an atomic to actually wait on, which may be
251 the same atomic, we try to detect contention to avoid spuriously calling the platform. */
252
253template <std::size_t _Size>
254static void
255__contention_notify(__cxx_atomic_contention_t* __waiter_count, void const* __address_to_notify, bool __notify_one) {
256 if (0 != __cxx_atomic_load(a: __waiter_count, order: memory_order_seq_cst))
257 // We only call 'wake' if we consumed a contention bit here.
258 __platform_wake_by_address<_Size>(__address_to_notify, __notify_one);
259}
260
261template <std::size_t _Size>
262static void __contention_wait(__cxx_atomic_contention_t* __waiter_count,
263 void const* __address_to_wait,
264 void const* __old_value,
265 uint64_t __timeout_ns) {
266 __cxx_atomic_fetch_add(a: __waiter_count, delta: __cxx_contention_t(1), order: memory_order_relaxed);
267 // https://llvm.org/PR109290
268 // There are no platform guarantees of a memory barrier in the platform wait implementation
269 __cxx_atomic_thread_fence(order: memory_order_seq_cst);
270 // We sleep as long as the monitored value hasn't changed.
271 __platform_wait_on_address<_Size>(__address_to_wait, __old_value, __timeout_ns);
272 __cxx_atomic_fetch_sub(a: __waiter_count, delta: __cxx_contention_t(1), order: memory_order_release);
273}
274
275static constexpr size_t __contention_table_size = (1 << 8); /* < there's no magic in this number */
276
277static constexpr hash<void const*> __contention_hasher;
278
279// Waiter count table for all atomics with the correct size that use itself as the wait/notify address.
280
281struct alignas(
282 std::hardware_constructive_interference_size) /* aim to avoid false sharing */ __contention_state_native {
283 __cxx_atomic_contention_t __waiter_count;
284 constexpr __contention_state_native() : __waiter_count(0) {}
285};
286
287static __contention_state_native __contention_table_native[__contention_table_size];
288
289static __cxx_atomic_contention_t* __get_native_waiter_count(void const* p) {
290 return &__contention_table_native[__contention_hasher(p) & (__contention_table_size - 1)].__waiter_count;
291}
292
293// Global contention table for all atomics with the wrong size that use the global table's atomic as wait/notify
294// address.
295
296struct alignas(
297 std::hardware_constructive_interference_size) /* aim to avoid false sharing */ __contention_state_global {
298 __cxx_atomic_contention_t __waiter_count;
299 __cxx_atomic_contention_t __platform_state;
300 constexpr __contention_state_global() : __waiter_count(0), __platform_state(0) {}
301};
302
303static __contention_state_global __contention_table_global[__contention_table_size];
304
305static __contention_state_global* __get_global_contention_state(void const* p) {
306 return &__contention_table_global[__contention_hasher(p) & (__contention_table_size - 1)];
307}
308
/* When the incoming atomic is the wrong size for the platform wait size, we need to
   launder the value sequence through an atomic from our table. */
311
312static void __atomic_notify_global_table(void const* __location) {
313 auto const __entry = __get_global_contention_state(p: __location);
314 // The value sequence laundering happens on the next line below.
315 __cxx_atomic_fetch_add(a: &__entry->__platform_state, delta: __cxx_contention_t(1), order: memory_order_seq_cst);
316 __contention_notify<sizeof(__cxx_atomic_contention_t)>(
317 waiter_count: &__entry->__waiter_count, address_to_notify: &__entry->__platform_state, notify_one: false /* when laundering, we can't handle notify_one */);
318}
319
320// =============================
321// New dylib exported symbols
322// =============================
323
324// global
325_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __atomic_monitor_global(void const* __location) noexcept {
326 auto const __entry = __get_global_contention_state(p: __location);
327 return __cxx_atomic_load(a: &__entry->__platform_state, order: memory_order_acquire);
328}
329
330_LIBCPP_EXPORTED_FROM_ABI void
331__atomic_wait_global_table(void const* __location, __cxx_contention_t __old_value) noexcept {
332 auto const __entry = __get_global_contention_state(p: __location);
333 __contention_wait<sizeof(__cxx_atomic_contention_t)>(
334 waiter_count: &__entry->__waiter_count, address_to_wait: &__entry->__platform_state, old_value: &__old_value, timeout_ns: 0);
335}
336
337_LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_global_table_with_timeout(
338 void const* __location, __cxx_contention_t __old_value, uint64_t __timeout_ns) _NOEXCEPT {
339 auto const __entry = __get_global_contention_state(p: __location);
340 __contention_wait<sizeof(__cxx_atomic_contention_t)>(
341 waiter_count: &__entry->__waiter_count, address_to_wait: &__entry->__platform_state, old_value: &__old_value, __timeout_ns);
342}
343
344_LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_global_table(void const* __location) noexcept {
345 __atomic_notify_global_table(__location);
346}
347
348_LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_global_table(void const* __location) noexcept {
349 __atomic_notify_global_table(__location);
350}
351
352// native
353
354template <std::size_t _Size>
355_LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_native(void const* __address, void const* __old_value) noexcept {
356 __contention_wait<_Size>(__get_native_waiter_count(p: __address), __address, __old_value, 0);
357}
358
359template <std::size_t _Size>
360_LIBCPP_EXPORTED_FROM_ABI void
361__atomic_wait_native_with_timeout(void const* __address, void const* __old_value, uint64_t __timeout_ns) noexcept {
362 __contention_wait<_Size>(__get_native_waiter_count(p: __address), __address, __old_value, __timeout_ns);
363}
364
365template <std::size_t _Size>
366_LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_native(void const* __location) noexcept {
367 __contention_notify<_Size>(__get_native_waiter_count(p: __location), __location, true);
368}
369
370template <std::size_t _Size>
371_LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_native(void const* __location) noexcept {
372 __contention_notify<_Size>(__get_native_waiter_count(p: __location), __location, false);
373}
374
375// ==================================================
376// Instantiation of the templates with supported size
377// ==================================================
378
379#if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
380
381# define _INSTANTIATE(_SIZE) \
382 template _LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_native<_SIZE>(void const*, void const*) noexcept; \
383 template _LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_native_with_timeout<_SIZE>( \
384 void const*, void const*, uint64_t) noexcept; \
385 template _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_native<_SIZE>(void const*) noexcept; \
386 template _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_native<_SIZE>(void const*) noexcept;
387
388_LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_INSTANTIATE)
389
390# undef _INSTANTIATE
391
392#else // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
393
394template _LIBCPP_EXPORTED_FROM_ABI void
395__atomic_wait_native<sizeof(__cxx_contention_t)>(void const* __address, void const* __old_value) noexcept;
396
397template _LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_native_with_timeout<sizeof(__cxx_contention_t)>(
398 void const* __address, void const* __old_value, uint64_t) noexcept;
399
400template _LIBCPP_EXPORTED_FROM_ABI void
401__atomic_notify_one_native<sizeof(__cxx_contention_t)>(void const* __location) noexcept;
402
403template _LIBCPP_EXPORTED_FROM_ABI void
404__atomic_notify_all_native<sizeof(__cxx_contention_t)>(void const* __location) noexcept;
405
406#endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
407
408// =============================================================
409// Old dylib exported symbols, for backwards compatibility
410// =============================================================
411_LIBCPP_DIAGNOSTIC_PUSH
412_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wmissing-prototypes")
413
414_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile* __location) noexcept {
415 __atomic_notify_global_table(location: const_cast<void const*>(__location));
416}
417
418_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile* __location) noexcept {
419 __atomic_notify_global_table(location: const_cast<void const*>(__location));
420}
421
422_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile* __location) noexcept {
423 auto const __entry = __get_global_contention_state(p: const_cast<void const*>(__location));
424 return __cxx_atomic_load(a: &__entry->__platform_state, order: memory_order_acquire);
425}
426
427_LIBCPP_EXPORTED_FROM_ABI void
428__libcpp_atomic_wait(void const volatile* __location, __cxx_contention_t __old_value) noexcept {
429 auto const __entry = __get_global_contention_state(p: const_cast<void const*>(__location));
430 __contention_wait<sizeof(__cxx_atomic_contention_t)>(
431 waiter_count: &__entry->__waiter_count, address_to_wait: &__entry->__platform_state, old_value: &__old_value, timeout_ns: 0);
432}
433
434_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile* __location) noexcept {
435 auto __location_cast = const_cast<const void*>(static_cast<const volatile void*>(__location));
436 __contention_notify<sizeof(__cxx_atomic_contention_t)>(
437 waiter_count: __get_native_waiter_count(p: __location_cast), address_to_notify: __location_cast, notify_one: true);
438}
439
440_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile* __location) noexcept {
441 auto __location_cast = const_cast<const void*>(static_cast<const volatile void*>(__location));
442 __contention_notify<sizeof(__cxx_atomic_contention_t)>(
443 waiter_count: __get_native_waiter_count(p: __location_cast), address_to_notify: __location_cast, notify_one: false);
444}
445
446_LIBCPP_EXPORTED_FROM_ABI void
447__libcpp_atomic_wait(__cxx_atomic_contention_t const volatile* __location, __cxx_contention_t __old_value) noexcept {
448 auto __location_cast = const_cast<const void*>(static_cast<const volatile void*>(__location));
449 __contention_wait<sizeof(__cxx_atomic_contention_t)>(
450 waiter_count: __get_native_waiter_count(p: __location_cast), address_to_wait: __location_cast, old_value: &__old_value, timeout_ns: 0);
451}
452
453// this function is even unused in the old ABI
454_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
455__libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile* __location) noexcept {
456 return __cxx_atomic_load(a: __location, order: memory_order_acquire);
457}
458
459_LIBCPP_DIAGNOSTIC_POP
460
461_LIBCPP_END_NAMESPACE_STD
462