1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14 barrier synopsis
15
16namespace std
17{
18
19 template<class CompletionFunction = see below>
20 class barrier
21 {
22 public:
23 using arrival_token = see below;
24
25 static constexpr ptrdiff_t max() noexcept;
26
27 constexpr explicit barrier(ptrdiff_t phase_count,
28 CompletionFunction f = CompletionFunction());
29 ~barrier();
30
31 barrier(const barrier&) = delete;
32 barrier& operator=(const barrier&) = delete;
33
34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35 void wait(arrival_token&& arrival) const;
36
37 void arrive_and_wait();
38 void arrive_and_drop();
39
40 private:
41 CompletionFunction completion; // exposition only
42 };
43
44}
45
46*/
47
48#include <__config>
49
50#if !defined(_LIBCPP_HAS_NO_THREADS)
51
52# include <__assert>
53# include <__atomic/atomic_base.h>
54# include <__atomic/memory_order.h>
55# include <__memory/unique_ptr.h>
56# include <__thread/poll_with_backoff.h>
57# include <__thread/timed_backoff_policy.h>
58# include <__utility/move.h>
59# include <cstddef>
60# include <cstdint>
61# include <limits>
62# include <version>
63
64# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
65# pragma GCC system_header
66# endif
67
68_LIBCPP_PUSH_MACROS
69# include <__undef_macros>
70
71# if _LIBCPP_STD_VER >= 14
72
73_LIBCPP_BEGIN_NAMESPACE_STD
74
75struct __empty_completion {
76 inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {}
77};
78
79# ifndef _LIBCPP_HAS_NO_TREE_BARRIER
80
81/*
82
83The default implementation of __barrier_base is a classic tree barrier.
84
85It looks different from literature pseudocode for two main reasons:
86 1. Threads that call into std::barrier functions do not provide indices,
87 so a numbering step is added before the actual barrier algorithm,
88 appearing as an N+1 round to the N rounds of the tree barrier.
89 2. A great deal of attention has been paid to avoid cache line thrashing
90 by flattening the tree structure into cache-line sized arrays, that
91 are indexed in an efficient way.
92
93*/
94
95using __barrier_phase_t = uint8_t;
96
97class __barrier_algorithm_base;
98
99_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
100__construct_barrier_algorithm_base(ptrdiff_t& __expected);
101
102_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
103__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept;
104
105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
106__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) noexcept;
107
108template <class _CompletionF>
109class __barrier_base {
110 ptrdiff_t __expected_;
111 unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
112 __atomic_base<ptrdiff_t> __expected_adjustment_;
113 _CompletionF __completion_;
114 __atomic_base<__barrier_phase_t> __phase_;
115
116public:
117 using arrival_token = __barrier_phase_t;
118
119 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
120
121 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
122 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
123 : __expected_(__expected),
124 __base_(std::__construct_barrier_algorithm_base(expected&: this->__expected_), &__destroy_barrier_algorithm_base),
125 __expected_adjustment_(0),
126 __completion_(std::move(__completion)),
127 __phase_(0) {}
128 _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
129 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
130 __update <= __expected_, "update is greater than the expected count for the current barrier phase");
131
132 auto const __old_phase = __phase_.load(m: memory_order_relaxed);
133 for (; __update; --__update)
134 if (__arrive_barrier_algorithm_base(barrier: __base_.get(), __old_phase)) {
135 __completion_();
136 __expected_ += __expected_adjustment_.load(m: memory_order_relaxed);
137 __expected_adjustment_.store(d: 0, m: memory_order_relaxed);
138 __phase_.store(d: __old_phase + 2, m: memory_order_release);
139 __phase_.notify_all();
140 }
141 return __old_phase;
142 }
143 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
144 auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(m: memory_order_acquire) != __old_phase; };
145 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
146 }
147 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
148 __expected_adjustment_.fetch_sub(op: 1, m: memory_order_relaxed);
149 (void)arrive(update: 1);
150 }
151};
152
153# else
154
155/*
156
157The alternative implementation of __barrier_base is a central barrier.
158
159Two versions of this algorithm are provided:
160 1. A fairly straightforward implementation of the litterature for the
161 general case where the completion function is not empty.
162 2. An optimized implementation that exploits 2's complement arithmetic
163 and well-defined overflow in atomic arithmetic, to handle the phase
164 roll-over for free.
165
166*/
167
168template <class _CompletionF>
169class __barrier_base {
170 __atomic_base<ptrdiff_t> __expected;
171 __atomic_base<ptrdiff_t> __arrived;
172 _CompletionF __completion;
173 __atomic_base<bool> __phase;
174
175public:
176 using arrival_token = bool;
177
178 static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
179
180 _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
181 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
182 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
183 auto const __old_phase = __phase.load(memory_order_relaxed);
184 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
185 auto const new_expected = __expected.load(memory_order_relaxed);
186
187 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
188 update <= new_expected, "update is greater than the expected count for the current barrier phase");
189
190 if (0 == __result) {
191 __completion();
192 __arrived.store(new_expected, memory_order_relaxed);
193 __phase.store(!__old_phase, memory_order_release);
194 __phase.notify_all();
195 }
196 return __old_phase;
197 }
198 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
199 __phase.wait(__old_phase, memory_order_acquire);
200 }
201 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
202 __expected.fetch_sub(1, memory_order_relaxed);
203 (void)arrive(1);
204 }
205};
206
207template <>
208class __barrier_base<__empty_completion> {
209 static constexpr uint64_t __expected_unit = 1ull;
210 static constexpr uint64_t __arrived_unit = 1ull << 32;
211 static constexpr uint64_t __expected_mask = __arrived_unit - 1;
212 static constexpr uint64_t __phase_bit = 1ull << 63;
213 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
214
215 __atomic_base<uint64_t> __phase_arrived_expected;
216
217 static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
218 return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
219 }
220
221public:
222 using arrival_token = uint64_t;
223
224 static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
225
226 _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
227 : __phase_arrived_expected(__init(__count)) {}
228 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
229 auto const __inc = __arrived_unit * update;
230 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
231
232 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
233 update <= __old, "update is greater than the expected count for the current barrier phase");
234
235 if ((__old ^ (__old + __inc)) & __phase_bit) {
236 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
237 __phase_arrived_expected.notify_all();
238 }
239 return __old & __phase_bit;
240 }
241 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
242 auto const __test_fn = [=]() -> bool {
243 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
244 return ((__current & __phase_bit) != __phase);
245 };
246 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
247 }
248 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
249 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
250 (void)arrive(1);
251 }
252};
253
254# endif // !_LIBCPP_HAS_NO_TREE_BARRIER
255
256template <class _CompletionF = __empty_completion>
257class _LIBCPP_DEPRECATED_ATOMIC_SYNC barrier {
258 __barrier_base<_CompletionF> __b_;
259
260public:
261 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
262
263 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
264
265 _LIBCPP_AVAILABILITY_SYNC
266 _LIBCPP_HIDE_FROM_ABI explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
267 : __b_(__count, std::move(__completion)) {
268 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
269 __count >= 0,
270 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
271 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
272 __count <= max(),
273 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
274 "a value greater than max()");
275 }
276
277 barrier(barrier const&) = delete;
278 barrier& operator=(barrier const&) = delete;
279
280 _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
281 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0");
282 return __b_.arrive(__update);
283 }
284 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
285 __b_.wait(std::move(__phase));
286 }
287 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(phase: arrive()); }
288 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
289};
290
291_LIBCPP_END_NAMESPACE_STD
292
293# endif // _LIBCPP_STD_VER >= 14
294
295_LIBCPP_POP_MACROS
296
297#endif // !defined(_LIBCPP_HAS_NO_THREADS)
298
299#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
300# include <atomic>
301# include <concepts>
302# include <iterator>
303# include <memory>
304# include <stdexcept>
305# include <variant>
306#endif
307
308#endif //_LIBCPP_BARRIER
309