1 | // -*- C++ -*- |
2 | //===----------------------------------------------------------------------===// |
3 | // |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef _LIBCPP_BARRIER |
11 | #define _LIBCPP_BARRIER |
12 | |
13 | /* |
14 | barrier synopsis |
15 | |
16 | namespace std |
17 | { |
18 | |
19 | template<class CompletionFunction = see below> |
20 | class barrier |
21 | { |
22 | public: |
23 | using arrival_token = see below; |
24 | |
25 | static constexpr ptrdiff_t max() noexcept; |
26 | |
27 | constexpr explicit barrier(ptrdiff_t phase_count, |
28 | CompletionFunction f = CompletionFunction()); |
29 | ~barrier(); |
30 | |
31 | barrier(const barrier&) = delete; |
32 | barrier& operator=(const barrier&) = delete; |
33 | |
34 | [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); |
35 | void wait(arrival_token&& arrival) const; |
36 | |
37 | void arrive_and_wait(); |
38 | void arrive_and_drop(); |
39 | |
40 | private: |
41 | CompletionFunction completion; // exposition only |
42 | }; |
43 | |
44 | } |
45 | |
46 | */ |
47 | |
48 | #include <__config> |
49 | |
50 | #if !defined(_LIBCPP_HAS_NO_THREADS) |
51 | |
52 | # include <__assert> |
53 | # include <__atomic/atomic_base.h> |
54 | # include <__atomic/memory_order.h> |
55 | # include <__memory/unique_ptr.h> |
56 | # include <__thread/poll_with_backoff.h> |
57 | # include <__thread/timed_backoff_policy.h> |
58 | # include <__utility/move.h> |
59 | # include <cstddef> |
60 | # include <cstdint> |
61 | # include <limits> |
62 | # include <version> |
63 | |
64 | # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
65 | # pragma GCC system_header |
66 | # endif |
67 | |
68 | _LIBCPP_PUSH_MACROS |
69 | # include <__undef_macros> |
70 | |
71 | # if _LIBCPP_STD_VER >= 14 |
72 | |
73 | _LIBCPP_BEGIN_NAMESPACE_STD |
74 | |
75 | struct __empty_completion { |
76 | inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {} |
77 | }; |
78 | |
79 | # ifndef _LIBCPP_HAS_NO_TREE_BARRIER |
80 | |
81 | /* |
82 | |
83 | The default implementation of __barrier_base is a classic tree barrier. |
84 | |
85 | It looks different from literature pseudocode for two main reasons: |
86 | 1. Threads that call into std::barrier functions do not provide indices, |
87 | so a numbering step is added before the actual barrier algorithm, |
88 | appearing as an N+1 round to the N rounds of the tree barrier. |
89 | 2. A great deal of attention has been paid to avoid cache line thrashing |
90 | by flattening the tree structure into cache-line sized arrays, that |
91 | are indexed in an efficient way. |
92 | |
93 | */ |
94 | |
95 | using __barrier_phase_t = uint8_t; |
96 | |
97 | class __barrier_algorithm_base; |
98 | |
99 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base* |
100 | __construct_barrier_algorithm_base(ptrdiff_t& __expected); |
101 | |
102 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool |
103 | __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept; |
104 | |
105 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void |
106 | __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) noexcept; |
107 | |
108 | template <class _CompletionF> |
109 | class __barrier_base { |
110 | ptrdiff_t __expected_; |
111 | unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_; |
112 | __atomic_base<ptrdiff_t> __expected_adjustment_; |
113 | _CompletionF __completion_; |
114 | __atomic_base<__barrier_phase_t> __phase_; |
115 | |
116 | public: |
117 | using arrival_token = __barrier_phase_t; |
118 | |
119 | static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } |
120 | |
121 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI |
122 | __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) |
123 | : __expected_(__expected), |
124 | __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base), |
125 | __expected_adjustment_(0), |
126 | __completion_(std::move(__completion)), |
127 | __phase_(0) {} |
128 | _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) { |
129 | _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( |
130 | __update <= __expected_, "update is greater than the expected count for the current barrier phase" ); |
131 | |
132 | auto const __old_phase = __phase_.load(memory_order_relaxed); |
133 | for (; __update; --__update) |
134 | if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) { |
135 | __completion_(); |
136 | __expected_ += __expected_adjustment_.load(memory_order_relaxed); |
137 | __expected_adjustment_.store(0, memory_order_relaxed); |
138 | __phase_.store(__old_phase + 2, memory_order_release); |
139 | __phase_.notify_all(); |
140 | } |
141 | return __old_phase; |
142 | } |
143 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { |
144 | auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; }; |
145 | std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); |
146 | } |
147 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { |
148 | __expected_adjustment_.fetch_sub(1, memory_order_relaxed); |
149 | (void)arrive(1); |
150 | } |
151 | }; |
152 | |
153 | # else |
154 | |
155 | /* |
156 | |
157 | The alternative implementation of __barrier_base is a central barrier. |
158 | |
159 | Two versions of this algorithm are provided: |
160 | 1. A fairly straightforward implementation of the litterature for the |
161 | general case where the completion function is not empty. |
162 | 2. An optimized implementation that exploits 2's complement arithmetic |
163 | and well-defined overflow in atomic arithmetic, to handle the phase |
164 | roll-over for free. |
165 | |
166 | */ |
167 | |
168 | template <class _CompletionF> |
169 | class __barrier_base { |
170 | __atomic_base<ptrdiff_t> __expected; |
171 | __atomic_base<ptrdiff_t> __arrived; |
172 | _CompletionF __completion; |
173 | __atomic_base<bool> __phase; |
174 | |
175 | public: |
176 | using arrival_token = bool; |
177 | |
178 | static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } |
179 | |
180 | _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) |
181 | : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {} |
182 | [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { |
183 | auto const __old_phase = __phase.load(memory_order_relaxed); |
184 | auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; |
185 | auto const new_expected = __expected.load(memory_order_relaxed); |
186 | |
187 | _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( |
188 | update <= new_expected, "update is greater than the expected count for the current barrier phase" ); |
189 | |
190 | if (0 == __result) { |
191 | __completion(); |
192 | __arrived.store(new_expected, memory_order_relaxed); |
193 | __phase.store(!__old_phase, memory_order_release); |
194 | __phase.notify_all(); |
195 | } |
196 | return __old_phase; |
197 | } |
198 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { |
199 | __phase.wait(__old_phase, memory_order_acquire); |
200 | } |
201 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { |
202 | __expected.fetch_sub(1, memory_order_relaxed); |
203 | (void)arrive(1); |
204 | } |
205 | }; |
206 | |
207 | template <> |
208 | class __barrier_base<__empty_completion> { |
209 | static constexpr uint64_t __expected_unit = 1ull; |
210 | static constexpr uint64_t __arrived_unit = 1ull << 32; |
211 | static constexpr uint64_t __expected_mask = __arrived_unit - 1; |
212 | static constexpr uint64_t __phase_bit = 1ull << 63; |
213 | static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; |
214 | |
215 | __atomic_base<uint64_t> __phase_arrived_expected; |
216 | |
217 | static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT { |
218 | return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count); |
219 | } |
220 | |
221 | public: |
222 | using arrival_token = uint64_t; |
223 | |
224 | static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; } |
225 | |
226 | _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) |
227 | : __phase_arrived_expected(__init(__count)) {} |
228 | [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { |
229 | auto const __inc = __arrived_unit * update; |
230 | auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); |
231 | |
232 | _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( |
233 | update <= __old, "update is greater than the expected count for the current barrier phase" ); |
234 | |
235 | if ((__old ^ (__old + __inc)) & __phase_bit) { |
236 | __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); |
237 | __phase_arrived_expected.notify_all(); |
238 | } |
239 | return __old & __phase_bit; |
240 | } |
241 | inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { |
242 | auto const __test_fn = [=]() -> bool { |
243 | uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); |
244 | return ((__current & __phase_bit) != __phase); |
245 | }; |
246 | __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); |
247 | } |
248 | inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { |
249 | __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); |
250 | (void)arrive(1); |
251 | } |
252 | }; |
253 | |
254 | # endif // !_LIBCPP_HAS_NO_TREE_BARRIER |
255 | |
256 | template <class _CompletionF = __empty_completion> |
257 | class _LIBCPP_DEPRECATED_ATOMIC_SYNC barrier { |
258 | __barrier_base<_CompletionF> __b_; |
259 | |
260 | public: |
261 | using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; |
262 | |
263 | static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); } |
264 | |
265 | _LIBCPP_AVAILABILITY_SYNC |
266 | _LIBCPP_HIDE_FROM_ABI explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) |
267 | : __b_(__count, std::move(__completion)) { |
268 | _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( |
269 | __count >= 0, |
270 | "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value" ); |
271 | _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( |
272 | __count <= max(), |
273 | "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with " |
274 | "a value greater than max()" ); |
275 | } |
276 | |
277 | barrier(barrier const&) = delete; |
278 | barrier& operator=(barrier const&) = delete; |
279 | |
280 | _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) { |
281 | _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0" ); |
282 | return __b_.arrive(__update); |
283 | } |
284 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { |
285 | __b_.wait(std::move(__phase)); |
286 | } |
287 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); } |
288 | _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); } |
289 | }; |
290 | |
291 | _LIBCPP_END_NAMESPACE_STD |
292 | |
293 | # endif // _LIBCPP_STD_VER >= 14 |
294 | |
295 | _LIBCPP_POP_MACROS |
296 | |
297 | #endif // !defined(_LIBCPP_HAS_NO_THREADS) |
298 | |
299 | #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 |
300 | # include <atomic> |
301 | # include <concepts> |
302 | # include <iterator> |
303 | # include <memory> |
304 | # include <stdexcept> |
305 | # include <variant> |
306 | #endif |
307 | |
308 | #endif //_LIBCPP_BARRIER |
309 | |