1//===----------------------------------------------------------------------===//
2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3// See https://llvm.org/LICENSE.txt for license information.
4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5//
6//===----------------------------------------------------------------------===//
7
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <optional>
#include <stop_token>
#include <thread>
#include <vector>
11
12#include "benchmark/benchmark.h"
13#include "make_test_thread.h"
14
15using namespace std::chrono_literals;
16
17// We have a single thread created by std::jthread consuming the stop_token:
18// polling for stop_requested.
19void BM_stop_token_single_thread_polling_stop_requested(benchmark::State& state) {
20 auto thread_func = [&](std::stop_token st, std::atomic<std::uint64_t>* loop_count) {
21 while (!st.stop_requested()) {
22 // doing some work
23 loop_count->fetch_add(1, std::memory_order_relaxed);
24 }
25 };
26
27 std::atomic<std::uint64_t> loop_count(0);
28 std::uint64_t total_loop_test_param = state.range(0);
29
30 auto thread = support::make_test_jthread(thread_func, &loop_count);
31
32 for (auto _ : state) {
33 auto start_total = loop_count.load(std::memory_order_relaxed);
34
35 while (loop_count.load(std::memory_order_relaxed) - start_total < total_loop_test_param) {
36 std::this_thread::yield();
37 }
38 }
39}
40
41BENCHMARK(BM_stop_token_single_thread_polling_stop_requested)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
42
43// We have multiple threads polling for stop_requested of the same stop_token.
44void BM_stop_token_multi_thread_polling_stop_requested(benchmark::State& state) {
45 std::atomic<bool> start{false};
46
47 auto thread_func = [&start](std::atomic<std::uint64_t>* loop_count, std::stop_token st) {
48 start.wait(false);
49 while (!st.stop_requested()) {
50 // doing some work
51 loop_count->fetch_add(1, std::memory_order_relaxed);
52 }
53 };
54
55 constexpr size_t thread_count = 20;
56
57 std::uint64_t total_loop_test_param = state.range(0);
58
59 std::vector<std::atomic<std::uint64_t>> loop_counts(thread_count);
60 std::stop_source ss;
61 std::vector<std::jthread> threads;
62 threads.reserve(thread_count);
63
64 for (size_t i = 0; i < thread_count; ++i) {
65 threads.emplace_back(support::make_test_jthread(thread_func, &loop_counts[i], ss.get_token()));
66 }
67
68 auto get_total_loop = [&loop_counts] {
69 std::uint64_t total = 0;
70 for (const auto& loop_count : loop_counts) {
71 total += loop_count.load(std::memory_order_relaxed);
72 }
73 return total;
74 };
75
76 start = true;
77 start.notify_all();
78
79 for (auto _ : state) {
80 auto start_total = get_total_loop();
81
82 while (get_total_loop() - start_total < total_loop_test_param) {
83 std::this_thread::yield();
84 }
85 }
86
87 ss.request_stop();
88}
89
90BENCHMARK(BM_stop_token_multi_thread_polling_stop_requested)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
91
92// We have a single thread created by std::jthread consuming the stop_token:
93// registering/deregistering callbacks, one at a time.
94void BM_stop_token_single_thread_reg_unreg_callback(benchmark::State& state) {
95 auto thread_func = [&](std::stop_token st, std::atomic<std::uint64_t>* reg_count) {
96 while (!st.stop_requested()) {
97 std::stop_callback cb{st, [&]() noexcept {}};
98 benchmark::DoNotOptimize(cb);
99 reg_count->fetch_add(1, std::memory_order_relaxed);
100 }
101 };
102
103 std::atomic<std::uint64_t> reg_count(0);
104 std::uint64_t total_reg_test_param = state.range(0);
105
106 auto thread = support::make_test_jthread(thread_func, &reg_count);
107
108 for (auto _ : state) {
109 auto start_total = reg_count.load(std::memory_order_relaxed);
110
111 while (reg_count.load(std::memory_order_relaxed) - start_total < total_reg_test_param) {
112 std::this_thread::yield();
113 }
114 }
115}
116BENCHMARK(BM_stop_token_single_thread_reg_unreg_callback)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
117
118// At startup, it creates a single stop_source which it will then pass an associated stop_token to every
119// request.
120//
121// Assume a thread-pool handles these requests and for each request it polls for stop_requested(), then attaches a
122// stop-callback, does some work, then detaches the stop-callback some time later. The lifetime of requests/callbacks
123// would overlap with other requests/callback from the same thread.
124//
125// Say something like each thread keeping a circular buffer of N stop-callbacks and destroying the stop-callbacks in
126// FIFO order
127void BM_stop_token_async_reg_unreg_callback(benchmark::State& state) {
128 struct dummy_stop_callback {
129 void operator()() const noexcept {}
130 };
131
132 constexpr size_t thread_count = 20;
133 constexpr size_t concurrent_request_count = 1000;
134 std::atomic<bool> start{false};
135
136 std::uint64_t total_reg_test_param = state.range(0);
137 std::vector<std::atomic<std::uint64_t>> reg_counts(thread_count);
138
139 std::stop_source ss;
140 std::vector<std::jthread> threads;
141 threads.reserve(thread_count);
142
143 auto thread_func = [&start](std::atomic<std::uint64_t>* count, std::stop_token st) {
144 std::vector<std::optional<std::stop_callback<dummy_stop_callback>>> cbs(concurrent_request_count);
145
146 start.wait(false);
147
148 std::uint32_t index = 0;
149 while (!st.stop_requested()) {
150 cbs[index].emplace(st, dummy_stop_callback{});
151 index = (index + 1) % concurrent_request_count;
152 count->fetch_add(1, std::memory_order_relaxed);
153 }
154 };
155
156 for (size_t i = 0; i < thread_count; ++i) {
157 threads.emplace_back(support::make_test_jthread(thread_func, &reg_counts[i], ss.get_token()));
158 }
159
160 auto get_total_reg = [&] {
161 std::uint64_t total = 0;
162 for (const auto& reg_count : reg_counts) {
163 total += reg_count.load(std::memory_order_relaxed);
164 }
165 return total;
166 };
167
168 start = true;
169 start.notify_all();
170
171 for (auto _ : state) {
172 auto start_total = get_total_reg();
173
174 while (get_total_reg() - start_total < total_reg_test_param) {
175 std::this_thread::yield();
176 }
177 }
178
179 ss.request_stop();
180}
181BENCHMARK(BM_stop_token_async_reg_unreg_callback)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
182
183BENCHMARK_MAIN();
184