1//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Unix specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Unix.h"
14#include "llvm/ADT/ScopeExit.h"
15#include "llvm/ADT/SmallString.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/Twine.h"
19#include "llvm/Support/MemoryBuffer.h"
20#include "llvm/Support/raw_ostream.h"
21
22#if defined(__APPLE__)
23#include <mach/mach_init.h>
24#include <mach/mach_port.h>
25#include <pthread/qos.h>
26#include <sys/sysctl.h>
27#include <sys/types.h>
28#endif
29
30#include <pthread.h>
31
32#if defined(__FreeBSD__) || defined(__OpenBSD__)
33#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
34#endif
35
36#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
37#include <errno.h>
38#include <sys/cpuset.h>
39#include <sys/sysctl.h>
40#include <sys/user.h>
41#include <unistd.h>
42#endif
43
44#if defined(__NetBSD__)
45#include <lwp.h> // For _lwp_self()
46#endif
47
48#if defined(__OpenBSD__)
49#include <unistd.h> // For getthrid()
50#endif
51
52#if defined(__linux__)
53#include <sched.h> // For sched_getaffinity
54#include <sys/syscall.h> // For syscall codes
55#include <unistd.h> // For syscall()
56#endif
57
58namespace llvm {
59pthread_t
60llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
61 std::optional<unsigned> StackSizeInBytes) {
62 int errnum;
63
64 // Construct the attributes object.
65 pthread_attr_t Attr;
66 if ((errnum = ::pthread_attr_init(attr: &Attr)) != 0) {
67 ReportErrnumFatal(Msg: "pthread_attr_init failed", errnum);
68 }
69
70 auto AttrGuard = llvm::make_scope_exit(F: [&] {
71 if ((errnum = ::pthread_attr_destroy(attr: &Attr)) != 0) {
72 ReportErrnumFatal(Msg: "pthread_attr_destroy failed", errnum);
73 }
74 });
75
76 // Set the requested stack size, if given.
77 if (StackSizeInBytes) {
78 if ((errnum = ::pthread_attr_setstacksize(attr: &Attr, stacksize: *StackSizeInBytes)) != 0) {
79 ReportErrnumFatal(Msg: "pthread_attr_setstacksize failed", errnum);
80 }
81 }
82
83 // Construct and execute the thread.
84 pthread_t Thread;
85 if ((errnum = ::pthread_create(newthread: &Thread, attr: &Attr, start_routine: ThreadFunc, arg: Arg)) != 0)
86 ReportErrnumFatal(Msg: "pthread_create failed", errnum);
87
88 return Thread;
89}
90
91void llvm_thread_detach_impl(pthread_t Thread) {
92 int errnum;
93
94 if ((errnum = ::pthread_detach(th: Thread)) != 0) {
95 ReportErrnumFatal(Msg: "pthread_detach failed", errnum);
96 }
97}
98
99void llvm_thread_join_impl(pthread_t Thread) {
100 int errnum;
101
102 if ((errnum = ::pthread_join(th: Thread, thread_return: nullptr)) != 0) {
103 ReportErrnumFatal(Msg: "pthread_join failed", errnum);
104 }
105}
106
/// Returns the identifier of \p Thread. The pthread_t handle itself serves as
/// the identifier, so it is handed back unchanged.
pthread_t llvm_thread_get_id_impl(pthread_t Thread) {
  return Thread;
}
108
/// Returns the identifier of the calling thread, as given by pthread_self().
pthread_t llvm_thread_get_current_id_impl() {
  const pthread_t Current = ::pthread_self();
  return Current;
}
110
111} // namespace llvm
112
113uint64_t llvm::get_threadid() {
114#if defined(__APPLE__)
115 // Calling "mach_thread_self()" bumps the reference count on the thread
116 // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
117 // count.
118 static thread_local thread_port_t Self = [] {
119 thread_port_t InitSelf = mach_thread_self();
120 mach_port_deallocate(mach_task_self(), Self);
121 return InitSelf;
122 }();
123 return Self;
124#elif defined(__FreeBSD__)
125 return uint64_t(pthread_getthreadid_np());
126#elif defined(__NetBSD__)
127 return uint64_t(_lwp_self());
128#elif defined(__OpenBSD__)
129 return uint64_t(getthrid());
130#elif defined(__ANDROID__)
131 return uint64_t(gettid());
132#elif defined(__linux__)
133 return uint64_t(syscall(SYS_gettid));
134#else
135 return uint64_t(pthread_self());
136#endif
137}
138
/// Compile-time maximum thread-name length for the target platform.
///
/// The value includes the null terminator. It is 0 for Linux builds where
/// HAVE_PTHREAD_SETNAME_NP is not set and for platforms not listed here.
static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(__NetBSD__)
  constexpr uint32_t MaxLength = PTHREAD_MAX_NAMELEN_NP;
#elif defined(__APPLE__)
  constexpr uint32_t MaxLength = 64;
#elif defined(__linux__)
#if HAVE_PTHREAD_SETNAME_NP
  constexpr uint32_t MaxLength = 16;
#else
  constexpr uint32_t MaxLength = 0;
#endif
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  constexpr uint32_t MaxLength = 16;
#elif defined(__OpenBSD__)
  constexpr uint32_t MaxLength = 32;
#else
  constexpr uint32_t MaxLength = 0;
#endif
  return MaxLength;
}
158
159uint32_t llvm::get_max_thread_name_length() {
160 return get_max_thread_name_length_impl();
161}
162
163void llvm::set_thread_name(const Twine &Name) {
164 // Make sure the input is null terminated.
165 SmallString<64> Storage;
166 StringRef NameStr = Name.toNullTerminatedStringRef(Out&: Storage);
167
168 // Truncate from the beginning, not the end, if the specified name is too
169 // long. For one, this ensures that the resulting string is still null
170 // terminated, but additionally the end of a long thread name will usually
171 // be more unique than the beginning, since a common pattern is for similar
172 // threads to share a common prefix.
173 // Note that the name length includes the null terminator.
174 if (get_max_thread_name_length() > 0)
175 NameStr = NameStr.take_back(N: get_max_thread_name_length() - 1);
176 (void)NameStr;
177#if defined(__linux__)
178#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
179#if HAVE_PTHREAD_SETNAME_NP
180 ::pthread_setname_np(target_thread: ::pthread_self(), name: NameStr.data());
181#endif
182#endif
183#elif defined(__FreeBSD__) || defined(__OpenBSD__)
184 ::pthread_set_name_np(::pthread_self(), NameStr.data());
185#elif defined(__NetBSD__)
186 ::pthread_setname_np(::pthread_self(), "%s",
187 const_cast<char *>(NameStr.data()));
188#elif defined(__APPLE__)
189 ::pthread_setname_np(NameStr.data());
190#endif
191}
192
193void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
194 Name.clear();
195
196#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
197 int pid = ::getpid();
198 uint64_t tid = get_threadid();
199
200 struct kinfo_proc *kp = nullptr, *nkp;
201 size_t len = 0;
202 int error;
203 int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
204 (int)pid};
205
206 while (1) {
207 error = sysctl(ctl, 4, kp, &len, nullptr, 0);
208 if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
209 // Add extra space in case threads are added before next call.
210 len += sizeof(*kp) + len / 10;
211 nkp = (struct kinfo_proc *)::realloc(kp, len);
212 if (nkp == nullptr) {
213 free(kp);
214 return;
215 }
216 kp = nkp;
217 continue;
218 }
219 if (error != 0)
220 len = 0;
221 break;
222 }
223
224 for (size_t i = 0; i < len / sizeof(*kp); i++) {
225 if (kp[i].ki_tid == (lwpid_t)tid) {
226 Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
227 break;
228 }
229 }
230 free(kp);
231 return;
232#elif defined(__NetBSD__)
233 constexpr uint32_t len = get_max_thread_name_length_impl();
234 char buf[len];
235 ::pthread_getname_np(::pthread_self(), buf, len);
236
237 Name.append(buf, buf + strlen(buf));
238#elif defined(__OpenBSD__)
239 constexpr uint32_t len = get_max_thread_name_length_impl();
240 char buf[len];
241 ::pthread_get_name_np(::pthread_self(), buf, len);
242
243 Name.append(buf, buf + strlen(buf));
244#elif defined(__linux__)
245#if HAVE_PTHREAD_GETNAME_NP
246 constexpr uint32_t len = get_max_thread_name_length_impl();
247 char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
248 if (0 == ::pthread_getname_np(target_thread: ::pthread_self(), buf: Buffer, buflen: len))
249 Name.append(in_start: Buffer, in_end: Buffer + strlen(s: Buffer));
250#endif
251#endif
252}
253
254SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
255#if defined(__linux__) && defined(SCHED_IDLE)
256 // Some *really* old glibcs are missing SCHED_IDLE.
257 // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
258 // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
259 sched_param priority;
260 // For each of the above policies, param->sched_priority must be 0.
261 priority.sched_priority = 0;
262 // SCHED_IDLE for running very low priority background jobs.
263 // SCHED_OTHER the standard round-robin time-sharing policy;
264 return !pthread_setschedparam(
265 target_thread: pthread_self(),
266 // FIXME: consider SCHED_BATCH for Low
267 policy: Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
268 param: &priority)
269 ? SetThreadPriorityResult::SUCCESS
270 : SetThreadPriorityResult::FAILURE;
271#elif defined(__APPLE__)
272 // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
273 //
274 // Background - Applies to work that isn’t visible to the user and may take
275 // significant time to complete. Examples include indexing, backing up, or
276 // synchronizing data. This class emphasizes energy efficiency.
277 //
278 // Utility - Applies to work that takes anywhere from a few seconds to a few
279 // minutes to complete. Examples include downloading a document or importing
280 // data. This class offers a balance between responsiveness, performance, and
281 // energy efficiency.
282 const auto qosClass = [&]() {
283 switch (Priority) {
284 case ThreadPriority::Background:
285 return QOS_CLASS_BACKGROUND;
286 case ThreadPriority::Low:
287 return QOS_CLASS_UTILITY;
288 case ThreadPriority::Default:
289 return QOS_CLASS_DEFAULT;
290 }
291 }();
292 return !pthread_set_qos_class_self_np(qosClass, 0)
293 ? SetThreadPriorityResult::SUCCESS
294 : SetThreadPriorityResult::FAILURE;
295#endif
296 return SetThreadPriorityResult::FAILURE;
297}
298
299#include <thread>
300
/// Number of hardware threads usable by the caller.
///
/// On FreeBSD and Linux this is the CPU count of the affinity mask when that
/// query succeeds. Otherwise std::thread::hardware_concurrency() is used,
/// with 1 substituted if it reports 0.
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  cpuset_t AffinityMask;
  CPU_ZERO(&AffinityMask);
  const int Rc = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
                                    sizeof(AffinityMask), &AffinityMask);
  if (Rc == 0)
    return CPU_COUNT(&AffinityMask);
#elif defined(__linux__)
  cpu_set_t AffinityMask;
  const int Rc = sched_getaffinity(0, sizeof(AffinityMask), &AffinityMask);
  if (Rc == 0)
    return CPU_COUNT(&AffinityMask);
#endif
  // hardware_concurrency() may return 0; never report fewer than one thread.
  const unsigned Reported = std::thread::hardware_concurrency();
  if (Reported == 0)
    return 1;
  return Reported;
}
318
319void llvm::ThreadPoolStrategy::apply_thread_strategy(
320 unsigned ThreadPoolNum) const {}
321
322llvm::BitVector llvm::get_thread_affinity_mask() {
323 // FIXME: Implement
324 llvm_unreachable("Not implemented!");
325}
326
327unsigned llvm::get_cpus() { return 1; }
328
329#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
330// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
331// using the number of unique physical/core id pairs. The following
332// implementation reads the /proc/cpuinfo format on an x86_64 system.
333static int computeHostNumPhysicalCores() {
334 // Enabled represents the number of physical id/core id pairs with at least
335 // one processor id enabled by the CPU affinity mask.
336 cpu_set_t Affinity, Enabled;
337 if (sched_getaffinity(pid: 0, cpusetsize: sizeof(Affinity), cpuset: &Affinity) != 0)
338 return -1;
339 CPU_ZERO(&Enabled);
340
341 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
342 // mmapped because it appears to have 0 size.
343 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
344 llvm::MemoryBuffer::getFileAsStream(Filename: "/proc/cpuinfo");
345 if (std::error_code EC = Text.getError()) {
346 llvm::errs() << "Can't read "
347 << "/proc/cpuinfo: " << EC.message() << "\n";
348 return -1;
349 }
350 SmallVector<StringRef, 8> strs;
351 (*Text)->getBuffer().split(A&: strs, Separator: "\n", /*MaxSplit=*/MaxSplit: -1,
352 /*KeepEmpty=*/KeepEmpty: false);
353 int CurProcessor = -1;
354 int CurPhysicalId = -1;
355 int CurSiblings = -1;
356 int CurCoreId = -1;
357 for (StringRef Line : strs) {
358 std::pair<StringRef, StringRef> Data = Line.split(Separator: ':');
359 auto Name = Data.first.trim();
360 auto Val = Data.second.trim();
361 // These fields are available if the kernel is configured with CONFIG_SMP.
362 if (Name == "processor")
363 Val.getAsInteger(Radix: 10, Result&: CurProcessor);
364 else if (Name == "physical id")
365 Val.getAsInteger(Radix: 10, Result&: CurPhysicalId);
366 else if (Name == "siblings")
367 Val.getAsInteger(Radix: 10, Result&: CurSiblings);
368 else if (Name == "core id") {
369 Val.getAsInteger(Radix: 10, Result&: CurCoreId);
370 // The processor id corresponds to an index into cpu_set_t.
371 if (CPU_ISSET(CurProcessor, &Affinity))
372 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
373 }
374 }
375 return CPU_COUNT(&Enabled);
376}
377#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX)
// Reports the value of sysconf(_SC_NPROCESSORS_ONLN) as the core count.
static int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return OnlineProcessors;
}
381#elif defined(__linux__) && !defined(__ANDROID__)
// Approximates the physical core count by the number of CPUs in the calling
// thread's affinity mask. Returns -1 if the mask cannot be obtained.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPUs on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (DynAffinity == nullptr)
    return -1;

  const size_t DynSize = CPU_ALLOC_SIZE(MaxCPUs);
  CPU_ZERO_S(DynSize, DynAffinity);

  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S: plain CPU_COUNT
  // only looks at sizeof(cpu_set_t) bytes and would miss the CPUs beyond that.
  if (sched_getaffinity(0, DynSize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynSize, DynAffinity);

  // Release the set on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
400#elif defined(__APPLE__)
// Gets the number of *physical cores* on the machine. Queries the
// "hw.physicalcpu" sysctl first and falls back to CTL_HW/HW_AVAILCPU.
// Returns -1 if neither query yields a count of at least 1.
static int computeHostNumPhysicalCores() {
  // Start from 0 so that a failed query can never leave an indeterminate
  // value behind.
  uint32_t count = 0;
  size_t len = sizeof(count);
  if (sysctlbyname("hw.physicalcpu", &count, &len, nullptr, 0) != 0 ||
      count < 1) {
    int nm[2] = {CTL_HW, HW_AVAILCPU};
    // Reset the outputs: the first call may have modified them.
    count = 0;
    len = sizeof(count);
    if (sysctl(nm, 2, &count, &len, nullptr, 0) != 0 || count < 1)
      return -1;
  }
  return count;
}
416#elif defined(__MVS__)
// Reads the number of online standard CPs by following two table pointers
// starting from address zero (PSA -> CVT -> CSD) and loading a counter from
// the final table.
static int computeHostNumPhysicalCores() {
  // Byte offset of the pointer to the Communications Vector Table (CVT) in
  // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
  // will be zero-extended to uintptr_t.
  constexpr int FLCCVT = 16;
  // Byte offset of the pointer to the Common System Data Area (CSD) in the
  // CVT. The table entry is a 31-bit pointer and will be zero-extended to
  // uintptr_t.
  constexpr int CVTCSD = 660;
  // Byte offset to the number of live CPs in the LPAR, stored as a signed
  // 32-bit value in the table.
  constexpr int CSD_NUMBER_ONLINE_STANDARD_CPS = 264;

  // The PSA is addressed from zero.
  char *PSA = nullptr;

  const unsigned int CVTAddress =
      reinterpret_cast<unsigned int &>(PSA[FLCCVT]);
  char *CVT = reinterpret_cast<char *>(static_cast<uintptr_t>(CVTAddress));

  const unsigned int CSDAddress =
      reinterpret_cast<unsigned int &>(CVT[CVTCSD]);
  char *CSD = reinterpret_cast<char *>(static_cast<uintptr_t>(CSDAddress));

  const int OnlineCPs =
      reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
  return OnlineCPs;
}
438#else
// No detection is implemented for the remaining systems; -1 tells the caller
// that the physical core count is unknown.
static int computeHostNumPhysicalCores() {
  constexpr int Unknown = -1;
  return Unknown;
}
441#endif
442
443int llvm::get_physical_cores() {
444 static int NumCores = computeHostNumPhysicalCores();
445 return NumCores;
446}
447