//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the Unix specific implementation of Threading functions.
//
//===----------------------------------------------------------------------===//

#include "Unix.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

#if defined(__APPLE__)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <pthread/qos.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif

#include <pthread.h>

#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
#endif

// Must be included after Threading.inc to provide definition for llvm::thread
// because FreeBSD's condvar.h (included by user.h) misuses the "thread"
// keyword.
#ifndef __FreeBSD__
#include "llvm/Support/thread.h"
#endif

#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <errno.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <unistd.h>
#endif

#if defined(__NetBSD__)
#include <lwp.h> // For _lwp_self()
#endif

#if defined(__OpenBSD__)
#include <unistd.h> // For getthrid()
#endif

#if defined(__linux__)
#include <sched.h>       // For sched_getaffinity
#include <sys/syscall.h> // For syscall codes
#include <unistd.h>      // For syscall()
#endif

#if defined(__CYGWIN__)
#include <sys/cpuset.h>
#endif

#if defined(__HAIKU__)
#include <OS.h> // For B_OS_NAME_LENGTH
#endif

namespace llvm {
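// Spawns a new pthread running ThreadFunc(Arg), optionally with the requested
// stack size. Any pthreads failure is treated as fatal and reported via
// ReportErrnumFatal.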
pthread_t
llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
                            std::optional<unsigned> StackSizeInBytes) {
  int errnum;

  // Construct the attributes object.
  pthread_attr_t Attr;
  if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
    ReportErrnumFatal("pthread_attr_init failed", errnum);
  }

  auto AttrGuard = llvm::make_scope_exit([&] {
    if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
      ReportErrnumFatal("pthread_attr_destroy failed", errnum);
    }
  });

  // Set the requested stack size, if given.
  if (StackSizeInBytes) {
    if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
      ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
    }
  }

  // Construct and execute the thread.
  pthread_t Thread;
  if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
    ReportErrnumFatal("pthread_create failed", errnum);

  return Thread;
}

void llvm_thread_detach_impl(pthread_t Thread) {
  int errnum;

  if ((errnum = ::pthread_detach(Thread)) != 0) {
    ReportErrnumFatal("pthread_detach failed", errnum);
  }
}

void llvm_thread_join_impl(pthread_t Thread) {
  int errnum;

  if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
    ReportErrnumFatal("pthread_join failed", errnum);
  }
}

pthread_t llvm_thread_get_id_impl(pthread_t Thread) { return Thread; }

pthread_t llvm_thread_get_current_id_impl() { return ::pthread_self(); }

} // namespace llvm

uint64_t llvm::get_threadid() {
#if defined(__APPLE__)
  // Calling "mach_thread_self()" bumps the reference count on the thread
  // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
  // count.
  static thread_local thread_port_t Self = [] {
    thread_port_t InitSelf = mach_thread_self();
    mach_port_deallocate(mach_task_self(), Self);
    return InitSelf;
  }();
  return Self;
#elif defined(__FreeBSD__) || defined(__DragonFly__)
  return uint64_t(pthread_getthreadid_np());
#elif defined(__NetBSD__)
  return uint64_t(_lwp_self());
#elif defined(__OpenBSD__)
  return uint64_t(getthrid());
#elif defined(__ANDROID__)
  return uint64_t(gettid());
#elif defined(__linux__)
  return uint64_t(syscall(__NR_gettid));
#elif defined(_AIX)
  return uint64_t(thread_self());
#else
  return uint64_t(pthread_self());
#endif
}

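// Upper bound on a thread name for the host platform, including the
// terminating NUL; 0 means the limit is unknown or thread naming is
// unsupported there.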
static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(PTHREAD_MAX_NAMELEN_NP)
  return PTHREAD_MAX_NAMELEN_NP;
#elif defined(__HAIKU__)
  return B_OS_NAME_LENGTH;
#elif defined(__APPLE__)
  return 64;
#elif defined(__sun__) && defined(__svr4__)
  return 31;
#elif defined(__linux__) && HAVE_PTHREAD_SETNAME_NP
  return 16;
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) ||                  \
    defined(__DragonFly__)
  return 16;
#elif defined(__OpenBSD__)
  return 24;
#elif defined(__CYGWIN__)
  return 16;
#else
  return 0;
#endif
}

uint32_t llvm::get_max_thread_name_length() {
  return get_max_thread_name_length_impl();
}

void llvm::set_thread_name(const Twine &Name) {
  // Make sure the input is null terminated.
  SmallString<64> Storage;
  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);

  // Truncate from the beginning, not the end, if the specified name is too
  // long. For one, this ensures that the resulting string is still null
  // terminated, but additionally the end of a long thread name will usually
  // be more unique than the beginning, since a common pattern is for similar
  // threads to share a common prefix.
  // Note that the name length includes the null terminator.
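  // For example, with Linux's 16-byte limit (15 characters plus the NUL), a
  // hypothetical name such as "foreground-worker-7" would be stored as
  // "ground-worker-7".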
  if (get_max_thread_name_length() > 0)
    NameStr = NameStr.take_back(get_max_thread_name_length() - 1);
  (void)NameStr;
#if defined(HAVE_PTHREAD_SET_NAME_NP)
  ::pthread_set_name_np(::pthread_self(), NameStr.data());
#elif defined(HAVE_PTHREAD_SETNAME_NP)
#if defined(__NetBSD__)
  ::pthread_setname_np(::pthread_self(), "%s",
                       const_cast<char *>(NameStr.data()));
#elif defined(__APPLE__)
  ::pthread_setname_np(NameStr.data());
#else
  ::pthread_setname_np(::pthread_self(), NameStr.data());
#endif
#endif
}

void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  Name.clear();

#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  int pid = ::getpid();
  uint64_t tid = get_threadid();

  struct kinfo_proc *kp = nullptr, *nkp;
  size_t len = 0;
  int error;
  int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
                (int)pid};

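  // The first sysctl() pass (kp == nullptr) only asks the kernel for the
  // required buffer size; later passes retry with a larger buffer if the
  // thread list has grown in the meantime (ENOMEM).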
  while (1) {
    error = sysctl(ctl, 4, kp, &len, nullptr, 0);
    if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
      // Add extra space in case threads are added before next call.
      len += sizeof(*kp) + len / 10;
      nkp = (struct kinfo_proc *)::realloc(kp, len);
      if (nkp == nullptr) {
        free(kp);
        return;
      }
      kp = nkp;
      continue;
    }
    if (error != 0)
      len = 0;
    break;
  }

  for (size_t i = 0; i < len / sizeof(*kp); i++) {
    if (kp[i].ki_tid == (lwpid_t)tid) {
      Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
      break;
    }
  }
  free(kp);
  return;
#elif (defined(__linux__) || defined(__CYGWIN__)) && HAVE_PTHREAD_GETNAME_NP
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
  if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
    Name.append(Buffer, Buffer + strlen(Buffer));
#elif defined(HAVE_PTHREAD_GET_NAME_NP) && HAVE_PTHREAD_GET_NAME_NP
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_get_name_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));

#elif defined(HAVE_PTHREAD_GETNAME_NP) && HAVE_PTHREAD_GETNAME_NP
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_getname_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));
#endif
}

SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
#if (defined(__linux__) || defined(__CYGWIN__)) && defined(SCHED_IDLE)
  // Some *really* old glibcs are missing SCHED_IDLE.
  // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
  // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
  sched_param priority;
  // For each of the above policies, param->sched_priority must be 0.
  priority.sched_priority = 0;
  // SCHED_IDLE for running very low priority background jobs.
  // SCHED_OTHER the standard round-robin time-sharing policy;
  return !pthread_setschedparam(
             pthread_self(),
             // FIXME: consider SCHED_BATCH for Low
             Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
             &priority)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#elif defined(__APPLE__)
  // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
  //
  // Background - Applies to work that isn’t visible to the user and may take
  // significant time to complete. Examples include indexing, backing up, or
  // synchronizing data. This class emphasizes energy efficiency.
  //
  // Utility - Applies to work that takes anywhere from a few seconds to a few
  // minutes to complete. Examples include downloading a document or importing
  // data. This class offers a balance between responsiveness, performance, and
  // energy efficiency.
  const auto qosClass = [&]() {
    switch (Priority) {
    case ThreadPriority::Background:
      return QOS_CLASS_BACKGROUND;
    case ThreadPriority::Low:
      return QOS_CLASS_UTILITY;
    case ThreadPriority::Default:
      return QOS_CLASS_DEFAULT;
    }
  }();
  return !pthread_set_qos_class_self_np(qosClass, 0)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#endif
  return SetThreadPriorityResult::FAILURE;
}

#include <thread>

static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  cpuset_t mask;
  CPU_ZERO(&mask);
  if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask),
                         &mask) == 0)
    return CPU_COUNT(&mask);
#elif (defined(__linux__) || defined(__CYGWIN__))
  cpu_set_t Set;
  CPU_ZERO(&Set);
  if (sched_getaffinity(0, sizeof(Set), &Set) == 0)
    return CPU_COUNT(&Set);
#endif
  // Guard against std::thread::hardware_concurrency() returning 0.
  if (unsigned Val = std::thread::hardware_concurrency())
    return Val;
  return 1;
}

void llvm::ThreadPoolStrategy::apply_thread_strategy(
    unsigned ThreadPoolNum) const {}

llvm::BitVector llvm::get_thread_affinity_mask() {
  // FIXME: Implement
  llvm_unreachable("Not implemented!");
}

unsigned llvm::get_cpus() { return 1; }

#if (defined(__linux__) || defined(__CYGWIN__)) &&                            \
    (defined(__i386__) || defined(__x86_64__))
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
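// A typical x86 /proc/cpuinfo stanza contains, among other fields (values
// are illustrative):
//   processor   : 0
//   physical id : 0
//   siblings    : 8
//   core id     : 0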
static int computeHostNumPhysicalCores() {
  // Enabled represents the number of physical id/core id pairs with at least
  // one processor id enabled by the CPU affinity mask.
  cpu_set_t Affinity, Enabled;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
    return -1;
  CPU_ZERO(&Enabled);

  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
  // mmapped because it appears to have 0 size.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return -1;
  }
  SmallVector<StringRef, 8> strs;
  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
                             /*KeepEmpty=*/false);
  int CurProcessor = -1;
  int CurPhysicalId = -1;
  int CurSiblings = -1;
  int CurCoreId = -1;
  for (StringRef Line : strs) {
    std::pair<StringRef, StringRef> Data = Line.split(':');
    auto Name = Data.first.trim();
    auto Val = Data.second.trim();
    // These fields are available if the kernel is configured with CONFIG_SMP.
    if (Name == "processor")
      Val.getAsInteger(10, CurProcessor);
    else if (Name == "physical id")
      Val.getAsInteger(10, CurPhysicalId);
    else if (Name == "siblings")
      Val.getAsInteger(10, CurSiblings);
    else if (Name == "core id") {
      Val.getAsInteger(10, CurCoreId);
      // The processor id corresponds to an index into cpu_set_t.
      if (CPU_ISSET(CurProcessor, &Affinity))
        CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
    }
  }
  return CPU_COUNT(&Enabled);
}
#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX)
static int computeHostNumPhysicalCores() {
  return sysconf(_SC_NPROCESSORS_ONLN);
}
#elif defined(__linux__)
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
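  // A single retry with a 2048-CPU mask covers twice the default cpu_set_t
  // limit; a fully general implementation could instead keep growing a
  // CPU_ALLOC'd mask until the call succeeds.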
  cpu_set_t *DynAffinity;
  DynAffinity = CPU_ALLOC(2048);
  if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) {
    int NumCPUs = CPU_COUNT(DynAffinity);
    CPU_FREE(DynAffinity);
    return NumCPUs;
  }
  return -1;
}
#elif defined(__APPLE__)
// Gets the number of *physical cores* on the machine.
static int computeHostNumPhysicalCores() {
  uint32_t count;
  size_t len = sizeof(count);
  sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
  if (count < 1) {
    int nm[2];
    nm[0] = CTL_HW;
    nm[1] = HW_AVAILCPU;
    sysctl(nm, 2, &count, &len, NULL, 0);
    if (count < 1)
      return -1;
  }
  return count;
}
#elif defined(__MVS__)
static int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
  char *PSA = 0;
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
#else
// On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() { return -1; }
#endif

int llvm::get_physical_cores() {
  static int NumCores = computeHostNumPhysicalCores();
  return NumCores;
}