//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the Unix specific implementation of Threading functions.
//
//===----------------------------------------------------------------------===//

#include "Unix.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

#if defined(__APPLE__)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <pthread/qos.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif

#include <pthread.h>

#if defined(__FreeBSD__) || defined(__OpenBSD__)
#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
#endif

#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <errno.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <unistd.h>
#endif

#if defined(__NetBSD__)
#include <lwp.h> // For _lwp_self()
#endif

#if defined(__OpenBSD__)
#include <unistd.h> // For getthrid()
#endif

#if defined(__linux__)
#include <sched.h>       // For sched_getaffinity
#include <sys/syscall.h> // For syscall codes
#include <unistd.h>      // For syscall()
#endif

namespace llvm {
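// Spawns a new thread running ThreadFunc(Arg), optionally with the requested
// stack size, and returns its handle to the caller. Any pthread failure is
// reported as a fatal error.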
pthread_t
llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
                            std::optional<unsigned> StackSizeInBytes) {
  int errnum;

  // Construct the attributes object.
  pthread_attr_t Attr;
  if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
    ReportErrnumFatal("pthread_attr_init failed", errnum);
  }

  auto AttrGuard = llvm::make_scope_exit([&] {
    if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
      ReportErrnumFatal("pthread_attr_destroy failed", errnum);
    }
  });

  // Set the requested stack size, if given.
  if (StackSizeInBytes) {
    if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
      ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
    }
  }

  // Construct and execute the thread.
  pthread_t Thread;
  if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
    ReportErrnumFatal("pthread_create failed", errnum);

  return Thread;
}

void llvm_thread_detach_impl(pthread_t Thread) {
  int errnum;

  if ((errnum = ::pthread_detach(Thread)) != 0) {
    ReportErrnumFatal("pthread_detach failed", errnum);
  }
}

void llvm_thread_join_impl(pthread_t Thread) {
  int errnum;

  if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
    ReportErrnumFatal("pthread_join failed", errnum);
  }
}

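// On pthreads the thread handle already serves as the thread identifier, so
// these helpers simply pass the pthread_t through.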
pthread_t llvm_thread_get_id_impl(pthread_t Thread) { return Thread; }

pthread_t llvm_thread_get_current_id_impl() { return ::pthread_self(); }

} // namespace llvm

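// Returns a kernel-level thread id where the platform exposes one (Mach thread
// port on Darwin, LWP/TID on the BSDs and Linux); otherwise falls back to the
// pthread_t value.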
uint64_t llvm::get_threadid() {
#if defined(__APPLE__)
  // Calling "mach_thread_self()" bumps the reference count on the thread
  // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
  // count.
  static thread_local thread_port_t Self = [] {
    thread_port_t InitSelf = mach_thread_self();
    mach_port_deallocate(mach_task_self(), Self);
    return InitSelf;
  }();
  return Self;
#elif defined(__FreeBSD__)
  return uint64_t(pthread_getthreadid_np());
#elif defined(__NetBSD__)
  return uint64_t(_lwp_self());
#elif defined(__OpenBSD__)
  return uint64_t(getthrid());
#elif defined(__ANDROID__)
  return uint64_t(gettid());
#elif defined(__linux__)
  return uint64_t(syscall(SYS_gettid));
#else
  return uint64_t(pthread_self());
#endif
}

static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(__NetBSD__)
  return PTHREAD_MAX_NAMELEN_NP;
#elif defined(__APPLE__)
  return 64;
#elif defined(__linux__)
#if HAVE_PTHREAD_SETNAME_NP
  return 16;
#else
  return 0;
#endif
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  return 16;
#elif defined(__OpenBSD__)
  return 32;
#else
  return 0;
#endif
}

uint32_t llvm::get_max_thread_name_length() {
  return get_max_thread_name_length_impl();
}

void llvm::set_thread_name(const Twine &Name) {
  // Make sure the input is null terminated.
  SmallString<64> Storage;
  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);

  // Truncate from the beginning, not the end, if the specified name is too
  // long. For one, this ensures that the resulting string is still null
  // terminated, but additionally the end of a long thread name will usually
  // be more unique than the beginning, since a common pattern is for similar
  // threads to share a common prefix.
  // Note that the name length includes the null terminator.
  if (get_max_thread_name_length() > 0)
    NameStr = NameStr.take_back(get_max_thread_name_length() - 1);
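  // Suppress unused-variable warnings on targets where none of the setname
  // branches below is compiled in.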
  (void)NameStr;
#if defined(__linux__)
#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
#if HAVE_PTHREAD_SETNAME_NP
  ::pthread_setname_np(::pthread_self(), NameStr.data());
#endif
#endif
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
  ::pthread_set_name_np(::pthread_self(), NameStr.data());
#elif defined(__NetBSD__)
  ::pthread_setname_np(::pthread_self(), "%s",
                       const_cast<char *>(NameStr.data()));
#elif defined(__APPLE__)
  ::pthread_setname_np(NameStr.data());
#endif
}

void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  Name.clear();

#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  int pid = ::getpid();
  uint64_t tid = get_threadid();

  struct kinfo_proc *kp = nullptr, *nkp;
  size_t len = 0;
  int error;
  int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
                (int)pid};

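  // Query the kernel's per-process thread list, growing the buffer until the
  // result fits.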
  while (1) {
    error = sysctl(ctl, 4, kp, &len, nullptr, 0);
    if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
      // Add extra space in case threads are added before next call.
      len += sizeof(*kp) + len / 10;
      nkp = (struct kinfo_proc *)::realloc(kp, len);
      if (nkp == nullptr) {
        free(kp);
        return;
      }
      kp = nkp;
      continue;
    }
    if (error != 0)
      len = 0;
    break;
  }

  for (size_t i = 0; i < len / sizeof(*kp); i++) {
    if (kp[i].ki_tid == (lwpid_t)tid) {
      Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
      break;
    }
  }
  free(kp);
  return;
#elif defined(__NetBSD__)
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_getname_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));
#elif defined(__OpenBSD__)
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_get_name_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));
#elif defined(__linux__)
#if HAVE_PTHREAD_GETNAME_NP
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
  if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
    Name.append(Buffer, Buffer + strlen(Buffer));
#endif
#endif
}

SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
#if defined(__linux__) && defined(SCHED_IDLE)
  // Some *really* old glibcs are missing SCHED_IDLE.
  // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
  // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
  sched_param priority;
  // For each of the above policies, param->sched_priority must be 0.
  priority.sched_priority = 0;
  // SCHED_IDLE for running very low priority background jobs.
  // SCHED_OTHER the standard round-robin time-sharing policy;
  return !pthread_setschedparam(
             pthread_self(),
             // FIXME: consider SCHED_BATCH for Low
             Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
             &priority)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#elif defined(__APPLE__)
  // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
  //
  // Background - Applies to work that isn’t visible to the user and may take
  // significant time to complete. Examples include indexing, backing up, or
  // synchronizing data. This class emphasizes energy efficiency.
  //
  // Utility - Applies to work that takes anywhere from a few seconds to a few
  // minutes to complete. Examples include downloading a document or importing
  // data. This class offers a balance between responsiveness, performance, and
  // energy efficiency.
  const auto qosClass = [&]() {
    switch (Priority) {
    case ThreadPriority::Background:
      return QOS_CLASS_BACKGROUND;
    case ThreadPriority::Low:
      return QOS_CLASS_UTILITY;
    case ThreadPriority::Default:
      return QOS_CLASS_DEFAULT;
    }
  }();
  return !pthread_set_qos_class_self_np(qosClass, 0)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#endif
  return SetThreadPriorityResult::FAILURE;
}

#include <thread>

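// Returns the number of logical CPUs this process may run on: the size of the
// CPU affinity mask where the platform exposes it, otherwise whatever
// std::thread::hardware_concurrency() reports (never less than 1).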
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  cpuset_t mask;
  CPU_ZERO(&mask);
  if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask),
                         &mask) == 0)
    return CPU_COUNT(&mask);
#elif defined(__linux__)
  cpu_set_t Set;
  if (sched_getaffinity(0, sizeof(Set), &Set) == 0)
    return CPU_COUNT(&Set);
#endif
  // Guard against std::thread::hardware_concurrency() returning 0.
  if (unsigned Val = std::thread::hardware_concurrency())
    return Val;
  return 1;
}

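// Pinning pool threads to specific cores is currently not implemented on Unix,
// so this is a no-op.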
void llvm::ThreadPoolStrategy::apply_thread_strategy(
    unsigned ThreadPoolNum) const {}

llvm::BitVector llvm::get_thread_affinity_mask() {
  // FIXME: Implement
  llvm_unreachable("Not implemented!");
}

unsigned llvm::get_cpus() { return 1; }

#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
static int computeHostNumPhysicalCores() {
  // Enabled represents the number of physical id/core id pairs with at least
  // one processor id enabled by the CPU affinity mask.
  cpu_set_t Affinity, Enabled;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
    return -1;
  CPU_ZERO(&Enabled);

  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
  // mmapped because it appears to have 0 size.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return -1;
  }
  SmallVector<StringRef, 8> strs;
  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
                             /*KeepEmpty=*/false);
  int CurProcessor = -1;
  int CurPhysicalId = -1;
  int CurSiblings = -1;
  int CurCoreId = -1;
  for (StringRef Line : strs) {
    std::pair<StringRef, StringRef> Data = Line.split(':');
    auto Name = Data.first.trim();
    auto Val = Data.second.trim();
    // These fields are available if the kernel is configured with CONFIG_SMP.
    if (Name == "processor")
      Val.getAsInteger(10, CurProcessor);
    else if (Name == "physical id")
      Val.getAsInteger(10, CurPhysicalId);
    else if (Name == "siblings")
      Val.getAsInteger(10, CurSiblings);
    else if (Name == "core id") {
      Val.getAsInteger(10, CurCoreId);
      // The processor id corresponds to an index into cpu_set_t.
      if (CPU_ISSET(CurProcessor, &Affinity))
        CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
    }
  }
  return CPU_COUNT(&Enabled);
}
#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX)
static int computeHostNumPhysicalCores() {
  return sysconf(_SC_NPROCESSORS_ONLN);
}
#elif defined(__linux__) && !defined(__ANDROID__)
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPUs on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  cpu_set_t *DynAffinity;
  DynAffinity = CPU_ALLOC(2048);
  if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) {
    int NumCPUs = CPU_COUNT(DynAffinity);
    CPU_FREE(DynAffinity);
    return NumCPUs;
  }
  return -1;
}
#elif defined(__APPLE__)
// Gets the number of *physical cores* on the machine.
static int computeHostNumPhysicalCores() {
  uint32_t count;
  size_t len = sizeof(count);
  sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
  if (count < 1) {
    int nm[2];
    nm[0] = CTL_HW;
    nm[1] = HW_AVAILCPU;
    sysctl(nm, 2, &count, &len, NULL, 0);
    if (count < 1)
      return -1;
  }
  return count;
}
#elif defined(__MVS__)
static int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
  char *PSA = 0;
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
#else
// On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() { return -1; }
#endif

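// The count is computed once and cached; initialization of the function-local
// static is thread-safe in C++11 and later.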
int llvm::get_physical_cores() {
  static int NumCores = computeHostNumPhysicalCores();
  return NumCores;
}