BenchmarkRunner.cpp source code [llvm_projects/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp]

1	//===-- BenchmarkRunner.cpp -------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "BenchmarkRunner.h"
10	#include "Assembler.h"
11	#include "Error.h"
12	#include "MCInstrDescView.h"
13	#include "MmapUtils.h"
14	#include "PerfHelper.h"
15	#include "SubprocessMemory.h"
16	#include "Target.h"
17	#include "llvm/ADT/ScopeExit.h"
18	#include "llvm/ADT/StringExtras.h"
19	#include "llvm/ADT/StringRef.h"
20	#include "llvm/ADT/Twine.h"
21	#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
22	#include "llvm/Support/CrashRecoveryContext.h"
23	#include "llvm/Support/Error.h"
24	#include "llvm/Support/FileSystem.h"
25	#include "llvm/Support/MemoryBuffer.h"
26	#include "llvm/Support/Program.h"
27	#include "llvm/Support/Signals.h"
28	#include "llvm/Support/SystemZ/zOSSupport.h"
29	#include <cmath>
30	#include <memory>
31	#include <string>
32
33	#ifdef __linux__
34	#ifdef HAVE_LIBPFM
35	#include <perfmon/perf_event.h>
36	#endif
37	#include <sys/mman.h>
38	#include <sys/ptrace.h>
39	#include <sys/resource.h>
40	#include <sys/socket.h>
41	#include <sys/syscall.h>
42	#include <sys/wait.h>
43	#include <unistd.h>
44
45	#if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
46	#include <sys/rseq.h>
47	#if defined(RSEQ_SIG) && defined(SYS_rseq)
48	#define GLIBC_INITS_RSEQ
49	#endif
50	#endif
51	#endif // __linux__
52
53	namespace llvm {
54	namespace exegesis {
55
56	BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
57	BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
58	ExecutionModeE ExecutionMode,
59	ArrayRef<ValidationEvent> ValCounters)
60	: State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
61	ExecutionMode(ExecutionMode), ValidationCounters (ValCounters),
62	Scratch(std::make_unique<ScratchSpace>()) {}
63
64	BenchmarkRunner::~BenchmarkRunner() = default;
65
66	void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
67	const SmallVectorImpl<int64_t> &NewValues,
68	SmallVectorImpl<int64_t> *Result) {
69	const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
70	if (NumValues > Result->size())
71	Result->resize(N: NumValues, NV: `0`);
72	for (size_t I = `0`, End = NewValues.size(); I < End; ++I)
73	(*Result)[I] += NewValues [I];
74	}
75
76	Expected<SmallVector<int64_t, `4`>>
77	BenchmarkRunner::FunctionExecutor::runAndSample(
78	const char Counters, ArrayRef<const* char *> ValidationCounters,
79	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
80	// We sum counts when there are several counters for a single ProcRes
81	// (e.g. P23 on SandyBridge).
82	SmallVector<int64_t, `4`> CounterValues;
83	SmallVector<StringRef, `2`> CounterNames;
84	StringRef (Counters).split(A&: CounterNames, Separator: `'+'`);
85	for (auto &CounterName : CounterNames) {
86	CounterName = CounterName.trim();
87	Expected<SmallVector<int64_t, `4`>> ValueOrError = runWithCounter(
88	CounterName, ValidationCounters, ValidationCounterValues);
89	if (!ValueOrError)
90	return ValueOrError.takeError();
91	accumulateCounterValues(NewValues: ValueOrError.get(), Result: &CounterValues);
92	}
93	return CounterValues;
94	}
95
96	namespace {
97	class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
98	public:
99	static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
100	create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
101	BenchmarkRunner::ScratchSpace *Scratch,
102	std::optional<int> BenchmarkProcessCPU) {
103	Expected<ExecutableFunction> EF =
104	ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
105
106	if (!EF)
107	return EF.takeError();
108
109	return std::unique_ptr<InProcessFunctionExecutorImpl>(
110	new InProcessFunctionExecutorImpl (State, std::move(*EF), Scratch));
111	}
112
113	private:
114	InProcessFunctionExecutorImpl(const LLVMState &State,
115	ExecutableFunction Function,
116	BenchmarkRunner::ScratchSpace *Scratch)
117	: State(State), Function (std::move(Function)), Scratch(Scratch) {}
118
119	static void accumulateCounterValues(const SmallVector<int64_t, `4`> &NewValues,
120	SmallVector<int64_t, `4`> *Result) {
121	const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
122	if (NumValues > Result->size())
123	Result->resize(N: NumValues, NV: `0`);
124	for (size_t I = `0`, End = NewValues.size(); I < End; ++I)
125	(*Result)[I] += NewValues [I];
126	}
127
128	Expected<SmallVector<int64_t, `4`>> runWithCounter(
129	StringRef CounterName, ArrayRef<const char *> ValidationCounters,
130	SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
131	const ExegesisTarget &ET = State.getExegesisTarget();
132	char *const ScratchPtr = Scratch->ptr();
133	auto CounterOrError =
134	ET.createCounter(CounterName, State, ValidationCounters);
135
136	if (!CounterOrError)
137	return CounterOrError.takeError();
138
139	pfm::CounterGroup *Counter = CounterOrError.get().get();
140	Scratch->clear();
141	{
142	auto PS = ET.withSavedState();
143	CrashRecoveryContext CRC;
144	CrashRecoveryContext::Enable();
145	const bool Crashed = !CRC.RunSafely(Fn: [this, Counter, ScratchPtr]() {
146	Counter->start();
147	this->Function (ScratchPtr);
148	Counter->stop();
149	});
150	CrashRecoveryContext::Disable();
151	PS.reset();
152	if (Crashed) {
153	#ifdef LLVM_ON_UNIX
154	// See "Exit Status for Commands":
155	// https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
156	constexpr const int kSigOffset = `128`;
157	return make_error<SnippetSignal>(Args: CRC.RetCode - kSigOffset);
158	#else
159	// The exit code of the process on windows is not meaningful as a
160	// signal, so simply pass in -1 as the signal into the error.
161	return make_error<SnippetSignal>(-`1`);
162	#endif // LLVM_ON_UNIX
163	}
164	}
165
166	auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
167	if (!ValidationValuesOrErr)
168	return ValidationValuesOrErr.takeError();
169
170	ArrayRef RealValidationValues = *ValidationValuesOrErr;
171	for (size_t I = `0`; I < RealValidationValues.size(); ++I)
172	ValidationCounterValues [I] = RealValidationValues [I];
173
174	return Counter->readOrError(FunctionBytes: Function.getFunctionBytes());
175	}
176
177	const LLVMState &State;
178	const ExecutableFunction Function;
179	BenchmarkRunner::ScratchSpace *const Scratch;
180	};
181
182	#ifdef __linux__
183	// The following class implements a function executor that executes the
184	// benchmark code within a subprocess rather than within the main llvm-exegesis
185	// process. This allows for much more control over the execution context of the
186	// snippet, particularly with regard to memory. This class performs all the
187	// necessary functions to create the subprocess, execute the snippet in the
188	// subprocess, and report results/handle errors.
189	class SubProcessFunctionExecutorImpl
190	: public BenchmarkRunner::FunctionExecutor {
191	public:
192	static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
193	create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
194	const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) {
195	Expected<ExecutableFunction> EF =
196	ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
197	if (!EF)
198	return EF.takeError();
199
200	return std::unique_ptr<SubProcessFunctionExecutorImpl>(
201	new SubProcessFunctionExecutorImpl (State, std::move(*EF), Key,
202	BenchmarkProcessCPU));
203	}
204
205	private:
206	SubProcessFunctionExecutorImpl(const LLVMState &State,
207	ExecutableFunction Function,
208	const BenchmarkKey &Key,
209	std::optional<int> BenchmarkCPU)
210	: State(State), Function (std::move(Function)), Key(Key),
211	BenchmarkProcessCPU (BenchmarkCPU) {}
212
// Exit codes the benchmarking child uses to tell the parent which setup step
// failed. Zero is reserved for success, so numbering starts at one.
enum ChildProcessExitCodeE {
  CounterFDReadFailed = 1,
  RSeqDisableFailed = 2,
  FunctionDataMappingFailed = 3,
  AuxiliaryMemorySetupFailed = 4,
  SetCPUAffinityFailed = 5
};
220
221	StringRef childProcessExitCodeToString(int ExitCode) const {
222	switch (ExitCode) {
223	case ChildProcessExitCodeE::CounterFDReadFailed:
224	return "Counter file descriptor read failed";
225	case ChildProcessExitCodeE::RSeqDisableFailed:
226	return "Disabling restartable sequences failed";
227	case ChildProcessExitCodeE::FunctionDataMappingFailed:
228	return "Failed to map memory for assembled snippet";
229	case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
230	return "Failed to setup auxiliary memory";
231	case ChildProcessExitCodeE::SetCPUAffinityFailed:
232	return "Failed to set CPU affinity of the benchmarking process";
233	default:
234	return "Child process returned with unknown exit code";
235	}
236	}
237
238	Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
239	struct msghdr Message = {};
240	char Buffer[CMSG_SPACE(sizeof(FD))];
241	memset(s: Buffer, c: `0`, n: sizeof(Buffer));
242	Message.msg_control = Buffer;
243	Message.msg_controllen = sizeof(Buffer);
244
245	struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
246	ControlMessage->cmsg_level = SOL_SOCKET;
247	ControlMessage->cmsg_type = SCM_RIGHTS;
248	ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
249
250	memcpy(CMSG_DATA(ControlMessage), src: &FD, n: sizeof(FD));
251
252	Message.msg_controllen = CMSG_SPACE(sizeof(FD));
253
254	ssize_t BytesWritten = sendmsg(fd: SocketFD, message: &Message, flags: `0`);
255
256	if (BytesWritten < `0`)
257	return make_error<Failure>(Args: "Failed to write FD to socket: " +
258	Twine (strerror(errno)));
259
260	return Error::success();
261	}
262
263	Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
264	struct msghdr Message = {};
265
266	char ControlBuffer[`256`];
267	Message.msg_control = ControlBuffer;
268	Message.msg_controllen = sizeof(ControlBuffer);
269
270	ssize_t BytesRead = recvmsg(fd: SocketFD, message: &Message, flags: `0`);
271
272	if (BytesRead < `0`)
273	return make_error<Failure>(Args: "Failed to read FD from socket: " +
274	Twine (strerror(errno)));
275
276	struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
277
278	int FD;
279
280	if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
281	return make_error<Failure>(Args: "Failed to get correct number of bytes for "
282	"file descriptor from socket.");
283
284	memcpy(dest: &FD, CMSG_DATA(ControlMessage), n: sizeof(FD));
285
286	return FD;
287	}
288
289	Error
290	runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
291	SmallVectorImpl<int64_t> &CounterValues,
292	ArrayRef<const char *> ValidationCounters,
293	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
294	auto WriteFDClose = make_scope_exit(F: [WriteFD]() { close(fd: WriteFD); });
295	const ExegesisTarget &ET = State.getExegesisTarget();
296	auto CounterOrError =
297	ET.createCounter(CounterName, State, ValidationCounters, ProcessID: ChildPID);
298
299	if (!CounterOrError)
300	return CounterOrError.takeError();
301
302	pfm::CounterGroup *Counter = CounterOrError.get().get();
303
304	// Make sure to attach to the process (and wait for the sigstop to be
305	// delivered and for the process to continue) before we write to the counter
306	// file descriptor. Attaching to the process before writing to the socket
307	// ensures that the subprocess at most has blocked on the read call. If we
308	// attach afterwards, the subprocess might exit before we get to the attach
309	// call due to effects like scheduler contention, introducing transient
310	// failures.
311	if (ptrace(request: PTRACE_ATTACH, ChildPID, NULL, NULL) != `0`)
312	return make_error<Failure>(Args: "Failed to attach to the child process: " +
313	Twine (strerror(errno)));
314
315	if (waitpid(pid: ChildPID, NULL, options: `0`) == -`1`) {
316	return make_error<Failure>(
317	Args: "Failed to wait for child process to stop after attaching: " +
318	Twine (strerror(errno)));
319	}
320
321	if (ptrace(request: PTRACE_CONT, ChildPID, NULL, NULL) != `0`)
322	return make_error<Failure>(
323	Args: "Failed to continue execution of the child process: " +
324	Twine (strerror(errno)));
325
326	int CounterFileDescriptor = Counter->getFileDescriptor();
327	Error SendError =
328	sendFileDescriptorThroughSocket(SocketFD: WriteFD, FD: CounterFileDescriptor);
329
330	if (SendError)
331	return SendError;
332
333	int ChildStatus;
334	if (waitpid(pid: ChildPID, stat_loc: &ChildStatus, options: `0`) == -`1`) {
335	return make_error<Failure>(
336	Args: "Waiting for the child process to complete failed: " +
337	Twine (strerror(errno)));
338	}
339
340	if (WIFEXITED(ChildStatus)) {
341	int ChildExitCode = WEXITSTATUS(ChildStatus);
342	if (ChildExitCode == `0`) {
343	// The child exited succesfully, read counter values and return
344	// success.
345	auto CounterValueOrErr = Counter->readOrError();
346	if (!CounterValueOrErr)
347	return CounterValueOrErr.takeError();
348	CounterValues = std::move(*CounterValueOrErr);
349
350	auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
351	if (!ValidationValuesOrErr)
352	return ValidationValuesOrErr.takeError();
353
354	ArrayRef RealValidationValues = *ValidationValuesOrErr;
355	for (size_t I = `0`; I < RealValidationValues.size(); ++I)
356	ValidationCounterValues [I] = RealValidationValues [I];
357
358	return Error::success();
359	}
360	// The child exited, but not successfully.
361	return make_error<Failure>(
362	Args: "Child benchmarking process exited with non-zero exit code: " +
363	childProcessExitCodeToString(ExitCode: ChildExitCode));
364	}
365
366	// An error was encountered running the snippet, process it
367	siginfo_t ChildSignalInfo;
368	if (ptrace(request: PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -`1`) {
369	return make_error<Failure>(Args: "Getting signal info from the child failed: " +
370	Twine (strerror(errno)));
371	}
372
373	// Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
374	// handlers to run, and calling SIGTERM would mean that ptrace will force
375	// it to block in the signal-delivery-stop for the SIGSEGV/other signals,
376	// and upon exit.
377	if (kill(pid: ChildPID, SIGKILL) == -`1`)
378	return make_error<Failure>(Args: "Failed to kill child benchmarking proces: " +
379	Twine (strerror(errno)));
380
381	// Wait for the process to exit so that there are no zombie processes left
382	// around.
383	if (waitpid(pid: ChildPID, NULL, options: `0`) == -`1`)
384	return make_error<Failure>(Args: "Failed to wait for process to die: " +
385	Twine (strerror(errno)));
386
387	if (ChildSignalInfo.si_signo == SIGSEGV)
388	return make_error<SnippetSegmentationFault>(
389	Args: reinterpret_cast<uintptr_t>(ChildSignalInfo.si_addr));
390
391	return make_error<SnippetSignal>(Args&: ChildSignalInfo.si_signo);
392	}
393
394	static void setCPUAffinityIfRequested(int CPUToUse) {
395	// Special case this function for x86_64 for now as certain more esoteric
396	// platforms have different definitions for some of the libc functions that
397	// cause buildtime failures. Additionally, the subprocess executor mode (the
398	// sole mode where this is supported) currently only supports x86_64.
399
400	// Also check that we have the SYS_getcpu macro defined, meaning the syscall
401	// actually exists within the build environment. We manually use the syscall
402	// rather than the libc wrapper given the wrapper for getcpu is only available
403	// in glibc 2.29 and later.
404	#if defined(__x86_64__) && defined(SYS_getcpu)
405	// Set the CPU affinity for the child process, so that we ensure that if
406	// the user specified a CPU the process should run on, the benchmarking
407	// process is running on that CPU.
408	cpu_set_t CPUMask;
409	CPU_ZERO(&CPUMask);
410	CPU_SET(CPUToUse, &CPUMask);
411	// TODO(boomanaiden154): Rewrite this to use LLVM primitives once they
412	// are available.
413	int SetAffinityReturn = sched_setaffinity(pid: `0`, cpusetsize: sizeof(CPUMask), cpuset: &CPUMask);
414	if (SetAffinityReturn == -`1`) {
415	exit(status: ChildProcessExitCodeE::SetCPUAffinityFailed);
416	}
417
418	// Check (if assertions are enabled) that we are actually running on the
419	// CPU that was specified by the user.
420	[[maybe_unused]] unsigned int CurrentCPU;
421	assert(syscall(SYS_getcpu, &CurrentCPU, nullptr) == `0` &&
422	"Expected getcpu call to succeed.");
423	assert(static_cast<int>(CurrentCPU) == CPUToUse &&
424	"Expected current CPU to equal the CPU requested by the user");
425	#else
426	exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
427	#endif // defined(__x86_64__) && defined(SYS_getcpu)
428	}
429
430	Error createSubProcessAndRunBenchmark(
431	StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
432	ArrayRef<const char *> ValidationCounters,
433	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
434	int PipeFiles[`2`];
435	int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, protocol: `0`, fds: PipeFiles);
436	if (PipeSuccessOrErr != `0`) {
437	return make_error<Failure>(
438	Args: "Failed to create a pipe for interprocess communication between "
439	"llvm-exegesis and the benchmarking subprocess: " +
440	Twine (strerror(errno)));
441	}
442
443	SubprocessMemory SPMemory;
444	Error MemoryInitError = SPMemory.initializeSubprocessMemory(ProcessID: getpid());
445	if (MemoryInitError)
446	return MemoryInitError;
447
448	Error AddMemDefError =
449	SPMemory.addMemoryDefinition(MemoryDefinitions: Key.MemoryValues, ProcessID: getpid());
450	if (AddMemDefError)
451	return AddMemDefError;
452
453	long ParentTID = SubprocessMemory::getCurrentTID();
454	pid_t ParentOrChildPID = fork();
455
456	if (ParentOrChildPID == -`1`) {
457	return make_error<Failure>(Args: "Failed to create child process: " +
458	Twine (strerror(errno)));
459	}
460
461	if (ParentOrChildPID == `0`) {
462	if (BenchmarkProcessCPU.has_value()) {
463	setCPUAffinityIfRequested(*BenchmarkProcessCPU);
464	}
465
466	// We are in the child process, close the write end of the pipe.
467	close(fd: PipeFiles[`1`]);
468	// Unregister handlers, signal handling is now handled through ptrace in
469	// the host process.
470	sys::unregisterHandlers();
471	runChildSubprocess(Pipe: PipeFiles[`0`], Key, ParentTID);
472	// The child process terminates in the above function, so we should never
473	// get to this point.
474	llvm_unreachable("Child process didn't exit when expected.");
475	}
476
477	// Close the read end of the pipe as we only need to write to the subprocess
478	// from the parent process.
479	close(fd: PipeFiles[`0`]);
480	return runParentProcess(ChildPID: ParentOrChildPID, WriteFD: PipeFiles[`1`], CounterName,
481	CounterValues, ValidationCounters,
482	ValidationCounterValues);
483	}
484
485	void disableCoreDumps() const {
486	struct rlimit rlim;
487
488	rlim.rlim_cur = `0`;
489	setrlimit(RLIMIT_CORE, rlimits: &rlim);
490	}
491
492	[[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
493	long ParentTID) const {
494	// Disable core dumps in the child process as otherwise everytime we
495	// encounter an execution failure like a segmentation fault, we will create
496	// a core dump. We report the information directly rather than require the
497	// user inspect a core dump.
498	disableCoreDumps();
499
500	// The following occurs within the benchmarking subprocess.
501	pid_t ParentPID = getppid();
502
503	Expected<int> CounterFileDescriptorOrError =
504	getFileDescriptorFromSocket(SocketFD: Pipe);
505
506	if (!CounterFileDescriptorOrError)
507	exit(status: ChildProcessExitCodeE::CounterFDReadFailed);
508
509	int CounterFileDescriptor = *CounterFileDescriptorOrError;
510
511	// Glibc versions greater than 2.35 automatically call rseq during
512	// initialization. Unmapping the region that glibc sets up for this causes
513	// segfaults in the program. Unregister the rseq region so that we can safely
514	// unmap it later
515	#ifdef GLIBC_INITS_RSEQ
516	unsigned int RseqStructSize = __rseq_size;
517
518	// Glibc v2.40 (the change is also expected to be backported to v2.35)
519	// changes the definition of __rseq_size to be the usable area of the struct
520	// rather than the actual size of the struct. v2.35 uses only 20 bytes of
521	// the 32 byte struct. For now, it should be safe to assume that if the
522	// usable size is less than 32, the actual size of the struct will be 32
523	// bytes given alignment requirements.
524	if (__rseq_size < `32`)
525	RseqStructSize = `32`;
526
527	long RseqDisableOutput = syscall(
528	SYS_rseq,
529	reinterpret_cast<uintptr_t>(__builtin_thread_pointer()) + __rseq_offset,
530	RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
531	if (RseqDisableOutput != `0`)
532	exit(status: ChildProcessExitCodeE::RSeqDisableFailed);
533	#endif // GLIBC_INITS_RSEQ
534
535	// The frontend that generates the memory annotation structures should
536	// validate that the address to map the snippet in at is a multiple of
537	// the page size. Assert that this is true here.
538	assert(Key.SnippetAddress % getpagesize() == `0` &&
539	"The snippet address needs to be aligned to a page boundary.");
540
541	size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
542	void *MapAddress = NULL;
543	int MapFlags = MAP_PRIVATE \| MAP_ANONYMOUS;
544
545	if (Key.SnippetAddress != `0`) {
546	MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
547	MapFlags \|= MAP_FIXED_NOREPLACE;
548	}
549
550	char *FunctionDataCopy =
551	(char *)mmap(addr: MapAddress, len: FunctionDataCopySize, PROT_READ \| PROT_WRITE,
552	flags: MapFlags, fd: `0`, offset: `0`);
553	if (reinterpret_cast<intptr_t>(FunctionDataCopy) == -`1`)
554	exit(status: ChildProcessExitCodeE::FunctionDataMappingFailed);
555
556	memcpy(dest: FunctionDataCopy, src: this->Function.FunctionBytes.data(),
557	n: this->Function.FunctionBytes.size());
558	mprotect(addr: FunctionDataCopy, len: FunctionDataCopySize, PROT_READ \| PROT_EXEC);
559
560	Expected<int> AuxMemFDOrError =
561	SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
562	MemoryDefinitions: Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
563	if (!AuxMemFDOrError)
564	exit(status: ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
565
566	((void ()(size_t, int*))(uintptr_t)FunctionDataCopy)(FunctionDataCopySize,
567	*AuxMemFDOrError);
568
569	exit(status: `0`);
570	}
571
572	Expected<SmallVector<int64_t, `4`>> runWithCounter(
573	StringRef CounterName, ArrayRef<const char *> ValidationCounters,
574	SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
575	SmallVector<int64_t, `4`> Value(`1`, `0`);
576	Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
577	CounterName, CounterValues&: Value, ValidationCounters, ValidationCounterValues);
578
579	if (PossibleBenchmarkError)
580	return std::move(PossibleBenchmarkError);
581
582	return Value;
583	}
584
585	const LLVMState &State;
586	const ExecutableFunction Function;
587	const BenchmarkKey &Key;
588	const std::optional<int> BenchmarkProcessCPU;
589	};
590	#endif // __linux__
591	} // namespace
592
593	Expected<SmallString<`0`>> BenchmarkRunner::assembleSnippet(
594	const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
595	unsigned MinInstructions, unsigned LoopBodySize,
596	bool GenerateMemoryInstructions) const {
597	const std::vector<MCInst> &Instructions = BC.Key.Instructions;
598	SmallString<`0`> Buffer;
599	raw_svector_ostream OS(Buffer);
600	if (Error E = assembleToStream(
601	ET: State.getExegesisTarget(), TM: State.createTargetMachine(), LiveIns: BC.LiveIns,
602	Fill: Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
603	CleanupMemory: GenerateMemoryInstructions),
604	AsmStreamm&: OS, Key: BC.Key, GenerateMemoryInstructions)) {
605	return std::move(E);
606	}
607	return Buffer;
608	}
609
610	Expected<BenchmarkRunner::RunnableConfiguration>
611	BenchmarkRunner::getRunnableConfiguration(
612	const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
613	const SnippetRepetitor &Repetitor) const {
614	RunnableConfiguration RC;
615
616	Benchmark &BenchmarkResult = RC.BenchmarkResult;
617	BenchmarkResult.Mode = Mode;
618	BenchmarkResult.CpuName =
619	std::string (State.getTargetMachine().getTargetCPU());
620	BenchmarkResult.LLVMTriple =
621	State.getTargetMachine().getTargetTriple().normalize();
622	BenchmarkResult.MinInstructions = MinInstructions;
623	BenchmarkResult.Info = BC.Info;
624
625	const std::vector<MCInst> &Instructions = BC.Key.Instructions;
626
627	bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
628
629	BenchmarkResult.Key = BC.Key;
630
631	// Assemble at least kMinInstructionsForSnippet instructions by repeating
632	// the snippet for debug/analysis. This is so that the user clearly
633	// understands that the inside instructions are repeated.
634	if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
635	const int MinInstructionsForSnippet = `4` * Instructions.size();
636	const int LoopBodySizeForSnippet = `2` * Instructions.size();
637	auto Snippet =
638	assembleSnippet(BC, Repetitor, MinInstructions: MinInstructionsForSnippet,
639	LoopBodySize: LoopBodySizeForSnippet, GenerateMemoryInstructions);
640	if (Error E = Snippet.takeError())
641	return std::move(E);
642
643	if (auto Err = getBenchmarkFunctionBytes(InputData: *Snippet,
644	Bytes&: BenchmarkResult.AssembledSnippet))
645	return std::move(Err);
646	}
647
648	// Assemble enough repetitions of the snippet so we have at least
649	// MinInstructions instructions.
650	if (BenchmarkPhaseSelector >
651	BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
652	auto Snippet =
653	assembleSnippet(BC, Repetitor, MinInstructions: BenchmarkResult.MinInstructions,
654	LoopBodySize, GenerateMemoryInstructions);
655	if (Error E = Snippet.takeError())
656	return std::move(E);
657	RC.ObjectFile = getObjectFromBuffer(Buffer: *Snippet);
658	}
659
660	return std::move(RC);
661	}
662
663	Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
664	BenchmarkRunner::createFunctionExecutor(
665	object::OwningBinary<object::ObjectFile> ObjectFile,
666	const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const {
667	switch (ExecutionMode) {
668	case ExecutionModeE::InProcess: {
669	if (BenchmarkProcessCPU.has_value())
670	return make_error<Failure>(Args: "The inprocess execution mode does not "
671	"support benchmark core pinning.");
672
673	auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
674	State, Obj: std::move(ObjectFile), Scratch: Scratch.get(), BenchmarkProcessCPU);
675	if (!InProcessExecutorOrErr)
676	return InProcessExecutorOrErr.takeError();
677
678	return std::move(*InProcessExecutorOrErr);
679	}
680	case ExecutionModeE::SubProcess: {
681	#ifdef __linux__
682	auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
683	State, Obj: std::move(ObjectFile), Key, BenchmarkProcessCPU);
684	if (!SubProcessExecutorOrErr)
685	return SubProcessExecutorOrErr.takeError();
686
687	return std::move(*SubProcessExecutorOrErr);
688	#else
689	return make_error<Failure>(
690	"The subprocess execution mode is only supported on Linux");
691	#endif
692	}
693	}
694	llvm_unreachable("ExecutionMode is outside expected range");
695	}
696
697	std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
698	RunnableConfiguration &&RC, const std::optional<StringRef> &DumpFile,
699	std::optional<int> BenchmarkProcessCPU) const {
700	Benchmark &BenchmarkResult = RC.BenchmarkResult;
701	object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
702
703	if (DumpFile && BenchmarkPhaseSelector >
704	BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
705	auto ObjectFilePath =
706	writeObjectFile(Buffer: ObjectFile.getBinary()->getData(), FileName: *DumpFile);
707	if (Error E = ObjectFilePath.takeError()) {
708	return {std::move(E), std::move(BenchmarkResult)};
709	}
710	outs() << "Check generated assembly with: /usr/bin/objdump -d "
711	<< *ObjectFilePath << "\n";
712	}
713
714	if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
715	BenchmarkResult.Error = "actual measurements skipped.";
716	return {Error::success(), std::move(BenchmarkResult)};
717	}
718
719	Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
720	createFunctionExecutor(ObjectFile: std::move(ObjectFile), Key: RC.BenchmarkResult.Key,
721	BenchmarkProcessCPU);
722	if (!Executor)
723	return {Executor.takeError(), std::move(BenchmarkResult)};
724	auto NewMeasurements = runMeasurements(Executor: **Executor);
725
726	if (Error E = NewMeasurements.takeError()) {
727	return {std::move(E), std::move(BenchmarkResult)};
728	}
729	assert(BenchmarkResult.MinInstructions > `0` && "invalid MinInstructions");
730	for (BenchmarkMeasure &BM : *NewMeasurements) {
731	// Scale the measurements by the number of instructions.
732	BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
733	// Scale the measurements by the number of times the entire snippet is
734	// repeated.
735	BM.PerSnippetValue /=
736	std::ceil(x: BenchmarkResult.MinInstructions /
737	static_cast<double>(BenchmarkResult.Key.Instructions.size()));
738	}
739	BenchmarkResult.Measurements = std::move(*NewMeasurements);
740
741	return {Error::success(), std::move(BenchmarkResult)};
742	}
743
744	Expected<std::string>
745	BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
746	int ResultFD = `0`;
747	SmallString<`256`> ResultPath = FileName;
748	if (Error E = errorCodeToError(
749	EC: FileName.empty() ? sys::fs::createTemporaryFile(Prefix: "snippet", Suffix: "o",
750	ResultFD, ResultPath)
751	: sys::fs::openFileForReadWrite(
752	Name: FileName, ResultFD, Disp: sys::fs::CD_CreateAlways,
753	Flags: sys::fs::OF_None)))
754	return std::move(E);
755	raw_fd_ostream OFS(ResultFD, true /ShouldClose/);
756	OFS.write(Ptr: Buffer.data(), Size: Buffer.size());
757	OFS.flush();
758	return std::string(ResultPath);
759	}
760
761	static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
762	const ValidationEvent RHS) {
763	return static_cast<int>(LHS.first) < static_cast<int>(RHS);
764	}
765
766	Error BenchmarkRunner::getValidationCountersToRun(
767	SmallVector<const char > &ValCountersToRun) const* {
768	const PfmCountersInfo &PCI = State.getPfmCounters();
769	ValCountersToRun.reserve(N: ValidationCounters.size());
770
771	ValCountersToRun.reserve(N: ValidationCounters.size());
772	ArrayRef TargetValidationEvents(PCI.ValidationEvents,
773	PCI.NumValidationEvents);
774	for (const ValidationEvent RequestedValEvent : ValidationCounters) {
775	auto ValCounterIt =
776	lower_bound(Range&: TargetValidationEvents, Value: RequestedValEvent, C: EventLessThan);
777	if (ValCounterIt == TargetValidationEvents.end() \|\|
778	ValCounterIt->first != RequestedValEvent)
779	return make_error<Failure>(Args: "Cannot create validation counter");
780
781	assert(ValCounterIt->first == RequestedValEvent &&
782	"The array of validation events from the target should be sorted");
783	ValCountersToRun.push_back(Elt: ValCounterIt->second);
784	}
785
786	return Error::success();
787	}
788
789	BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
790
791	} // namespace exegesis
792	} // namespace llvm
793

Browse the source code of llvm_projects/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp