BenchmarkRunner.cpp source code [llvm_projects/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp]

1	//===-- BenchmarkRunner.cpp -------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "BenchmarkRunner.h"
10	#include "Assembler.h"
11	#include "DisassemblerHelper.h"
12	#include "Error.h"
13	#include "MCInstrDescView.h"
14	#include "MmapUtils.h"
15	#include "PerfHelper.h"
16	#include "SubprocessMemory.h"
17	#include "Target.h"
18	#include "llvm/ADT/ScopeExit.h"
19	#include "llvm/ADT/StringExtras.h"
20	#include "llvm/ADT/StringRef.h"
21	#include "llvm/ADT/Twine.h"
22	#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
23	#include "llvm/Support/CrashRecoveryContext.h"
24	#include "llvm/Support/Debug.h"
25	#include "llvm/Support/Error.h"
26	#include "llvm/Support/FileSystem.h"
27	#include "llvm/Support/MemoryBuffer.h"
28	#include "llvm/Support/Program.h"
29	#include "llvm/Support/Signals.h"
30	#include <cmath>
31	#include <memory>
32	#include <string>
33
34	#ifdef __linux__
35	#ifdef HAVE_LIBPFM
36	#include <perfmon/perf_event.h>
37	#endif
38	#include <sys/mman.h>
39	#include <sys/ptrace.h>
40	#include <sys/resource.h>
41	#include <sys/socket.h>
42	#include <sys/syscall.h>
43	#include <sys/wait.h>
44	#include <unistd.h>
45
46	#if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
47	#include <sys/rseq.h>
48	#if defined(RSEQ_SIG) && defined(SYS_rseq)
49	#define GLIBC_INITS_RSEQ
50	#endif
51	#endif
52	#endif // __linux__
53
54	namespace llvm {
55	namespace exegesis {
56
57	BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
58	BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
59	ExecutionModeE ExecutionMode,
60	ArrayRef<ValidationEvent> ValCounters)
61	: State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
62	ExecutionMode(ExecutionMode), ValidationCounters (ValCounters),
63	Scratch(std::make_unique<ScratchSpace>()) {}
64
65	BenchmarkRunner::~BenchmarkRunner() = default;
66
67	void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
68	const SmallVectorImpl<int64_t> &NewValues,
69	SmallVectorImpl<int64_t> *Result) {
70	const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
71	if (NumValues > Result->size())
72	Result->resize(N: NumValues, NV: `0`);
73	for (size_t I = `0`, End = NewValues.size(); I < End; ++I)
74	(*Result)[I] += NewValues [I];
75	}
76
77	Expected<SmallVector<int64_t, `4`>>
78	BenchmarkRunner::FunctionExecutor::runAndSample(
79	const char Counters, ArrayRef<const* char *> ValidationCounters,
80	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
81	// We sum counts when there are several counters for a single ProcRes
82	// (e.g. P23 on SandyBridge).
83	SmallVector<int64_t, `4`> CounterValues;
84	SmallVector<StringRef, `2`> CounterNames;
85	StringRef (Counters).split(A&: CounterNames, Separator: `'+'`);
86	for (auto &CounterName : CounterNames) {
87	CounterName = CounterName.trim();
88	Expected<SmallVector<int64_t, `4`>> ValueOrError = runWithCounter(
89	CounterName, ValidationCounters, ValidationCounterValues);
90	if (!ValueOrError)
91	return ValueOrError.takeError();
92	accumulateCounterValues(NewValues: ValueOrError.get(), Result: &CounterValues);
93	}
94	return CounterValues;
95	}
96
97	namespace {
98	class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
99	public:
100	static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
101	create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
102	BenchmarkRunner::ScratchSpace *Scratch,
103	std::optional<int> BenchmarkProcessCPU) {
104	Expected<ExecutableFunction> EF =
105	ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
106
107	if (!EF)
108	return EF.takeError();
109
110	return std::unique_ptr<InProcessFunctionExecutorImpl>(
111	new InProcessFunctionExecutorImpl (State, std::move(*EF), Scratch));
112	}
113
114	private:
115	InProcessFunctionExecutorImpl(const LLVMState &State,
116	ExecutableFunction Function,
117	BenchmarkRunner::ScratchSpace *Scratch)
118	: State(State), Function (std::move(Function)), Scratch(Scratch) {}
119
120	static void accumulateCounterValues(const SmallVector<int64_t, `4`> &NewValues,
121	SmallVector<int64_t, `4`> *Result) {
122	const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
123	if (NumValues > Result->size())
124	Result->resize(N: NumValues, NV: `0`);
125	for (size_t I = `0`, End = NewValues.size(); I < End; ++I)
126	(*Result)[I] += NewValues [I];
127	}
128
129	Expected<SmallVector<int64_t, `4`>> runWithCounter(
130	StringRef CounterName, ArrayRef<const char *> ValidationCounters,
131	SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
132	const ExegesisTarget &ET = State.getExegesisTarget();
133	char *const ScratchPtr = Scratch->ptr();
134	auto CounterOrError =
135	ET.createCounter(CounterName, State, ValidationCounters);
136
137	if (!CounterOrError)
138	return CounterOrError.takeError();
139
140	pfm::CounterGroup *Counter = CounterOrError.get().get();
141	Scratch->clear();
142	{
143	auto PS = ET.withSavedState();
144	CrashRecoveryContext CRC;
145	CrashRecoveryContext::Enable();
146	const bool Crashed = !CRC.RunSafely(Fn: [this, Counter, ScratchPtr]() {
147	Counter->start();
148	this->Function (ScratchPtr);
149	Counter->stop();
150	});
151	CrashRecoveryContext::Disable();
152	PS.reset();
153	if (Crashed) {
154	#ifdef LLVM_ON_UNIX
155	// See "Exit Status for Commands":
156	// https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
157	constexpr int kSigOffset = `128`;
158	return make_error<SnippetSignal>(Args: CRC.RetCode - kSigOffset);
159	#else
160	// The exit code of the process on windows is not meaningful as a
161	// signal, so simply pass in -1 as the signal into the error.
162	return make_error<SnippetSignal>(-`1`);
163	#endif // LLVM_ON_UNIX
164	}
165	}
166
167	auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
168	if (!ValidationValuesOrErr)
169	return ValidationValuesOrErr.takeError();
170
171	ArrayRef RealValidationValues = *ValidationValuesOrErr;
172	for (size_t I = `0`; I < RealValidationValues.size(); ++I)
173	ValidationCounterValues [I] = RealValidationValues [I];
174
175	return Counter->readOrError(FunctionBytes: Function.getFunctionBytes());
176	}
177
178	const LLVMState &State;
179	const ExecutableFunction Function;
180	BenchmarkRunner::ScratchSpace *const Scratch;
181	};
182
183	#ifdef __linux__
184	// The following class implements a function executor that executes the
185	// benchmark code within a subprocess rather than within the main llvm-exegesis
186	// process. This allows for much more control over the execution context of the
187	// snippet, particularly with regard to memory. This class performs all the
188	// necessary functions to create the subprocess, execute the snippet in the
189	// subprocess, and report results/handle errors.
190	class SubProcessFunctionExecutorImpl
191	: public BenchmarkRunner::FunctionExecutor {
192	public:
193	static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
194	create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
195	const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) {
196	Expected<ExecutableFunction> EF =
197	ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
198	if (!EF)
199	return EF.takeError();
200
201	return std::unique_ptr<SubProcessFunctionExecutorImpl>(
202	new SubProcessFunctionExecutorImpl (State, std::move(*EF), Key,
203	BenchmarkProcessCPU));
204	}
205
206	private:
207	SubProcessFunctionExecutorImpl(const LLVMState &State,
208	ExecutableFunction Function,
209	const BenchmarkKey &Key,
210	std::optional<int> BenchmarkCPU)
211	: State(State), Function (std::move(Function)), Key(Key),
212	BenchmarkProcessCPU (BenchmarkCPU) {}
213
/// Exit codes used by the benchmarking child process to tell the parent which
/// setup step failed. Zero is not listed: the child exits with 0 on success.
enum ChildProcessExitCodeE {
  CounterFDReadFailed = 1,
  RSeqDisableFailed = 2,
  FunctionDataMappingFailed = 3,
  AuxiliaryMemorySetupFailed = 4,
  SetCPUAffinityFailed = 5
};
221
222	StringRef childProcessExitCodeToString(int ExitCode) const {
223	switch (ExitCode) {
224	case ChildProcessExitCodeE::CounterFDReadFailed:
225	return "Counter file descriptor read failed";
226	case ChildProcessExitCodeE::RSeqDisableFailed:
227	return "Disabling restartable sequences failed";
228	case ChildProcessExitCodeE::FunctionDataMappingFailed:
229	return "Failed to map memory for assembled snippet";
230	case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
231	return "Failed to setup auxiliary memory";
232	case ChildProcessExitCodeE::SetCPUAffinityFailed:
233	return "Failed to set CPU affinity of the benchmarking process";
234	default:
235	return "Child process returned with unknown exit code";
236	}
237	}
238
239	Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
240	struct msghdr Message = {};
241	char Buffer[CMSG_SPACE(sizeof(FD))];
242	memset(s: Buffer, c: `0`, n: sizeof(Buffer));
243	Message.msg_control = Buffer;
244	Message.msg_controllen = sizeof(Buffer);
245
246	struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
247	ControlMessage->cmsg_level = SOL_SOCKET;
248	ControlMessage->cmsg_type = SCM_RIGHTS;
249	ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
250
251	memcpy(CMSG_DATA(ControlMessage), src: &FD, n: sizeof(FD));
252
253	Message.msg_controllen = CMSG_SPACE(sizeof(FD));
254
255	ssize_t BytesWritten = sendmsg(fd: SocketFD, message: &Message, flags: `0`);
256
257	if (BytesWritten < `0`)
258	return make_error<Failure>(Args: "Failed to write FD to socket: " +
259	Twine (strerror(errno)));
260
261	return Error::success();
262	}
263
264	Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
265	struct msghdr Message = {};
266
267	char ControlBuffer[`256`];
268	Message.msg_control = ControlBuffer;
269	Message.msg_controllen = sizeof(ControlBuffer);
270
271	ssize_t BytesRead = recvmsg(fd: SocketFD, message: &Message, flags: `0`);
272
273	if (BytesRead < `0`)
274	return make_error<Failure>(Args: "Failed to read FD from socket: " +
275	Twine (strerror(errno)));
276
277	struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
278
279	int FD;
280
281	if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
282	return make_error<Failure>(Args: "Failed to get correct number of bytes for "
283	"file descriptor from socket.");
284
285	memcpy(dest: &FD, CMSG_DATA(ControlMessage), n: sizeof(FD));
286
287	return FD;
288	}
289
290	Error
291	runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
292	SmallVectorImpl<int64_t> &CounterValues,
293	ArrayRef<const char *> ValidationCounters,
294	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
295	scope_exit WriteFDClose([WriteFD]() { close(fd: WriteFD); });
296	const ExegesisTarget &ET = State.getExegesisTarget();
297	auto CounterOrError =
298	ET.createCounter(CounterName, State, ValidationCounters, ProcessID: ChildPID);
299
300	if (!CounterOrError)
301	return CounterOrError.takeError();
302
303	pfm::CounterGroup *Counter = CounterOrError.get().get();
304
305	// Make sure to attach to the process (and wait for the sigstop to be
306	// delivered and for the process to continue) before we write to the counter
307	// file descriptor. Attaching to the process before writing to the socket
308	// ensures that the subprocess at most has blocked on the read call. If we
309	// attach afterwards, the subprocess might exit before we get to the attach
310	// call due to effects like scheduler contention, introducing transient
311	// failures.
312	if (ptrace(request: PTRACE_ATTACH, ChildPID, NULL, NULL) != `0`)
313	return make_error<Failure>(Args: "Failed to attach to the child process: " +
314	Twine (strerror(errno)));
315
316	if (waitpid(pid: ChildPID, NULL, options: `0`) == -`1`) {
317	return make_error<Failure>(
318	Args: "Failed to wait for child process to stop after attaching: " +
319	Twine (strerror(errno)));
320	}
321
322	if (ptrace(request: PTRACE_CONT, ChildPID, NULL, NULL) != `0`)
323	return make_error<Failure>(
324	Args: "Failed to continue execution of the child process: " +
325	Twine (strerror(errno)));
326
327	int CounterFileDescriptor = Counter->getFileDescriptor();
328	Error SendError =
329	sendFileDescriptorThroughSocket(SocketFD: WriteFD, FD: CounterFileDescriptor);
330
331	if (SendError)
332	return SendError;
333
334	int ChildStatus;
335	if (waitpid(pid: ChildPID, stat_loc: &ChildStatus, options: `0`) == -`1`) {
336	return make_error<Failure>(
337	Args: "Waiting for the child process to complete failed: " +
338	Twine (strerror(errno)));
339	}
340
341	if (WIFEXITED(ChildStatus)) {
342	int ChildExitCode = WEXITSTATUS(ChildStatus);
343	if (ChildExitCode == `0`) {
344	// The child exited succesfully, read counter values and return
345	// success.
346	auto CounterValueOrErr = Counter->readOrError();
347	if (!CounterValueOrErr)
348	return CounterValueOrErr.takeError();
349	CounterValues = std::move(*CounterValueOrErr);
350
351	auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
352	if (!ValidationValuesOrErr)
353	return ValidationValuesOrErr.takeError();
354
355	ArrayRef RealValidationValues = *ValidationValuesOrErr;
356	for (size_t I = `0`; I < RealValidationValues.size(); ++I)
357	ValidationCounterValues [I] = RealValidationValues [I];
358
359	return Error::success();
360	}
361	// The child exited, but not successfully.
362	return make_error<Failure>(
363	Args: "Child benchmarking process exited with non-zero exit code: " +
364	childProcessExitCodeToString(ExitCode: ChildExitCode));
365	}
366
367	// An error was encountered running the snippet, process it
368	siginfo_t ChildSignalInfo;
369	if (ptrace(request: PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -`1`) {
370	return make_error<Failure>(Args: "Getting signal info from the child failed: " +
371	Twine (strerror(errno)));
372	}
373
374	// Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
375	// handlers to run, and calling SIGTERM would mean that ptrace will force
376	// it to block in the signal-delivery-stop for the SIGSEGV/other signals,
377	// and upon exit.
378	if (kill(pid: ChildPID, SIGKILL) == -`1`)
379	return make_error<Failure>(Args: "Failed to kill child benchmarking proces: " +
380	Twine (strerror(errno)));
381
382	// Wait for the process to exit so that there are no zombie processes left
383	// around.
384	if (waitpid(pid: ChildPID, NULL, options: `0`) == -`1`)
385	return make_error<Failure>(Args: "Failed to wait for process to die: " +
386	Twine (strerror(errno)));
387
388	if (ChildSignalInfo.si_signo == SIGSEGV)
389	return make_error<SnippetSegmentationFault>(
390	Args: reinterpret_cast<uintptr_t>(ChildSignalInfo.si_addr));
391
392	return make_error<SnippetSignal>(Args&: ChildSignalInfo.si_signo);
393	}
394
395	static void setCPUAffinityIfRequested(int CPUToUse) {
396	// Special case this function for x86_64 for now as certain more esoteric
397	// platforms have different definitions for some of the libc functions that
398	// cause buildtime failures. Additionally, the subprocess executor mode (the
399	// sole mode where this is supported) currently only supports x86_64.
400
401	// Also check that we have the SYS_getcpu macro defined, meaning the syscall
402	// actually exists within the build environment. We manually use the syscall
403	// rather than the libc wrapper given the wrapper for getcpu is only available
404	// in glibc 2.29 and later.
405	#if defined(__x86_64__) && defined(SYS_getcpu)
406	// Set the CPU affinity for the child process, so that we ensure that if
407	// the user specified a CPU the process should run on, the benchmarking
408	// process is running on that CPU.
409	cpu_set_t CPUMask;
410	CPU_ZERO(&CPUMask);
411	CPU_SET(CPUToUse, &CPUMask);
412	// TODO(boomanaiden154): Rewrite this to use LLVM primitives once they
413	// are available.
414	int SetAffinityReturn = sched_setaffinity(pid: `0`, cpusetsize: sizeof(CPUMask), cpuset: &CPUMask);
415	if (SetAffinityReturn == -`1`) {
416	exit(status: ChildProcessExitCodeE::SetCPUAffinityFailed);
417	}
418
419	// Check (if assertions are enabled) that we are actually running on the
420	// CPU that was specified by the user.
421	[[maybe_unused]] unsigned int CurrentCPU;
422	assert(syscall(SYS_getcpu, &CurrentCPU, nullptr) == `0` &&
423	"Expected getcpu call to succeed.");
424	assert(static_cast<int>(CurrentCPU) == CPUToUse &&
425	"Expected current CPU to equal the CPU requested by the user");
426	#else
427	exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
428	#endif // defined(__x86_64__) && defined(SYS_getcpu)
429	}
430
431	Error createSubProcessAndRunBenchmark(
432	StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
433	ArrayRef<const char *> ValidationCounters,
434	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
435	int PipeFiles[`2`];
436	int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, protocol: `0`, fds: PipeFiles);
437	if (PipeSuccessOrErr != `0`) {
438	return make_error<Failure>(
439	Args: "Failed to create a pipe for interprocess communication between "
440	"llvm-exegesis and the benchmarking subprocess: " +
441	Twine (strerror(errno)));
442	}
443
444	SubprocessMemory SPMemory;
445	Error MemoryInitError = SPMemory.initializeSubprocessMemory(ProcessID: getpid());
446	if (MemoryInitError)
447	return MemoryInitError;
448
449	Error AddMemDefError =
450	SPMemory.addMemoryDefinition(MemoryDefinitions: Key.MemoryValues, ProcessID: getpid());
451	if (AddMemDefError)
452	return AddMemDefError;
453
454	long ParentTID = SubprocessMemory::getCurrentTID();
455	pid_t ParentOrChildPID = fork();
456
457	if (ParentOrChildPID == -`1`) {
458	return make_error<Failure>(Args: "Failed to create child process: " +
459	Twine (strerror(errno)));
460	}
461
462	if (ParentOrChildPID == `0`) {
463	if (BenchmarkProcessCPU.has_value()) {
464	setCPUAffinityIfRequested(*BenchmarkProcessCPU);
465	}
466
467	// We are in the child process, close the write end of the pipe.
468	close(fd: PipeFiles[`1`]);
469	// Unregister handlers, signal handling is now handled through ptrace in
470	// the host process.
471	sys::unregisterHandlers();
472	runChildSubprocess(Pipe: PipeFiles[`0`], Key, ParentTID);
473	// The child process terminates in the above function, so we should never
474	// get to this point.
475	llvm_unreachable("Child process didn't exit when expected.");
476	}
477
478	// Close the read end of the pipe as we only need to write to the subprocess
479	// from the parent process.
480	close(fd: PipeFiles[`0`]);
481	return runParentProcess(ChildPID: ParentOrChildPID, WriteFD: PipeFiles[`1`], CounterName,
482	CounterValues, ValidationCounters,
483	ValidationCounterValues);
484	}
485
486	void disableCoreDumps() const {
487	struct rlimit rlim;
488
489	rlim.rlim_cur = `0`;
490	setrlimit(RLIMIT_CORE, rlimits: &rlim);
491	}
492
493	[[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
494	long ParentTID) const {
495	// Disable core dumps in the child process as otherwise everytime we
496	// encounter an execution failure like a segmentation fault, we will create
497	// a core dump. We report the information directly rather than require the
498	// user inspect a core dump.
499	disableCoreDumps();
500
501	// The following occurs within the benchmarking subprocess.
502	pid_t ParentPID = getppid();
503
504	Expected<int> CounterFileDescriptorOrError =
505	getFileDescriptorFromSocket(SocketFD: Pipe);
506
507	if (!CounterFileDescriptorOrError)
508	exit(status: ChildProcessExitCodeE::CounterFDReadFailed);
509
510	int CounterFileDescriptor = *CounterFileDescriptorOrError;
511
512	// Glibc versions greater than 2.35 automatically call rseq during
513	// initialization. Unmapping the region that glibc sets up for this causes
514	// segfaults in the program. Unregister the rseq region so that we can safely
515	// unmap it later
516	#ifdef GLIBC_INITS_RSEQ
517	unsigned int RseqStructSize = __rseq_size;
518
519	// Glibc v2.40 (the change is also expected to be backported to v2.35)
520	// changes the definition of __rseq_size to be the usable area of the struct
521	// rather than the actual size of the struct. v2.35 uses only 20 bytes of
522	// the 32 byte struct. For now, it should be safe to assume that if the
523	// usable size is less than 32, the actual size of the struct will be 32
524	// bytes given alignment requirements.
525	if (__rseq_size < `32`)
526	RseqStructSize = `32`;
527
528	long RseqDisableOutput = syscall(
529	SYS_rseq,
530	reinterpret_cast<uintptr_t>(__builtin_thread_pointer()) + __rseq_offset,
531	RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
532	if (RseqDisableOutput != `0`)
533	exit(status: ChildProcessExitCodeE::RSeqDisableFailed);
534	#endif // GLIBC_INITS_RSEQ
535
536	// The frontend that generates the memory annotation structures should
537	// validate that the address to map the snippet in at is a multiple of
538	// the page size. Assert that this is true here.
539	assert(Key.SnippetAddress % getpagesize() == `0` &&
540	"The snippet address needs to be aligned to a page boundary.");
541
542	size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
543	void *MapAddress = NULL;
544	int MapFlags = MAP_PRIVATE \| MAP_ANONYMOUS;
545
546	if (Key.SnippetAddress != `0`) {
547	MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
548	MapFlags \|= MAP_FIXED_NOREPLACE;
549	}
550
551	char *FunctionDataCopy =
552	(char *)mmap(addr: MapAddress, len: FunctionDataCopySize, PROT_READ \| PROT_WRITE,
553	flags: MapFlags, fd: `0`, offset: `0`);
554	if (reinterpret_cast<intptr_t>(FunctionDataCopy) == -`1`)
555	exit(status: ChildProcessExitCodeE::FunctionDataMappingFailed);
556
557	memcpy(dest: FunctionDataCopy, src: this->Function.FunctionBytes.data(),
558	n: this->Function.FunctionBytes.size());
559	mprotect(addr: FunctionDataCopy, len: FunctionDataCopySize, PROT_READ \| PROT_EXEC);
560
561	Expected<int> AuxMemFDOrError =
562	SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
563	MemoryDefinitions: Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
564	if (!AuxMemFDOrError)
565	exit(status: ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
566
567	((void ()(size_t, int*))(uintptr_t)FunctionDataCopy)(FunctionDataCopySize,
568	*AuxMemFDOrError);
569
570	exit(status: `0`);
571	}
572
573	Expected<SmallVector<int64_t, `4`>> runWithCounter(
574	StringRef CounterName, ArrayRef<const char *> ValidationCounters,
575	SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
576	SmallVector<int64_t, `4`> Value(`1`, `0`);
577	Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
578	CounterName, CounterValues&: Value, ValidationCounters, ValidationCounterValues);
579
580	if (PossibleBenchmarkError)
581	return std::move(PossibleBenchmarkError);
582
583	return Value;
584	}
585
586	const LLVMState &State;
587	const ExecutableFunction Function;
588	const BenchmarkKey &Key;
589	const std::optional<int> BenchmarkProcessCPU;
590	};
591	#endif // __linux__
592
// One disassembled instruction of a generated snippet, as used when printing
// the assembly.
struct InstructionInfo {
  // Textual form of the instruction.
  std::string Text;
  // Byte offset of the instruction within the function bytes. Initialised so
  // that a default-constructed entry never carries an indeterminate value.
  uint64_t Address = 0;
  // Hexadecimal rendering of the instruction's bytes.
  std::string HexBytes;
};
599
600	#ifndef NDEBUG
601	// Helper function to print generated assembly snippets
602	void printInstructions(const std::vector<InstructionInfo> &Instructions,
603	int InitialLinesCount, int LastLinesCount) {
604	int N = Instructions.size();
605	dbgs() << "Generated assembly snippet:\n```\n";
606
607	// Print initial lines
608	for (int i = `0`; i < InitialLinesCount; ++i)
609	dbgs() << format_hex_no_prefix(Instructions[i].Address, `0`) << ":\t"
610	<< Instructions[i].HexBytes << Instructions[i].Text << `'\n'`;
611
612	// Show truncation message if needed
613	int SkippedInstructions = N - InitialLinesCount - LastLinesCount;
614	if (SkippedInstructions > `0`)
615	dbgs() << "...\t(" << SkippedInstructions << " more instructions)\n";
616
617	// Print last min(PreviewLast, N - PreviewFirst) lines
618	int LastLinesToPrint = std::min(
619	LastLinesCount, N > InitialLinesCount ? N - InitialLinesCount : `0`);
620	for (int i = N - LastLinesToPrint; i < N; ++i)
621	dbgs() << format_hex_no_prefix(Instructions[i].Address, `0`) << ":\t"
622	<< Instructions[i].HexBytes << Instructions[i].Text << `'\n'`;
623	dbgs() << "```\n";
624	}
625	#endif // NDEBUG
626
627	// Function to extract and print assembly from snippet
628	Error printAssembledSnippet(const LLVMState &State,
629	const SmallString<`0`> &Snippet) {
630	// Extract the actual function bytes from the object file
631	std::vector<uint8_t> FunctionBytes;
632	if (auto Err = getBenchmarkFunctionBytes(InputData: Snippet, Bytes&: FunctionBytes))
633	return make_error<Failure>(Args: "Failed to extract function bytes: " +
634	toString(E: std::move(Err)));
635
636	// Decode all instructions first
637	DisassemblerHelper DisHelper(State);
638	uint64_t Address = `0`;
639	std::vector<InstructionInfo> Instructions;
640	const size_t FunctionBytesSize = FunctionBytes.size();
641
642	while (Address < FunctionBytesSize) {
643	MCInst Inst;
644	uint64_t Size;
645	ArrayRef<uint8_t> Bytes(FunctionBytes.data() + Address,
646	FunctionBytesSize - Address);
647
648	if (!DisHelper.decodeInst(MI&: Inst, MISize&: Size, Bytes)) {
649	Instructions.push_back(x: {.Text: "<decode error>", .Address: Address, .HexBytes: ""});
650	break;
651	}
652
653	// Format instruction text
654	std::string InstStr;
655	raw_string_ostream OS(InstStr);
656	DisHelper.printInst(MI: &Inst, OS);
657
658	// Create hex string for this instruction (big-endian order)
659	std::string HexStr;
660	raw_string_ostream HexOS(HexStr);
661	for (int i = Size - `1`; i >= `0`; --i)
662	HexOS << format_hex_no_prefix(N: Bytes [i], Width: `2`);
663
664	Instructions.push_back(x: {.Text: OS.str(), .Address: Address, .HexBytes: HexOS.str()});
665	Address += Size;
666	}
667
668	#undef DEBUG_TYPE
669	#define DEBUG_TYPE "preview-gen-assembly"
670	LLVM_DEBUG(printInstructions(Instructions, `10`, `3`));
671	#undef DEBUG_TYPE
672	#define DEBUG_TYPE "print-gen-assembly"
673	LLVM_DEBUG(printInstructions(Instructions, Instructions.size(), `0`));
674	#undef DEBUG_TYPE
675	return Error::success();
676	}
677	} // namespace
678
679	Expected<SmallString<`0`>> BenchmarkRunner::assembleSnippet(
680	const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
681	unsigned MinInstructions, unsigned LoopBodySize,
682	bool GenerateMemoryInstructions) const {
683	const std::vector<MCInst> &Instructions = BC.Key.Instructions;
684	SmallString<`0`> Buffer;
685	raw_svector_ostream OS(Buffer);
686	if (Error E = assembleToStream(
687	ET: State.getExegesisTarget(), TM: State.createTargetMachine(), LiveIns: BC.LiveIns,
688	Fill: Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
689	CleanupMemory: GenerateMemoryInstructions),
690	AsmStreamm&: OS, Key: BC.Key, GenerateMemoryInstructions)) {
691	return std::move(E);
692	}
693	return Buffer;
694	}
695
696	Expected<BenchmarkRunner::RunnableConfiguration>
697	BenchmarkRunner::getRunnableConfiguration(
698	const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
699	const SnippetRepetitor &Repetitor) const {
700	RunnableConfiguration RC;
701
702	Benchmark &BenchmarkResult = RC.BenchmarkResult;
703	BenchmarkResult.Mode = Mode;
704	BenchmarkResult.CpuName =
705	std::string (State.getTargetMachine().getTargetCPU());
706	BenchmarkResult.LLVMTriple =
707	State.getTargetMachine().getTargetTriple().normalize();
708	BenchmarkResult.MinInstructions = MinInstructions;
709	BenchmarkResult.Info = BC.Info;
710
711	const std::vector<MCInst> &Instructions = BC.Key.Instructions;
712
713	bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
714
715	BenchmarkResult.Key = BC.Key;
716
717	// Assemble at least kMinInstructionsForSnippet instructions by repeating
718	// the snippet for debug/analysis. This is so that the user clearly
719	// understands that the inside instructions are repeated.
720	if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
721	const int MinInstructionsForSnippet = `4` * Instructions.size();
722	const int LoopBodySizeForSnippet = `2` * Instructions.size();
723	auto Snippet =
724	assembleSnippet(BC, Repetitor, MinInstructions: MinInstructionsForSnippet,
725	LoopBodySize: LoopBodySizeForSnippet, GenerateMemoryInstructions);
726	if (Error E = Snippet.takeError())
727	return std::move(E);
728
729	if (auto Err = getBenchmarkFunctionBytes(InputData: *Snippet,
730	Bytes&: BenchmarkResult.AssembledSnippet))
731	return std::move(Err);
732	}
733
734	// Assemble enough repetitions of the snippet so we have at least
735	// MinInstructions instructions.
736	if (BenchmarkPhaseSelector >
737	BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
738	auto Snippet =
739	assembleSnippet(BC, Repetitor, MinInstructions: BenchmarkResult.MinInstructions,
740	LoopBodySize, GenerateMemoryInstructions);
741	if (Error E = Snippet.takeError())
742	return std::move(E);
743	RC.ObjectFile = getObjectFromBuffer(Buffer: *Snippet);
744
745	// Print the assembled snippet by disassembling the binary data
746	if (Error E = printAssembledSnippet(State, Snippet: *Snippet))
747	return std::move(E);
748	}
749
750	return std::move(RC);
751	}
752
753	Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
754	BenchmarkRunner::createFunctionExecutor(
755	object::OwningBinary<object::ObjectFile> ObjectFile,
756	const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const {
757	switch (ExecutionMode) {
758	case ExecutionModeE::InProcess: {
759	if (BenchmarkProcessCPU.has_value())
760	return make_error<Failure>(Args: "The inprocess execution mode does not "
761	"support benchmark core pinning.");
762
763	auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
764	State, Obj: std::move(ObjectFile), Scratch: Scratch.get(), BenchmarkProcessCPU);
765	if (!InProcessExecutorOrErr)
766	return InProcessExecutorOrErr.takeError();
767
768	return std::move(*InProcessExecutorOrErr);
769	}
770	case ExecutionModeE::SubProcess: {
771	#ifdef __linux__
772	auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
773	State, Obj: std::move(ObjectFile), Key, BenchmarkProcessCPU);
774	if (!SubProcessExecutorOrErr)
775	return SubProcessExecutorOrErr.takeError();
776
777	return std::move(*SubProcessExecutorOrErr);
778	#else
779	return make_error<Failure>(
780	"The subprocess execution mode is only supported on Linux");
781	#endif
782	}
783	}
784	llvm_unreachable("ExecutionMode is outside expected range");
785	}
786
787	std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
788	RunnableConfiguration &&RC, const std::optional<StringRef> &DumpFile,
789	std::optional<int> BenchmarkProcessCPU) const {
790	Benchmark &BenchmarkResult = RC.BenchmarkResult;
791	object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
792
793	if (DumpFile && BenchmarkPhaseSelector >
794	BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
795	auto ObjectFilePath =
796	writeObjectFile(Buffer: ObjectFile.getBinary()->getData(), FileName: *DumpFile);
797	if (Error E = ObjectFilePath.takeError()) {
798	return {std::move(E), std::move(BenchmarkResult)};
799	}
800	outs() << "Check generated assembly with: /usr/bin/objdump -d "
801	<< *ObjectFilePath << "\n";
802	}
803
804	if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
805	BenchmarkResult.Error = "actual measurements skipped.";
806	return {Error::success(), std::move(BenchmarkResult)};
807	}
808
809	Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
810	createFunctionExecutor(ObjectFile: std::move(ObjectFile), Key: RC.BenchmarkResult.Key,
811	BenchmarkProcessCPU);
812	if (!Executor)
813	return {Executor.takeError(), std::move(BenchmarkResult)};
814	auto NewMeasurements = runMeasurements(Executor: **Executor);
815
816	if (Error E = NewMeasurements.takeError()) {
817	return {std::move(E), std::move(BenchmarkResult)};
818	}
819	assert(BenchmarkResult.MinInstructions > `0` && "invalid MinInstructions");
820	for (BenchmarkMeasure &BM : *NewMeasurements) {
821	// Scale the measurements by the number of instructions.
822	BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
823	// Scale the measurements by the number of times the entire snippet is
824	// repeated.
825	BM.PerSnippetValue /=
826	std::ceil(x: BenchmarkResult.MinInstructions /
827	static_cast<double>(BenchmarkResult.Key.Instructions.size()));
828	}
829	BenchmarkResult.Measurements = std::move(*NewMeasurements);
830
831	return {Error::success(), std::move(BenchmarkResult)};
832	}
833
834	Expected<std::string>
835	BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
836	int ResultFD = `0`;
837	SmallString<`256`> ResultPath = FileName;
838	if (Error E = errorCodeToError(
839	EC: FileName.empty() ? sys::fs::createTemporaryFile(Prefix: "snippet", Suffix: "o",
840	ResultFD, ResultPath)
841	: sys::fs::openFileForReadWrite(
842	Name: FileName, ResultFD, Disp: sys::fs::CD_CreateAlways,
843	Flags: sys::fs::OF_None)))
844	return std::move(E);
845	raw_fd_ostream OFS(ResultFD, true /ShouldClose/);
846	OFS.write(Ptr: Buffer.data(), Size: Buffer.size());
847	OFS.flush();
848	return std::string(ResultPath);
849	}
850
851	static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
852	const ValidationEvent RHS) {
853	return static_cast<int>(LHS.first) < static_cast<int>(RHS);
854	}
855
856	Error BenchmarkRunner::getValidationCountersToRun(
857	SmallVector<const char > &ValCountersToRun) const* {
858	const PfmCountersInfo &PCI = State.getPfmCounters();
859	ValCountersToRun.reserve(N: ValidationCounters.size());
860
861	ValCountersToRun.reserve(N: ValidationCounters.size());
862	ArrayRef TargetValidationEvents(PCI.ValidationEvents,
863	PCI.NumValidationEvents);
864	for (const ValidationEvent RequestedValEvent : ValidationCounters) {
865	auto ValCounterIt =
866	lower_bound(Range&: TargetValidationEvents, Value: RequestedValEvent, C: EventLessThan);
867	if (ValCounterIt == TargetValidationEvents.end() \|\|
868	ValCounterIt->first != RequestedValEvent)
869	return make_error<Failure>(Args: "Cannot create validation counter");
870
871	assert(ValCounterIt->first == RequestedValEvent &&
872	"The array of validation events from the target should be sorted");
873	ValCountersToRun.push_back(Elt: ValCounterIt->second);
874	}
875
876	return Error::success();
877	}
878
879	BenchmarkRunner::FunctionExecutor::~FunctionExecutor() = default;
880
881	} // namespace exegesis
882	} // namespace llvm
883

Browse the source code of llvm_projects/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp