BenchmarkRunner.cpp source code [llvm_projects/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp]

1	//===-- BenchmarkRunner.cpp -------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include <cmath>
10	#include <memory>
11	#include <string>
12
13	#include "Assembler.h"
14	#include "BenchmarkRunner.h"
15	#include "Error.h"
16	#include "MCInstrDescView.h"
17	#include "MmapUtils.h"
18	#include "PerfHelper.h"
19	#include "SubprocessMemory.h"
20	#include "Target.h"
21	#include "llvm/ADT/ScopeExit.h"
22	#include "llvm/ADT/StringExtras.h"
23	#include "llvm/ADT/StringRef.h"
24	#include "llvm/ADT/Twine.h"
25	#include "llvm/Support/CrashRecoveryContext.h"
26	#include "llvm/Support/Error.h"
27	#include "llvm/Support/FileSystem.h"
28	#include "llvm/Support/MemoryBuffer.h"
29	#include "llvm/Support/Program.h"
30	#include "llvm/Support/Signals.h"
31	#include "llvm/Support/SystemZ/zOSSupport.h"
32
33	#ifdef __linux__
34	#ifdef HAVE_LIBPFM
35	#include <perfmon/perf_event.h>
36	#endif
37	#include <sys/mman.h>
38	#include <sys/ptrace.h>
39	#include <sys/resource.h>
40	#include <sys/socket.h>
41	#include <sys/syscall.h>
42	#include <sys/wait.h>
43	#include <unistd.h>
44
45	#if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
46	#include <sys/rseq.h>
47	#if defined(RSEQ_SIG) && defined(SYS_rseq)
48	#define GLIBC_INITS_RSEQ
49	#endif
50	#endif
51	#endif // __linux__
52
53	namespace llvm {
54	namespace exegesis {
55
56	BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
57	BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
58	ExecutionModeE ExecutionMode,
59	ArrayRef<ValidationEvent> ValCounters)
60	: State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
61	ExecutionMode(ExecutionMode), ValidationCounters (ValCounters),
62	Scratch(std::make_unique<ScratchSpace>()) {}
63
64	BenchmarkRunner::~BenchmarkRunner() = default;
65
66	void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
67	const SmallVectorImpl<int64_t> &NewValues,
68	SmallVectorImpl<int64_t> *Result) {
69	const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
70	if (NumValues > Result->size())
71	Result->resize(N: NumValues, NV: `0`);
72	for (size_t I = `0`, End = NewValues.size(); I < End; ++I)
73	(*Result)[I] += NewValues [I];
74	}
75
76	Expected<SmallVector<int64_t, `4`>>
77	BenchmarkRunner::FunctionExecutor::runAndSample(
78	const char Counters, ArrayRef<const* char *> ValidationCounters,
79	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
80	// We sum counts when there are several counters for a single ProcRes
81	// (e.g. P23 on SandyBridge).
82	SmallVector<int64_t, `4`> CounterValues;
83	SmallVector<StringRef, `2`> CounterNames;
84	StringRef (Counters).split(A&: CounterNames, Separator: `'+'`);
85	for (auto &CounterName : CounterNames) {
86	CounterName = CounterName.trim();
87	Expected<SmallVector<int64_t, `4`>> ValueOrError = runWithCounter(
88	CounterName, ValidationCounters, ValidationCounterValues);
89	if (!ValueOrError)
90	return ValueOrError.takeError();
91	accumulateCounterValues(NewValues: ValueOrError.get(), Result: &CounterValues);
92	}
93	return CounterValues;
94	}
95
96	namespace {
97	class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
98	public:
99	static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
100	create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
101	BenchmarkRunner::ScratchSpace *Scratch) {
102	Expected<ExecutableFunction> EF =
103	ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
104
105	if (!EF)
106	return EF.takeError();
107
108	return std::unique_ptr<InProcessFunctionExecutorImpl>(
109	new InProcessFunctionExecutorImpl (State, std::move(*EF), Scratch));
110	}
111
112	private:
113	InProcessFunctionExecutorImpl(const LLVMState &State,
114	ExecutableFunction Function,
115	BenchmarkRunner::ScratchSpace *Scratch)
116	: State(State), Function (std::move(Function)), Scratch(Scratch) {}
117
118	static void accumulateCounterValues(const SmallVector<int64_t, `4`> &NewValues,
119	SmallVector<int64_t, `4`> *Result) {
120	const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
121	if (NumValues > Result->size())
122	Result->resize(N: NumValues, NV: `0`);
123	for (size_t I = `0`, End = NewValues.size(); I < End; ++I)
124	(*Result)[I] += NewValues [I];
125	}
126
127	Expected<SmallVector<int64_t, `4`>> runWithCounter(
128	StringRef CounterName, ArrayRef<const char *> ValidationCounters,
129	SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
130	const ExegesisTarget &ET = State.getExegesisTarget();
131	char *const ScratchPtr = Scratch->ptr();
132	auto CounterOrError =
133	ET.createCounter(CounterName, State, ValidationCounters);
134
135	if (!CounterOrError)
136	return CounterOrError.takeError();
137
138	pfm::CounterGroup *Counter = CounterOrError.get().get();
139	Scratch->clear();
140	{
141	auto PS = ET.withSavedState();
142	CrashRecoveryContext CRC;
143	CrashRecoveryContext::Enable();
144	const bool Crashed = !CRC.RunSafely(Fn: [this, Counter, ScratchPtr]() {
145	Counter->start();
146	this->Function (ScratchPtr);
147	Counter->stop();
148	});
149	CrashRecoveryContext::Disable();
150	PS.reset();
151	if (Crashed) {
152	#ifdef LLVM_ON_UNIX
153	// See "Exit Status for Commands":
154	// https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
155	constexpr const int kSigOffset = `128`;
156	return make_error<SnippetSignal>(Args: CRC.RetCode - kSigOffset);
157	#else
158	// The exit code of the process on windows is not meaningful as a
159	// signal, so simply pass in -1 as the signal into the error.
160	return make_error<SnippetSignal>(-`1`);
161	#endif // LLVM_ON_UNIX
162	}
163	}
164
165	auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
166	if (!ValidationValuesOrErr)
167	return ValidationValuesOrErr.takeError();
168
169	ArrayRef RealValidationValues = *ValidationValuesOrErr;
170	for (size_t I = `0`; I < RealValidationValues.size(); ++I)
171	ValidationCounterValues [I] = RealValidationValues [I];
172
173	return Counter->readOrError(FunctionBytes: Function.getFunctionBytes());
174	}
175
176	const LLVMState &State;
177	const ExecutableFunction Function;
178	BenchmarkRunner::ScratchSpace *const Scratch;
179	};
180
181	#ifdef __linux__
182	// The following class implements a function executor that executes the
183	// benchmark code within a subprocess rather than within the main llvm-exegesis
184	// process. This allows for much more control over the execution context of the
185	// snippet, particularly with regard to memory. This class performs all the
186	// necessary functions to create the subprocess, execute the snippet in the
187	// subprocess, and report results/handle errors.
188	class SubProcessFunctionExecutorImpl
189	: public BenchmarkRunner::FunctionExecutor {
190	public:
191	static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
192	create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
193	const BenchmarkKey &Key) {
194	Expected<ExecutableFunction> EF =
195	ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
196	if (!EF)
197	return EF.takeError();
198
199	return std::unique_ptr<SubProcessFunctionExecutorImpl>(
200	new SubProcessFunctionExecutorImpl (State, std::move(*EF), Key));
201	}
202
203	private:
204	SubProcessFunctionExecutorImpl(const LLVMState &State,
205	ExecutableFunction Function,
206	const BenchmarkKey &Key)
207	: State(State), Function (std::move(Function)), Key(Key) {}
208
/// Exit codes the benchmarking child uses to tell the parent which setup
/// step failed. Zero is not listed here: the child exits with 0 on success.
enum ChildProcessExitCodeE {
  CounterFDReadFailed = 1,
  RSeqDisableFailed = 2,
  FunctionDataMappingFailed = 3,
  AuxiliaryMemorySetupFailed = 4
};
215
216	StringRef childProcessExitCodeToString(int ExitCode) const {
217	switch (ExitCode) {
218	case ChildProcessExitCodeE::CounterFDReadFailed:
219	return "Counter file descriptor read failed";
220	case ChildProcessExitCodeE::RSeqDisableFailed:
221	return "Disabling restartable sequences failed";
222	case ChildProcessExitCodeE::FunctionDataMappingFailed:
223	return "Failed to map memory for assembled snippet";
224	case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
225	return "Failed to setup auxiliary memory";
226	default:
227	return "Child process returned with unknown exit code";
228	}
229	}
230
231	Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
232	struct msghdr Message = {};
233	char Buffer[CMSG_SPACE(sizeof(FD))];
234	memset(s: Buffer, c: `0`, n: sizeof(Buffer));
235	Message.msg_control = Buffer;
236	Message.msg_controllen = sizeof(Buffer);
237
238	struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
239	ControlMessage->cmsg_level = SOL_SOCKET;
240	ControlMessage->cmsg_type = SCM_RIGHTS;
241	ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
242
243	memcpy(CMSG_DATA(ControlMessage), src: &FD, n: sizeof(FD));
244
245	Message.msg_controllen = CMSG_SPACE(sizeof(FD));
246
247	ssize_t BytesWritten = sendmsg(fd: SocketFD, message: &Message, flags: `0`);
248
249	if (BytesWritten < `0`)
250	return make_error<Failure>(Args: "Failed to write FD to socket: " +
251	Twine (strerror(errno)));
252
253	return Error::success();
254	}
255
256	Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
257	struct msghdr Message = {};
258
259	char ControlBuffer[`256`];
260	Message.msg_control = ControlBuffer;
261	Message.msg_controllen = sizeof(ControlBuffer);
262
263	ssize_t BytesRead = recvmsg(fd: SocketFD, message: &Message, flags: `0`);
264
265	if (BytesRead < `0`)
266	return make_error<Failure>(Args: "Failed to read FD from socket: " +
267	Twine (strerror(errno)));
268
269	struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
270
271	int FD;
272
273	if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
274	return make_error<Failure>(Args: "Failed to get correct number of bytes for "
275	"file descriptor from socket.");
276
277	memcpy(dest: &FD, CMSG_DATA(ControlMessage), n: sizeof(FD));
278
279	return FD;
280	}
281
282	Error
283	runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
284	SmallVectorImpl<int64_t> &CounterValues,
285	ArrayRef<const char *> ValidationCounters,
286	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
287	auto WriteFDClose = make_scope_exit(F: [WriteFD]() { close(fd: WriteFD); });
288	const ExegesisTarget &ET = State.getExegesisTarget();
289	auto CounterOrError =
290	ET.createCounter(CounterName, State, ValidationCounters, ProcessID: ChildPID);
291
292	if (!CounterOrError)
293	return CounterOrError.takeError();
294
295	pfm::CounterGroup *Counter = CounterOrError.get().get();
296
297	// Make sure to attach to the process (and wait for the sigstop to be
298	// delivered and for the process to continue) before we write to the counter
299	// file descriptor. Attaching to the process before writing to the socket
300	// ensures that the subprocess at most has blocked on the read call. If we
301	// attach afterwards, the subprocess might exit before we get to the attach
302	// call due to effects like scheduler contention, introducing transient
303	// failures.
304	if (ptrace(request: PTRACE_ATTACH, ChildPID, NULL, NULL) != `0`)
305	return make_error<Failure>(Args: "Failed to attach to the child process: " +
306	Twine (strerror(errno)));
307
308	if (waitpid(pid: ChildPID, NULL, options: `0`) == -`1`) {
309	return make_error<Failure>(
310	Args: "Failed to wait for child process to stop after attaching: " +
311	Twine (strerror(errno)));
312	}
313
314	if (ptrace(request: PTRACE_CONT, ChildPID, NULL, NULL) != `0`)
315	return make_error<Failure>(
316	Args: "Failed to continue execution of the child process: " +
317	Twine (strerror(errno)));
318
319	int CounterFileDescriptor = Counter->getFileDescriptor();
320	Error SendError =
321	sendFileDescriptorThroughSocket(SocketFD: WriteFD, FD: CounterFileDescriptor);
322
323	if (SendError)
324	return SendError;
325
326	int ChildStatus;
327	if (waitpid(pid: ChildPID, stat_loc: &ChildStatus, options: `0`) == -`1`) {
328	return make_error<Failure>(
329	Args: "Waiting for the child process to complete failed: " +
330	Twine (strerror(errno)));
331	}
332
333	if (WIFEXITED(ChildStatus)) {
334	int ChildExitCode = WEXITSTATUS(ChildStatus);
335	if (ChildExitCode == `0`) {
336	// The child exited succesfully, read counter values and return
337	// success.
338	auto CounterValueOrErr = Counter->readOrError();
339	if (!CounterValueOrErr)
340	return CounterValueOrErr.takeError();
341	CounterValues = std::move(*CounterValueOrErr);
342
343	auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
344	if (!ValidationValuesOrErr)
345	return ValidationValuesOrErr.takeError();
346
347	ArrayRef RealValidationValues = *ValidationValuesOrErr;
348	for (size_t I = `0`; I < RealValidationValues.size(); ++I)
349	ValidationCounterValues [I] = RealValidationValues [I];
350
351	return Error::success();
352	}
353	// The child exited, but not successfully.
354	return make_error<Failure>(
355	Args: "Child benchmarking process exited with non-zero exit code: " +
356	childProcessExitCodeToString(ExitCode: ChildExitCode));
357	}
358
359	// An error was encountered running the snippet, process it
360	siginfo_t ChildSignalInfo;
361	if (ptrace(request: PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -`1`) {
362	return make_error<Failure>(Args: "Getting signal info from the child failed: " +
363	Twine (strerror(errno)));
364	}
365
366	// Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
367	// handlers to run, and calling SIGTERM would mean that ptrace will force
368	// it to block in the signal-delivery-stop for the SIGSEGV/other signals,
369	// and upon exit.
370	if (kill(pid: ChildPID, SIGKILL) == -`1`)
371	return make_error<Failure>(Args: "Failed to kill child benchmarking proces: " +
372	Twine (strerror(errno)));
373
374	// Wait for the process to exit so that there are no zombie processes left
375	// around.
376	if (waitpid(pid: ChildPID, NULL, options: `0`) == -`1`)
377	return make_error<Failure>(Args: "Failed to wait for process to die: " +
378	Twine (strerror(errno)));
379
380	if (ChildSignalInfo.si_signo == SIGSEGV)
381	return make_error<SnippetSegmentationFault>(
382	Args: reinterpret_cast<intptr_t>(ChildSignalInfo.si_addr));
383
384	return make_error<SnippetSignal>(Args&: ChildSignalInfo.si_signo);
385	}
386
387	Error createSubProcessAndRunBenchmark(
388	StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
389	ArrayRef<const char *> ValidationCounters,
390	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
391	int PipeFiles[`2`];
392	int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, protocol: `0`, fds: PipeFiles);
393	if (PipeSuccessOrErr != `0`) {
394	return make_error<Failure>(
395	Args: "Failed to create a pipe for interprocess communication between "
396	"llvm-exegesis and the benchmarking subprocess: " +
397	Twine (strerror(errno)));
398	}
399
400	SubprocessMemory SPMemory;
401	Error MemoryInitError = SPMemory.initializeSubprocessMemory(ProcessID: getpid());
402	if (MemoryInitError)
403	return MemoryInitError;
404
405	Error AddMemDefError =
406	SPMemory.addMemoryDefinition(MemoryDefinitions: Key.MemoryValues, ProcessID: getpid());
407	if (AddMemDefError)
408	return AddMemDefError;
409
410	long ParentTID = SubprocessMemory::getCurrentTID();
411	pid_t ParentOrChildPID = fork();
412
413	if (ParentOrChildPID == -`1`) {
414	return make_error<Failure>(Args: "Failed to create child process: " +
415	Twine (strerror(errno)));
416	}
417
418	if (ParentOrChildPID == `0`) {
419	// We are in the child process, close the write end of the pipe.
420	close(fd: PipeFiles[`1`]);
421	// Unregister handlers, signal handling is now handled through ptrace in
422	// the host process.
423	sys::unregisterHandlers();
424	runChildSubprocess(Pipe: PipeFiles[`0`], Key, ParentTID);
425	// The child process terminates in the above function, so we should never
426	// get to this point.
427	llvm_unreachable("Child process didn't exit when expected.");
428	}
429
430	// Close the read end of the pipe as we only need to write to the subprocess
431	// from the parent process.
432	close(fd: PipeFiles[`0`]);
433	return runParentProcess(ChildPID: ParentOrChildPID, WriteFD: PipeFiles[`1`], CounterName,
434	CounterValues, ValidationCounters,
435	ValidationCounterValues);
436	}
437
438	void disableCoreDumps() const {
439	struct rlimit rlim;
440
441	rlim.rlim_cur = `0`;
442	setrlimit(RLIMIT_CORE, rlimits: &rlim);
443	}
444
445	[[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
446	long ParentTID) const {
447	// Disable core dumps in the child process as otherwise everytime we
448	// encounter an execution failure like a segmentation fault, we will create
449	// a core dump. We report the information directly rather than require the
450	// user inspect a core dump.
451	disableCoreDumps();
452
453	// The following occurs within the benchmarking subprocess.
454	pid_t ParentPID = getppid();
455
456	Expected<int> CounterFileDescriptorOrError =
457	getFileDescriptorFromSocket(SocketFD: Pipe);
458
459	if (!CounterFileDescriptorOrError)
460	exit(status: ChildProcessExitCodeE::CounterFDReadFailed);
461
462	int CounterFileDescriptor = *CounterFileDescriptorOrError;
463
464	// Glibc versions greater than 2.35 automatically call rseq during
465	// initialization. Unmapping the region that glibc sets up for this causes
466	// segfaults in the program. Unregister the rseq region so that we can safely
467	// unmap it later
468	#ifdef GLIBC_INITS_RSEQ
469	unsigned int RseqStructSize = __rseq_size;
470
471	// Glibc v2.40 (the change is also expected to be backported to v2.35)
472	// changes the definition of __rseq_size to be the usable area of the struct
473	// rather than the actual size of the struct. v2.35 uses only 20 bytes of
474	// the 32 byte struct. For now, it should be safe to assume that if the
475	// usable size is less than 32, the actual size of the struct will be 32
476	// bytes given alignment requirements.
477	if (__rseq_size < `32`)
478	RseqStructSize = `32`;
479
480	long RseqDisableOutput =
481	syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset,
482	RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
483	if (RseqDisableOutput != `0`)
484	exit(status: ChildProcessExitCodeE::RSeqDisableFailed);
485	#endif // GLIBC_INITS_RSEQ
486
487	// The frontend that generates the memory annotation structures should
488	// validate that the address to map the snippet in at is a multiple of
489	// the page size. Assert that this is true here.
490	assert(Key.SnippetAddress % getpagesize() == `0` &&
491	"The snippet address needs to be aligned to a page boundary.");
492
493	size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
494	void *MapAddress = NULL;
495	int MapFlags = MAP_PRIVATE \| MAP_ANONYMOUS;
496
497	if (Key.SnippetAddress != `0`) {
498	MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
499	MapFlags \|= MAP_FIXED_NOREPLACE;
500	}
501
502	char *FunctionDataCopy =
503	(char *)mmap(addr: MapAddress, len: FunctionDataCopySize, PROT_READ \| PROT_WRITE,
504	flags: MapFlags, fd: `0`, offset: `0`);
505	if ((intptr_t)FunctionDataCopy == -`1`)
506	exit(status: ChildProcessExitCodeE::FunctionDataMappingFailed);
507
508	memcpy(dest: FunctionDataCopy, src: this->Function.FunctionBytes.data(),
509	n: this->Function.FunctionBytes.size());
510	mprotect(addr: FunctionDataCopy, len: FunctionDataCopySize, PROT_READ \| PROT_EXEC);
511
512	Expected<int> AuxMemFDOrError =
513	SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
514	MemoryDefinitions: Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
515	if (!AuxMemFDOrError)
516	exit(status: ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
517
518	((void ()(size_t, int*))(intptr_t)FunctionDataCopy)(FunctionDataCopySize,
519	*AuxMemFDOrError);
520
521	exit(status: `0`);
522	}
523
524	Expected<SmallVector<int64_t, `4`>> runWithCounter(
525	StringRef CounterName, ArrayRef<const char *> ValidationCounters,
526	SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
527	SmallVector<int64_t, `4`> Value(`1`, `0`);
528	Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
529	CounterName, CounterValues&: Value, ValidationCounters, ValidationCounterValues);
530
531	if (PossibleBenchmarkError)
532	return std::move(PossibleBenchmarkError);
533
534	return Value;
535	}
536
537	const LLVMState &State;
538	const ExecutableFunction Function;
539	const BenchmarkKey &Key;
540	};
541	#endif // __linux__
542	} // namespace
543
544	Expected<SmallString<`0`>> BenchmarkRunner::assembleSnippet(
545	const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
546	unsigned MinInstructions, unsigned LoopBodySize,
547	bool GenerateMemoryInstructions) const {
548	const std::vector<MCInst> &Instructions = BC.Key.Instructions;
549	SmallString<`0`> Buffer;
550	raw_svector_ostream OS(Buffer);
551	if (Error E = assembleToStream(
552	ET: State.getExegesisTarget(), TM: State.createTargetMachine(), LiveIns: BC.LiveIns,
553	Fill: Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
554	CleanupMemory: GenerateMemoryInstructions),
555	AsmStreamm&: OS, Key: BC.Key, GenerateMemoryInstructions)) {
556	return std::move(E);
557	}
558	return Buffer;
559	}
560
561	Expected<BenchmarkRunner::RunnableConfiguration>
562	BenchmarkRunner::getRunnableConfiguration(
563	const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
564	const SnippetRepetitor &Repetitor) const {
565	RunnableConfiguration RC;
566
567	Benchmark &BenchmarkResult = RC.BenchmarkResult;
568	BenchmarkResult.Mode = Mode;
569	BenchmarkResult.CpuName =
570	std::string (State.getTargetMachine().getTargetCPU());
571	BenchmarkResult.LLVMTriple =
572	State.getTargetMachine().getTargetTriple().normalize();
573	BenchmarkResult.MinInstructions = MinInstructions;
574	BenchmarkResult.Info = BC.Info;
575
576	const std::vector<MCInst> &Instructions = BC.Key.Instructions;
577
578	bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
579
580	BenchmarkResult.Key = BC.Key;
581
582	// Assemble at least kMinInstructionsForSnippet instructions by repeating
583	// the snippet for debug/analysis. This is so that the user clearly
584	// understands that the inside instructions are repeated.
585	if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
586	const int MinInstructionsForSnippet = `4` * Instructions.size();
587	const int LoopBodySizeForSnippet = `2` * Instructions.size();
588	auto Snippet =
589	assembleSnippet(BC, Repetitor, MinInstructions: MinInstructionsForSnippet,
590	LoopBodySize: LoopBodySizeForSnippet, GenerateMemoryInstructions);
591	if (Error E = Snippet.takeError())
592	return std::move(E);
593
594	if (auto Err = getBenchmarkFunctionBytes(InputData: *Snippet,
595	Bytes&: BenchmarkResult.AssembledSnippet))
596	return std::move(Err);
597	}
598
599	// Assemble enough repetitions of the snippet so we have at least
600	// MinInstructions instructions.
601	if (BenchmarkPhaseSelector >
602	BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
603	auto Snippet =
604	assembleSnippet(BC, Repetitor, MinInstructions: BenchmarkResult.MinInstructions,
605	LoopBodySize, GenerateMemoryInstructions);
606	if (Error E = Snippet.takeError())
607	return std::move(E);
608	RC.ObjectFile = getObjectFromBuffer(Buffer: *Snippet);
609	}
610
611	return std::move(RC);
612	}
613
614	Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
615	BenchmarkRunner::createFunctionExecutor(
616	object::OwningBinary<object::ObjectFile> ObjectFile,
617	const BenchmarkKey &Key) const {
618	switch (ExecutionMode) {
619	case ExecutionModeE::InProcess: {
620	auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
621	State, Obj: std::move(ObjectFile), Scratch: Scratch.get());
622	if (!InProcessExecutorOrErr)
623	return InProcessExecutorOrErr.takeError();
624
625	return std::move(*InProcessExecutorOrErr);
626	}
627	case ExecutionModeE::SubProcess: {
628	#ifdef __linux__
629	auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
630	State, Obj: std::move(ObjectFile), Key);
631	if (!SubProcessExecutorOrErr)
632	return SubProcessExecutorOrErr.takeError();
633
634	return std::move(*SubProcessExecutorOrErr);
635	#else
636	return make_error<Failure>(
637	"The subprocess execution mode is only supported on Linux");
638	#endif
639	}
640	}
641	llvm_unreachable("ExecutionMode is outside expected range");
642	}
643
644	std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
645	RunnableConfiguration &&RC,
646	const std::optional<StringRef> &DumpFile) const {
647	Benchmark &BenchmarkResult = RC.BenchmarkResult;
648	object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
649
650	if (DumpFile && BenchmarkPhaseSelector >
651	BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
652	auto ObjectFilePath =
653	writeObjectFile(Buffer: ObjectFile.getBinary()->getData(), FileName: *DumpFile);
654	if (Error E = ObjectFilePath.takeError()) {
655	return {std::move(E), std::move(BenchmarkResult)};
656	}
657	outs() << "Check generated assembly with: /usr/bin/objdump -d "
658	<< *ObjectFilePath << "\n";
659	}
660
661	if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
662	BenchmarkResult.Error = "actual measurements skipped.";
663	return {Error::success(), std::move(BenchmarkResult)};
664	}
665
666	Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
667	createFunctionExecutor(ObjectFile: std::move(ObjectFile), Key: RC.BenchmarkResult.Key);
668	if (!Executor)
669	return {Executor.takeError(), std::move(BenchmarkResult)};
670	auto NewMeasurements = runMeasurements(Executor: **Executor);
671
672	if (Error E = NewMeasurements.takeError()) {
673	return {std::move(E), std::move(BenchmarkResult)};
674	}
675	assert(BenchmarkResult.MinInstructions > `0` && "invalid MinInstructions");
676	for (BenchmarkMeasure &BM : *NewMeasurements) {
677	// Scale the measurements by the number of instructions.
678	BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
679	// Scale the measurements by the number of times the entire snippet is
680	// repeated.
681	BM.PerSnippetValue /=
682	std::ceil(x: BenchmarkResult.MinInstructions /
683	static_cast<double>(BenchmarkResult.Key.Instructions.size()));
684	}
685	BenchmarkResult.Measurements = std::move(*NewMeasurements);
686
687	return {Error::success(), std::move(BenchmarkResult)};
688	}
689
690	Expected<std::string>
691	BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
692	int ResultFD = `0`;
693	SmallString<`256`> ResultPath = FileName;
694	if (Error E = errorCodeToError(
695	EC: FileName.empty() ? sys::fs::createTemporaryFile(Prefix: "snippet", Suffix: "o",
696	ResultFD, ResultPath)
697	: sys::fs::openFileForReadWrite(
698	Name: FileName, ResultFD, Disp: sys::fs::CD_CreateAlways,
699	Flags: sys::fs::OF_None)))
700	return std::move(E);
701	raw_fd_ostream OFS(ResultFD, true /ShouldClose/);
702	OFS.write(Ptr: Buffer.data(), Size: Buffer.size());
703	OFS.flush();
704	return std::string(ResultPath);
705	}
706
707	static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
708	const ValidationEvent RHS) {
709	return static_cast<int>(LHS.first) < static_cast<int>(RHS);
710	}
711
712	Error BenchmarkRunner::getValidationCountersToRun(
713	SmallVector<const char > &ValCountersToRun) const* {
714	const PfmCountersInfo &PCI = State.getPfmCounters();
715	ValCountersToRun.reserve(N: ValidationCounters.size());
716
717	ValCountersToRun.reserve(N: ValidationCounters.size());
718	ArrayRef TargetValidationEvents(PCI.ValidationEvents,
719	PCI.NumValidationEvents);
720	for (const ValidationEvent RequestedValEvent : ValidationCounters) {
721	auto ValCounterIt =
722	lower_bound(Range&: TargetValidationEvents, Value: RequestedValEvent, C: EventLessThan);
723	if (ValCounterIt == TargetValidationEvents.end() \|\|
724	ValCounterIt->first != RequestedValEvent)
725	return make_error<Failure>(Args: "Cannot create validation counter");
726
727	assert(ValCounterIt->first == RequestedValEvent &&
728	"The array of validation events from the target should be sorted");
729	ValCountersToRun.push_back(Elt: ValCounterIt->second);
730	}
731
732	return Error::success();
733	}
734
735	BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
736
737	} // namespace exegesis
738	} // namespace llvm
739

Browse the source code of llvm_projects/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp