1//===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BenchmarkRunner.h"
10#include "Assembler.h"
11#include "Error.h"
12#include "MCInstrDescView.h"
13#include "MmapUtils.h"
14#include "PerfHelper.h"
15#include "SubprocessMemory.h"
16#include "Target.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
21#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
22#include "llvm/Support/CrashRecoveryContext.h"
23#include "llvm/Support/Error.h"
24#include "llvm/Support/FileSystem.h"
25#include "llvm/Support/MemoryBuffer.h"
26#include "llvm/Support/Program.h"
27#include "llvm/Support/Signals.h"
28#include "llvm/Support/SystemZ/zOSSupport.h"
29#include <cmath>
30#include <memory>
31#include <string>
32
33#ifdef __linux__
34#ifdef HAVE_LIBPFM
35#include <perfmon/perf_event.h>
36#endif
37#include <sys/mman.h>
38#include <sys/ptrace.h>
39#include <sys/resource.h>
40#include <sys/socket.h>
41#include <sys/syscall.h>
42#include <sys/wait.h>
43#include <unistd.h>
44
45#if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
46#include <sys/rseq.h>
47#if defined(RSEQ_SIG) && defined(SYS_rseq)
48#define GLIBC_INITS_RSEQ
49#endif
50#endif
51#endif // __linux__
52
53namespace llvm {
54namespace exegesis {
55
56BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
57 BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
58 ExecutionModeE ExecutionMode,
59 ArrayRef<ValidationEvent> ValCounters)
60 : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
61 ExecutionMode(ExecutionMode), ValidationCounters(ValCounters),
62 Scratch(std::make_unique<ScratchSpace>()) {}
63
64BenchmarkRunner::~BenchmarkRunner() = default;
65
66void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
67 const SmallVectorImpl<int64_t> &NewValues,
68 SmallVectorImpl<int64_t> *Result) {
69 const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
70 if (NumValues > Result->size())
71 Result->resize(N: NumValues, NV: 0);
72 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
73 (*Result)[I] += NewValues[I];
74}
75
76Expected<SmallVector<int64_t, 4>>
77BenchmarkRunner::FunctionExecutor::runAndSample(
78 const char *Counters, ArrayRef<const char *> ValidationCounters,
79 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
80 // We sum counts when there are several counters for a single ProcRes
81 // (e.g. P23 on SandyBridge).
82 SmallVector<int64_t, 4> CounterValues;
83 SmallVector<StringRef, 2> CounterNames;
84 StringRef(Counters).split(A&: CounterNames, Separator: '+');
85 for (auto &CounterName : CounterNames) {
86 CounterName = CounterName.trim();
87 Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter(
88 CounterName, ValidationCounters, ValidationCounterValues);
89 if (!ValueOrError)
90 return ValueOrError.takeError();
91 accumulateCounterValues(NewValues: ValueOrError.get(), Result: &CounterValues);
92 }
93 return CounterValues;
94}
95
96namespace {
97class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
98public:
99 static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
100 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
101 BenchmarkRunner::ScratchSpace *Scratch,
102 std::optional<int> BenchmarkProcessCPU) {
103 Expected<ExecutableFunction> EF =
104 ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
105
106 if (!EF)
107 return EF.takeError();
108
109 return std::unique_ptr<InProcessFunctionExecutorImpl>(
110 new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
111 }
112
113private:
114 InProcessFunctionExecutorImpl(const LLVMState &State,
115 ExecutableFunction Function,
116 BenchmarkRunner::ScratchSpace *Scratch)
117 : State(State), Function(std::move(Function)), Scratch(Scratch) {}
118
119 static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues,
120 SmallVector<int64_t, 4> *Result) {
121 const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
122 if (NumValues > Result->size())
123 Result->resize(N: NumValues, NV: 0);
124 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
125 (*Result)[I] += NewValues[I];
126 }
127
128 Expected<SmallVector<int64_t, 4>> runWithCounter(
129 StringRef CounterName, ArrayRef<const char *> ValidationCounters,
130 SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
131 const ExegesisTarget &ET = State.getExegesisTarget();
132 char *const ScratchPtr = Scratch->ptr();
133 auto CounterOrError =
134 ET.createCounter(CounterName, State, ValidationCounters);
135
136 if (!CounterOrError)
137 return CounterOrError.takeError();
138
139 pfm::CounterGroup *Counter = CounterOrError.get().get();
140 Scratch->clear();
141 {
142 auto PS = ET.withSavedState();
143 CrashRecoveryContext CRC;
144 CrashRecoveryContext::Enable();
145 const bool Crashed = !CRC.RunSafely(Fn: [this, Counter, ScratchPtr]() {
146 Counter->start();
147 this->Function(ScratchPtr);
148 Counter->stop();
149 });
150 CrashRecoveryContext::Disable();
151 PS.reset();
152 if (Crashed) {
153#ifdef LLVM_ON_UNIX
154 // See "Exit Status for Commands":
155 // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
156 constexpr const int kSigOffset = 128;
157 return make_error<SnippetSignal>(Args: CRC.RetCode - kSigOffset);
158#else
159 // The exit code of the process on windows is not meaningful as a
160 // signal, so simply pass in -1 as the signal into the error.
161 return make_error<SnippetSignal>(-1);
162#endif // LLVM_ON_UNIX
163 }
164 }
165
166 auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
167 if (!ValidationValuesOrErr)
168 return ValidationValuesOrErr.takeError();
169
170 ArrayRef RealValidationValues = *ValidationValuesOrErr;
171 for (size_t I = 0; I < RealValidationValues.size(); ++I)
172 ValidationCounterValues[I] = RealValidationValues[I];
173
174 return Counter->readOrError(FunctionBytes: Function.getFunctionBytes());
175 }
176
177 const LLVMState &State;
178 const ExecutableFunction Function;
179 BenchmarkRunner::ScratchSpace *const Scratch;
180};
181
182#ifdef __linux__
183// The following class implements a function executor that executes the
184// benchmark code within a subprocess rather than within the main llvm-exegesis
185// process. This allows for much more control over the execution context of the
186// snippet, particularly with regard to memory. This class performs all the
187// necessary functions to create the subprocess, execute the snippet in the
188// subprocess, and report results/handle errors.
189class SubProcessFunctionExecutorImpl
190 : public BenchmarkRunner::FunctionExecutor {
191public:
192 static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
193 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
194 const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) {
195 Expected<ExecutableFunction> EF =
196 ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
197 if (!EF)
198 return EF.takeError();
199
200 return std::unique_ptr<SubProcessFunctionExecutorImpl>(
201 new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key,
202 BenchmarkProcessCPU));
203 }
204
205private:
206 SubProcessFunctionExecutorImpl(const LLVMState &State,
207 ExecutableFunction Function,
208 const BenchmarkKey &Key,
209 std::optional<int> BenchmarkCPU)
210 : State(State), Function(std::move(Function)), Key(Key),
211 BenchmarkProcessCPU(BenchmarkCPU) {}
212
213 enum ChildProcessExitCodeE {
214 CounterFDReadFailed = 1,
215 RSeqDisableFailed,
216 FunctionDataMappingFailed,
217 AuxiliaryMemorySetupFailed,
218 SetCPUAffinityFailed
219 };
220
221 StringRef childProcessExitCodeToString(int ExitCode) const {
222 switch (ExitCode) {
223 case ChildProcessExitCodeE::CounterFDReadFailed:
224 return "Counter file descriptor read failed";
225 case ChildProcessExitCodeE::RSeqDisableFailed:
226 return "Disabling restartable sequences failed";
227 case ChildProcessExitCodeE::FunctionDataMappingFailed:
228 return "Failed to map memory for assembled snippet";
229 case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
230 return "Failed to setup auxiliary memory";
231 case ChildProcessExitCodeE::SetCPUAffinityFailed:
232 return "Failed to set CPU affinity of the benchmarking process";
233 default:
234 return "Child process returned with unknown exit code";
235 }
236 }
237
238 Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
239 struct msghdr Message = {};
240 char Buffer[CMSG_SPACE(sizeof(FD))];
241 memset(s: Buffer, c: 0, n: sizeof(Buffer));
242 Message.msg_control = Buffer;
243 Message.msg_controllen = sizeof(Buffer);
244
245 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
246 ControlMessage->cmsg_level = SOL_SOCKET;
247 ControlMessage->cmsg_type = SCM_RIGHTS;
248 ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
249
250 memcpy(CMSG_DATA(ControlMessage), src: &FD, n: sizeof(FD));
251
252 Message.msg_controllen = CMSG_SPACE(sizeof(FD));
253
254 ssize_t BytesWritten = sendmsg(fd: SocketFD, message: &Message, flags: 0);
255
256 if (BytesWritten < 0)
257 return make_error<Failure>(Args: "Failed to write FD to socket: " +
258 Twine(strerror(errno)));
259
260 return Error::success();
261 }
262
263 Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
264 struct msghdr Message = {};
265
266 char ControlBuffer[256];
267 Message.msg_control = ControlBuffer;
268 Message.msg_controllen = sizeof(ControlBuffer);
269
270 ssize_t BytesRead = recvmsg(fd: SocketFD, message: &Message, flags: 0);
271
272 if (BytesRead < 0)
273 return make_error<Failure>(Args: "Failed to read FD from socket: " +
274 Twine(strerror(errno)));
275
276 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
277
278 int FD;
279
280 if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
281 return make_error<Failure>(Args: "Failed to get correct number of bytes for "
282 "file descriptor from socket.");
283
284 memcpy(dest: &FD, CMSG_DATA(ControlMessage), n: sizeof(FD));
285
286 return FD;
287 }
288
289 Error
290 runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
291 SmallVectorImpl<int64_t> &CounterValues,
292 ArrayRef<const char *> ValidationCounters,
293 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
294 auto WriteFDClose = make_scope_exit(F: [WriteFD]() { close(fd: WriteFD); });
295 const ExegesisTarget &ET = State.getExegesisTarget();
296 auto CounterOrError =
297 ET.createCounter(CounterName, State, ValidationCounters, ProcessID: ChildPID);
298
299 if (!CounterOrError)
300 return CounterOrError.takeError();
301
302 pfm::CounterGroup *Counter = CounterOrError.get().get();
303
304 // Make sure to attach to the process (and wait for the sigstop to be
305 // delivered and for the process to continue) before we write to the counter
306 // file descriptor. Attaching to the process before writing to the socket
307 // ensures that the subprocess at most has blocked on the read call. If we
308 // attach afterwards, the subprocess might exit before we get to the attach
309 // call due to effects like scheduler contention, introducing transient
310 // failures.
311 if (ptrace(request: PTRACE_ATTACH, ChildPID, NULL, NULL) != 0)
312 return make_error<Failure>(Args: "Failed to attach to the child process: " +
313 Twine(strerror(errno)));
314
315 if (waitpid(pid: ChildPID, NULL, options: 0) == -1) {
316 return make_error<Failure>(
317 Args: "Failed to wait for child process to stop after attaching: " +
318 Twine(strerror(errno)));
319 }
320
321 if (ptrace(request: PTRACE_CONT, ChildPID, NULL, NULL) != 0)
322 return make_error<Failure>(
323 Args: "Failed to continue execution of the child process: " +
324 Twine(strerror(errno)));
325
326 int CounterFileDescriptor = Counter->getFileDescriptor();
327 Error SendError =
328 sendFileDescriptorThroughSocket(SocketFD: WriteFD, FD: CounterFileDescriptor);
329
330 if (SendError)
331 return SendError;
332
333 int ChildStatus;
334 if (waitpid(pid: ChildPID, stat_loc: &ChildStatus, options: 0) == -1) {
335 return make_error<Failure>(
336 Args: "Waiting for the child process to complete failed: " +
337 Twine(strerror(errno)));
338 }
339
340 if (WIFEXITED(ChildStatus)) {
341 int ChildExitCode = WEXITSTATUS(ChildStatus);
342 if (ChildExitCode == 0) {
343 // The child exited succesfully, read counter values and return
344 // success.
345 auto CounterValueOrErr = Counter->readOrError();
346 if (!CounterValueOrErr)
347 return CounterValueOrErr.takeError();
348 CounterValues = std::move(*CounterValueOrErr);
349
350 auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
351 if (!ValidationValuesOrErr)
352 return ValidationValuesOrErr.takeError();
353
354 ArrayRef RealValidationValues = *ValidationValuesOrErr;
355 for (size_t I = 0; I < RealValidationValues.size(); ++I)
356 ValidationCounterValues[I] = RealValidationValues[I];
357
358 return Error::success();
359 }
360 // The child exited, but not successfully.
361 return make_error<Failure>(
362 Args: "Child benchmarking process exited with non-zero exit code: " +
363 childProcessExitCodeToString(ExitCode: ChildExitCode));
364 }
365
366 // An error was encountered running the snippet, process it
367 siginfo_t ChildSignalInfo;
368 if (ptrace(request: PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) {
369 return make_error<Failure>(Args: "Getting signal info from the child failed: " +
370 Twine(strerror(errno)));
371 }
372
373 // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
374 // handlers to run, and calling SIGTERM would mean that ptrace will force
375 // it to block in the signal-delivery-stop for the SIGSEGV/other signals,
376 // and upon exit.
377 if (kill(pid: ChildPID, SIGKILL) == -1)
378 return make_error<Failure>(Args: "Failed to kill child benchmarking proces: " +
379 Twine(strerror(errno)));
380
381 // Wait for the process to exit so that there are no zombie processes left
382 // around.
383 if (waitpid(pid: ChildPID, NULL, options: 0) == -1)
384 return make_error<Failure>(Args: "Failed to wait for process to die: " +
385 Twine(strerror(errno)));
386
387 if (ChildSignalInfo.si_signo == SIGSEGV)
388 return make_error<SnippetSegmentationFault>(
389 Args: reinterpret_cast<uintptr_t>(ChildSignalInfo.si_addr));
390
391 return make_error<SnippetSignal>(Args&: ChildSignalInfo.si_signo);
392 }
393
394 static void setCPUAffinityIfRequested(int CPUToUse) {
395// Special case this function for x86_64 for now as certain more esoteric
396// platforms have different definitions for some of the libc functions that
397// cause buildtime failures. Additionally, the subprocess executor mode (the
398// sole mode where this is supported) currently only supports x86_64.
399
400// Also check that we have the SYS_getcpu macro defined, meaning the syscall
401// actually exists within the build environment. We manually use the syscall
402// rather than the libc wrapper given the wrapper for getcpu is only available
403// in glibc 2.29 and later.
404#if defined(__x86_64__) && defined(SYS_getcpu)
405 // Set the CPU affinity for the child process, so that we ensure that if
406 // the user specified a CPU the process should run on, the benchmarking
407 // process is running on that CPU.
408 cpu_set_t CPUMask;
409 CPU_ZERO(&CPUMask);
410 CPU_SET(CPUToUse, &CPUMask);
411 // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they
412 // are available.
413 int SetAffinityReturn = sched_setaffinity(pid: 0, cpusetsize: sizeof(CPUMask), cpuset: &CPUMask);
414 if (SetAffinityReturn == -1) {
415 exit(status: ChildProcessExitCodeE::SetCPUAffinityFailed);
416 }
417
418 // Check (if assertions are enabled) that we are actually running on the
419 // CPU that was specified by the user.
420 [[maybe_unused]] unsigned int CurrentCPU;
421 assert(syscall(SYS_getcpu, &CurrentCPU, nullptr) == 0 &&
422 "Expected getcpu call to succeed.");
423 assert(static_cast<int>(CurrentCPU) == CPUToUse &&
424 "Expected current CPU to equal the CPU requested by the user");
425#else
426 exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
427#endif // defined(__x86_64__) && defined(SYS_getcpu)
428 }
429
430 Error createSubProcessAndRunBenchmark(
431 StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
432 ArrayRef<const char *> ValidationCounters,
433 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
434 int PipeFiles[2];
435 int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, protocol: 0, fds: PipeFiles);
436 if (PipeSuccessOrErr != 0) {
437 return make_error<Failure>(
438 Args: "Failed to create a pipe for interprocess communication between "
439 "llvm-exegesis and the benchmarking subprocess: " +
440 Twine(strerror(errno)));
441 }
442
443 SubprocessMemory SPMemory;
444 Error MemoryInitError = SPMemory.initializeSubprocessMemory(ProcessID: getpid());
445 if (MemoryInitError)
446 return MemoryInitError;
447
448 Error AddMemDefError =
449 SPMemory.addMemoryDefinition(MemoryDefinitions: Key.MemoryValues, ProcessID: getpid());
450 if (AddMemDefError)
451 return AddMemDefError;
452
453 long ParentTID = SubprocessMemory::getCurrentTID();
454 pid_t ParentOrChildPID = fork();
455
456 if (ParentOrChildPID == -1) {
457 return make_error<Failure>(Args: "Failed to create child process: " +
458 Twine(strerror(errno)));
459 }
460
461 if (ParentOrChildPID == 0) {
462 if (BenchmarkProcessCPU.has_value()) {
463 setCPUAffinityIfRequested(*BenchmarkProcessCPU);
464 }
465
466 // We are in the child process, close the write end of the pipe.
467 close(fd: PipeFiles[1]);
468 // Unregister handlers, signal handling is now handled through ptrace in
469 // the host process.
470 sys::unregisterHandlers();
471 runChildSubprocess(Pipe: PipeFiles[0], Key, ParentTID);
472 // The child process terminates in the above function, so we should never
473 // get to this point.
474 llvm_unreachable("Child process didn't exit when expected.");
475 }
476
477 // Close the read end of the pipe as we only need to write to the subprocess
478 // from the parent process.
479 close(fd: PipeFiles[0]);
480 return runParentProcess(ChildPID: ParentOrChildPID, WriteFD: PipeFiles[1], CounterName,
481 CounterValues, ValidationCounters,
482 ValidationCounterValues);
483 }
484
485 void disableCoreDumps() const {
486 struct rlimit rlim;
487
488 rlim.rlim_cur = 0;
489 setrlimit(RLIMIT_CORE, rlimits: &rlim);
490 }
491
492 [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
493 long ParentTID) const {
494 // Disable core dumps in the child process as otherwise everytime we
495 // encounter an execution failure like a segmentation fault, we will create
496 // a core dump. We report the information directly rather than require the
497 // user inspect a core dump.
498 disableCoreDumps();
499
500 // The following occurs within the benchmarking subprocess.
501 pid_t ParentPID = getppid();
502
503 Expected<int> CounterFileDescriptorOrError =
504 getFileDescriptorFromSocket(SocketFD: Pipe);
505
506 if (!CounterFileDescriptorOrError)
507 exit(status: ChildProcessExitCodeE::CounterFDReadFailed);
508
509 int CounterFileDescriptor = *CounterFileDescriptorOrError;
510
511// Glibc versions greater than 2.35 automatically call rseq during
512// initialization. Unmapping the region that glibc sets up for this causes
513// segfaults in the program. Unregister the rseq region so that we can safely
514// unmap it later
515#ifdef GLIBC_INITS_RSEQ
516 unsigned int RseqStructSize = __rseq_size;
517
518 // Glibc v2.40 (the change is also expected to be backported to v2.35)
519 // changes the definition of __rseq_size to be the usable area of the struct
520 // rather than the actual size of the struct. v2.35 uses only 20 bytes of
521 // the 32 byte struct. For now, it should be safe to assume that if the
522 // usable size is less than 32, the actual size of the struct will be 32
523 // bytes given alignment requirements.
524 if (__rseq_size < 32)
525 RseqStructSize = 32;
526
527 long RseqDisableOutput = syscall(
528 SYS_rseq,
529 reinterpret_cast<uintptr_t>(__builtin_thread_pointer()) + __rseq_offset,
530 RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
531 if (RseqDisableOutput != 0)
532 exit(status: ChildProcessExitCodeE::RSeqDisableFailed);
533#endif // GLIBC_INITS_RSEQ
534
535 // The frontend that generates the memory annotation structures should
536 // validate that the address to map the snippet in at is a multiple of
537 // the page size. Assert that this is true here.
538 assert(Key.SnippetAddress % getpagesize() == 0 &&
539 "The snippet address needs to be aligned to a page boundary.");
540
541 size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
542 void *MapAddress = NULL;
543 int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
544
545 if (Key.SnippetAddress != 0) {
546 MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
547 MapFlags |= MAP_FIXED_NOREPLACE;
548 }
549
550 char *FunctionDataCopy =
551 (char *)mmap(addr: MapAddress, len: FunctionDataCopySize, PROT_READ | PROT_WRITE,
552 flags: MapFlags, fd: 0, offset: 0);
553 if (reinterpret_cast<intptr_t>(FunctionDataCopy) == -1)
554 exit(status: ChildProcessExitCodeE::FunctionDataMappingFailed);
555
556 memcpy(dest: FunctionDataCopy, src: this->Function.FunctionBytes.data(),
557 n: this->Function.FunctionBytes.size());
558 mprotect(addr: FunctionDataCopy, len: FunctionDataCopySize, PROT_READ | PROT_EXEC);
559
560 Expected<int> AuxMemFDOrError =
561 SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
562 MemoryDefinitions: Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
563 if (!AuxMemFDOrError)
564 exit(status: ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
565
566 ((void (*)(size_t, int))(uintptr_t)FunctionDataCopy)(FunctionDataCopySize,
567 *AuxMemFDOrError);
568
569 exit(status: 0);
570 }
571
572 Expected<SmallVector<int64_t, 4>> runWithCounter(
573 StringRef CounterName, ArrayRef<const char *> ValidationCounters,
574 SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
575 SmallVector<int64_t, 4> Value(1, 0);
576 Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
577 CounterName, CounterValues&: Value, ValidationCounters, ValidationCounterValues);
578
579 if (PossibleBenchmarkError)
580 return std::move(PossibleBenchmarkError);
581
582 return Value;
583 }
584
585 const LLVMState &State;
586 const ExecutableFunction Function;
587 const BenchmarkKey &Key;
588 const std::optional<int> BenchmarkProcessCPU;
589};
590#endif // __linux__
591} // namespace
592
593Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
594 const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
595 unsigned MinInstructions, unsigned LoopBodySize,
596 bool GenerateMemoryInstructions) const {
597 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
598 SmallString<0> Buffer;
599 raw_svector_ostream OS(Buffer);
600 if (Error E = assembleToStream(
601 ET: State.getExegesisTarget(), TM: State.createTargetMachine(), LiveIns: BC.LiveIns,
602 Fill: Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
603 CleanupMemory: GenerateMemoryInstructions),
604 AsmStreamm&: OS, Key: BC.Key, GenerateMemoryInstructions)) {
605 return std::move(E);
606 }
607 return Buffer;
608}
609
610Expected<BenchmarkRunner::RunnableConfiguration>
611BenchmarkRunner::getRunnableConfiguration(
612 const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
613 const SnippetRepetitor &Repetitor) const {
614 RunnableConfiguration RC;
615
616 Benchmark &BenchmarkResult = RC.BenchmarkResult;
617 BenchmarkResult.Mode = Mode;
618 BenchmarkResult.CpuName =
619 std::string(State.getTargetMachine().getTargetCPU());
620 BenchmarkResult.LLVMTriple =
621 State.getTargetMachine().getTargetTriple().normalize();
622 BenchmarkResult.MinInstructions = MinInstructions;
623 BenchmarkResult.Info = BC.Info;
624
625 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
626
627 bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
628
629 BenchmarkResult.Key = BC.Key;
630
631 // Assemble at least kMinInstructionsForSnippet instructions by repeating
632 // the snippet for debug/analysis. This is so that the user clearly
633 // understands that the inside instructions are repeated.
634 if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
635 const int MinInstructionsForSnippet = 4 * Instructions.size();
636 const int LoopBodySizeForSnippet = 2 * Instructions.size();
637 auto Snippet =
638 assembleSnippet(BC, Repetitor, MinInstructions: MinInstructionsForSnippet,
639 LoopBodySize: LoopBodySizeForSnippet, GenerateMemoryInstructions);
640 if (Error E = Snippet.takeError())
641 return std::move(E);
642
643 if (auto Err = getBenchmarkFunctionBytes(InputData: *Snippet,
644 Bytes&: BenchmarkResult.AssembledSnippet))
645 return std::move(Err);
646 }
647
648 // Assemble enough repetitions of the snippet so we have at least
649 // MinInstructions instructions.
650 if (BenchmarkPhaseSelector >
651 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
652 auto Snippet =
653 assembleSnippet(BC, Repetitor, MinInstructions: BenchmarkResult.MinInstructions,
654 LoopBodySize, GenerateMemoryInstructions);
655 if (Error E = Snippet.takeError())
656 return std::move(E);
657 RC.ObjectFile = getObjectFromBuffer(Buffer: *Snippet);
658 }
659
660 return std::move(RC);
661}
662
663Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
664BenchmarkRunner::createFunctionExecutor(
665 object::OwningBinary<object::ObjectFile> ObjectFile,
666 const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const {
667 switch (ExecutionMode) {
668 case ExecutionModeE::InProcess: {
669 if (BenchmarkProcessCPU.has_value())
670 return make_error<Failure>(Args: "The inprocess execution mode does not "
671 "support benchmark core pinning.");
672
673 auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
674 State, Obj: std::move(ObjectFile), Scratch: Scratch.get(), BenchmarkProcessCPU);
675 if (!InProcessExecutorOrErr)
676 return InProcessExecutorOrErr.takeError();
677
678 return std::move(*InProcessExecutorOrErr);
679 }
680 case ExecutionModeE::SubProcess: {
681#ifdef __linux__
682 auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
683 State, Obj: std::move(ObjectFile), Key, BenchmarkProcessCPU);
684 if (!SubProcessExecutorOrErr)
685 return SubProcessExecutorOrErr.takeError();
686
687 return std::move(*SubProcessExecutorOrErr);
688#else
689 return make_error<Failure>(
690 "The subprocess execution mode is only supported on Linux");
691#endif
692 }
693 }
694 llvm_unreachable("ExecutionMode is outside expected range");
695}
696
697std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
698 RunnableConfiguration &&RC, const std::optional<StringRef> &DumpFile,
699 std::optional<int> BenchmarkProcessCPU) const {
700 Benchmark &BenchmarkResult = RC.BenchmarkResult;
701 object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
702
703 if (DumpFile && BenchmarkPhaseSelector >
704 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
705 auto ObjectFilePath =
706 writeObjectFile(Buffer: ObjectFile.getBinary()->getData(), FileName: *DumpFile);
707 if (Error E = ObjectFilePath.takeError()) {
708 return {std::move(E), std::move(BenchmarkResult)};
709 }
710 outs() << "Check generated assembly with: /usr/bin/objdump -d "
711 << *ObjectFilePath << "\n";
712 }
713
714 if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
715 BenchmarkResult.Error = "actual measurements skipped.";
716 return {Error::success(), std::move(BenchmarkResult)};
717 }
718
719 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
720 createFunctionExecutor(ObjectFile: std::move(ObjectFile), Key: RC.BenchmarkResult.Key,
721 BenchmarkProcessCPU);
722 if (!Executor)
723 return {Executor.takeError(), std::move(BenchmarkResult)};
724 auto NewMeasurements = runMeasurements(Executor: **Executor);
725
726 if (Error E = NewMeasurements.takeError()) {
727 return {std::move(E), std::move(BenchmarkResult)};
728 }
729 assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions");
730 for (BenchmarkMeasure &BM : *NewMeasurements) {
731 // Scale the measurements by the number of instructions.
732 BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
733 // Scale the measurements by the number of times the entire snippet is
734 // repeated.
735 BM.PerSnippetValue /=
736 std::ceil(x: BenchmarkResult.MinInstructions /
737 static_cast<double>(BenchmarkResult.Key.Instructions.size()));
738 }
739 BenchmarkResult.Measurements = std::move(*NewMeasurements);
740
741 return {Error::success(), std::move(BenchmarkResult)};
742}
743
744Expected<std::string>
745BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
746 int ResultFD = 0;
747 SmallString<256> ResultPath = FileName;
748 if (Error E = errorCodeToError(
749 EC: FileName.empty() ? sys::fs::createTemporaryFile(Prefix: "snippet", Suffix: "o",
750 ResultFD, ResultPath)
751 : sys::fs::openFileForReadWrite(
752 Name: FileName, ResultFD, Disp: sys::fs::CD_CreateAlways,
753 Flags: sys::fs::OF_None)))
754 return std::move(E);
755 raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
756 OFS.write(Ptr: Buffer.data(), Size: Buffer.size());
757 OFS.flush();
758 return std::string(ResultPath);
759}
760
761static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
762 const ValidationEvent RHS) {
763 return static_cast<int>(LHS.first) < static_cast<int>(RHS);
764}
765
766Error BenchmarkRunner::getValidationCountersToRun(
767 SmallVector<const char *> &ValCountersToRun) const {
768 const PfmCountersInfo &PCI = State.getPfmCounters();
769 ValCountersToRun.reserve(N: ValidationCounters.size());
770
771 ValCountersToRun.reserve(N: ValidationCounters.size());
772 ArrayRef TargetValidationEvents(PCI.ValidationEvents,
773 PCI.NumValidationEvents);
774 for (const ValidationEvent RequestedValEvent : ValidationCounters) {
775 auto ValCounterIt =
776 lower_bound(Range&: TargetValidationEvents, Value: RequestedValEvent, C: EventLessThan);
777 if (ValCounterIt == TargetValidationEvents.end() ||
778 ValCounterIt->first != RequestedValEvent)
779 return make_error<Failure>(Args: "Cannot create validation counter");
780
781 assert(ValCounterIt->first == RequestedValEvent &&
782 "The array of validation events from the target should be sorted");
783 ValCountersToRun.push_back(Elt: ValCounterIt->second);
784 }
785
786 return Error::success();
787}
788
789BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
790
791} // namespace exegesis
792} // namespace llvm
793