1//===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include <cmath>
10#include <memory>
11#include <string>
12
13#include "Assembler.h"
14#include "BenchmarkRunner.h"
15#include "Error.h"
16#include "MCInstrDescView.h"
17#include "MmapUtils.h"
18#include "PerfHelper.h"
19#include "SubprocessMemory.h"
20#include "Target.h"
21#include "llvm/ADT/ScopeExit.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/ADT/Twine.h"
25#include "llvm/Support/CrashRecoveryContext.h"
26#include "llvm/Support/Error.h"
27#include "llvm/Support/FileSystem.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/Program.h"
30#include "llvm/Support/Signals.h"
31#include "llvm/Support/SystemZ/zOSSupport.h"
32
33#ifdef __linux__
34#ifdef HAVE_LIBPFM
35#include <perfmon/perf_event.h>
36#endif
37#include <sys/mman.h>
38#include <sys/ptrace.h>
39#include <sys/resource.h>
40#include <sys/socket.h>
41#include <sys/syscall.h>
42#include <sys/wait.h>
43#include <unistd.h>
44
45#if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
46#include <sys/rseq.h>
47#if defined(RSEQ_SIG) && defined(SYS_rseq)
48#define GLIBC_INITS_RSEQ
49#endif
50#endif
51#endif // __linux__
52
53namespace llvm {
54namespace exegesis {
55
56BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
57 BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
58 ExecutionModeE ExecutionMode,
59 ArrayRef<ValidationEvent> ValCounters)
60 : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
61 ExecutionMode(ExecutionMode), ValidationCounters(ValCounters),
62 Scratch(std::make_unique<ScratchSpace>()) {}
63
64BenchmarkRunner::~BenchmarkRunner() = default;
65
66void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
67 const SmallVectorImpl<int64_t> &NewValues,
68 SmallVectorImpl<int64_t> *Result) {
69 const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
70 if (NumValues > Result->size())
71 Result->resize(N: NumValues, NV: 0);
72 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
73 (*Result)[I] += NewValues[I];
74}
75
76Expected<SmallVector<int64_t, 4>>
77BenchmarkRunner::FunctionExecutor::runAndSample(
78 const char *Counters, ArrayRef<const char *> ValidationCounters,
79 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
80 // We sum counts when there are several counters for a single ProcRes
81 // (e.g. P23 on SandyBridge).
82 SmallVector<int64_t, 4> CounterValues;
83 SmallVector<StringRef, 2> CounterNames;
84 StringRef(Counters).split(A&: CounterNames, Separator: '+');
85 for (auto &CounterName : CounterNames) {
86 CounterName = CounterName.trim();
87 Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter(
88 CounterName, ValidationCounters, ValidationCounterValues);
89 if (!ValueOrError)
90 return ValueOrError.takeError();
91 accumulateCounterValues(NewValues: ValueOrError.get(), Result: &CounterValues);
92 }
93 return CounterValues;
94}
95
96namespace {
97class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
98public:
99 static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
100 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
101 BenchmarkRunner::ScratchSpace *Scratch) {
102 Expected<ExecutableFunction> EF =
103 ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
104
105 if (!EF)
106 return EF.takeError();
107
108 return std::unique_ptr<InProcessFunctionExecutorImpl>(
109 new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
110 }
111
112private:
113 InProcessFunctionExecutorImpl(const LLVMState &State,
114 ExecutableFunction Function,
115 BenchmarkRunner::ScratchSpace *Scratch)
116 : State(State), Function(std::move(Function)), Scratch(Scratch) {}
117
118 static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues,
119 SmallVector<int64_t, 4> *Result) {
120 const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
121 if (NumValues > Result->size())
122 Result->resize(N: NumValues, NV: 0);
123 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
124 (*Result)[I] += NewValues[I];
125 }
126
127 Expected<SmallVector<int64_t, 4>> runWithCounter(
128 StringRef CounterName, ArrayRef<const char *> ValidationCounters,
129 SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
130 const ExegesisTarget &ET = State.getExegesisTarget();
131 char *const ScratchPtr = Scratch->ptr();
132 auto CounterOrError =
133 ET.createCounter(CounterName, State, ValidationCounters);
134
135 if (!CounterOrError)
136 return CounterOrError.takeError();
137
138 pfm::CounterGroup *Counter = CounterOrError.get().get();
139 Scratch->clear();
140 {
141 auto PS = ET.withSavedState();
142 CrashRecoveryContext CRC;
143 CrashRecoveryContext::Enable();
144 const bool Crashed = !CRC.RunSafely(Fn: [this, Counter, ScratchPtr]() {
145 Counter->start();
146 this->Function(ScratchPtr);
147 Counter->stop();
148 });
149 CrashRecoveryContext::Disable();
150 PS.reset();
151 if (Crashed) {
152#ifdef LLVM_ON_UNIX
153 // See "Exit Status for Commands":
154 // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
155 constexpr const int kSigOffset = 128;
156 return make_error<SnippetSignal>(Args: CRC.RetCode - kSigOffset);
157#else
158 // The exit code of the process on windows is not meaningful as a
159 // signal, so simply pass in -1 as the signal into the error.
160 return make_error<SnippetSignal>(-1);
161#endif // LLVM_ON_UNIX
162 }
163 }
164
165 auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
166 if (!ValidationValuesOrErr)
167 return ValidationValuesOrErr.takeError();
168
169 ArrayRef RealValidationValues = *ValidationValuesOrErr;
170 for (size_t I = 0; I < RealValidationValues.size(); ++I)
171 ValidationCounterValues[I] = RealValidationValues[I];
172
173 return Counter->readOrError(FunctionBytes: Function.getFunctionBytes());
174 }
175
176 const LLVMState &State;
177 const ExecutableFunction Function;
178 BenchmarkRunner::ScratchSpace *const Scratch;
179};
180
181#ifdef __linux__
182// The following class implements a function executor that executes the
183// benchmark code within a subprocess rather than within the main llvm-exegesis
184// process. This allows for much more control over the execution context of the
185// snippet, particularly with regard to memory. This class performs all the
186// necessary functions to create the subprocess, execute the snippet in the
187// subprocess, and report results/handle errors.
188class SubProcessFunctionExecutorImpl
189 : public BenchmarkRunner::FunctionExecutor {
190public:
191 static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
192 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
193 const BenchmarkKey &Key) {
194 Expected<ExecutableFunction> EF =
195 ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
196 if (!EF)
197 return EF.takeError();
198
199 return std::unique_ptr<SubProcessFunctionExecutorImpl>(
200 new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key));
201 }
202
203private:
204 SubProcessFunctionExecutorImpl(const LLVMState &State,
205 ExecutableFunction Function,
206 const BenchmarkKey &Key)
207 : State(State), Function(std::move(Function)), Key(Key) {}
208
209 enum ChildProcessExitCodeE {
210 CounterFDReadFailed = 1,
211 RSeqDisableFailed,
212 FunctionDataMappingFailed,
213 AuxiliaryMemorySetupFailed
214 };
215
216 StringRef childProcessExitCodeToString(int ExitCode) const {
217 switch (ExitCode) {
218 case ChildProcessExitCodeE::CounterFDReadFailed:
219 return "Counter file descriptor read failed";
220 case ChildProcessExitCodeE::RSeqDisableFailed:
221 return "Disabling restartable sequences failed";
222 case ChildProcessExitCodeE::FunctionDataMappingFailed:
223 return "Failed to map memory for assembled snippet";
224 case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
225 return "Failed to setup auxiliary memory";
226 default:
227 return "Child process returned with unknown exit code";
228 }
229 }
230
231 Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
232 struct msghdr Message = {};
233 char Buffer[CMSG_SPACE(sizeof(FD))];
234 memset(s: Buffer, c: 0, n: sizeof(Buffer));
235 Message.msg_control = Buffer;
236 Message.msg_controllen = sizeof(Buffer);
237
238 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
239 ControlMessage->cmsg_level = SOL_SOCKET;
240 ControlMessage->cmsg_type = SCM_RIGHTS;
241 ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
242
243 memcpy(CMSG_DATA(ControlMessage), src: &FD, n: sizeof(FD));
244
245 Message.msg_controllen = CMSG_SPACE(sizeof(FD));
246
247 ssize_t BytesWritten = sendmsg(fd: SocketFD, message: &Message, flags: 0);
248
249 if (BytesWritten < 0)
250 return make_error<Failure>(Args: "Failed to write FD to socket: " +
251 Twine(strerror(errno)));
252
253 return Error::success();
254 }
255
256 Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
257 struct msghdr Message = {};
258
259 char ControlBuffer[256];
260 Message.msg_control = ControlBuffer;
261 Message.msg_controllen = sizeof(ControlBuffer);
262
263 ssize_t BytesRead = recvmsg(fd: SocketFD, message: &Message, flags: 0);
264
265 if (BytesRead < 0)
266 return make_error<Failure>(Args: "Failed to read FD from socket: " +
267 Twine(strerror(errno)));
268
269 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
270
271 int FD;
272
273 if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
274 return make_error<Failure>(Args: "Failed to get correct number of bytes for "
275 "file descriptor from socket.");
276
277 memcpy(dest: &FD, CMSG_DATA(ControlMessage), n: sizeof(FD));
278
279 return FD;
280 }
281
282 Error
283 runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
284 SmallVectorImpl<int64_t> &CounterValues,
285 ArrayRef<const char *> ValidationCounters,
286 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
287 auto WriteFDClose = make_scope_exit(F: [WriteFD]() { close(fd: WriteFD); });
288 const ExegesisTarget &ET = State.getExegesisTarget();
289 auto CounterOrError =
290 ET.createCounter(CounterName, State, ValidationCounters, ProcessID: ChildPID);
291
292 if (!CounterOrError)
293 return CounterOrError.takeError();
294
295 pfm::CounterGroup *Counter = CounterOrError.get().get();
296
297 // Make sure to attach to the process (and wait for the sigstop to be
298 // delivered and for the process to continue) before we write to the counter
299 // file descriptor. Attaching to the process before writing to the socket
300 // ensures that the subprocess at most has blocked on the read call. If we
301 // attach afterwards, the subprocess might exit before we get to the attach
302 // call due to effects like scheduler contention, introducing transient
303 // failures.
304 if (ptrace(request: PTRACE_ATTACH, ChildPID, NULL, NULL) != 0)
305 return make_error<Failure>(Args: "Failed to attach to the child process: " +
306 Twine(strerror(errno)));
307
308 if (waitpid(pid: ChildPID, NULL, options: 0) == -1) {
309 return make_error<Failure>(
310 Args: "Failed to wait for child process to stop after attaching: " +
311 Twine(strerror(errno)));
312 }
313
314 if (ptrace(request: PTRACE_CONT, ChildPID, NULL, NULL) != 0)
315 return make_error<Failure>(
316 Args: "Failed to continue execution of the child process: " +
317 Twine(strerror(errno)));
318
319 int CounterFileDescriptor = Counter->getFileDescriptor();
320 Error SendError =
321 sendFileDescriptorThroughSocket(SocketFD: WriteFD, FD: CounterFileDescriptor);
322
323 if (SendError)
324 return SendError;
325
326 int ChildStatus;
327 if (waitpid(pid: ChildPID, stat_loc: &ChildStatus, options: 0) == -1) {
328 return make_error<Failure>(
329 Args: "Waiting for the child process to complete failed: " +
330 Twine(strerror(errno)));
331 }
332
333 if (WIFEXITED(ChildStatus)) {
334 int ChildExitCode = WEXITSTATUS(ChildStatus);
335 if (ChildExitCode == 0) {
336 // The child exited succesfully, read counter values and return
337 // success.
338 auto CounterValueOrErr = Counter->readOrError();
339 if (!CounterValueOrErr)
340 return CounterValueOrErr.takeError();
341 CounterValues = std::move(*CounterValueOrErr);
342
343 auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
344 if (!ValidationValuesOrErr)
345 return ValidationValuesOrErr.takeError();
346
347 ArrayRef RealValidationValues = *ValidationValuesOrErr;
348 for (size_t I = 0; I < RealValidationValues.size(); ++I)
349 ValidationCounterValues[I] = RealValidationValues[I];
350
351 return Error::success();
352 }
353 // The child exited, but not successfully.
354 return make_error<Failure>(
355 Args: "Child benchmarking process exited with non-zero exit code: " +
356 childProcessExitCodeToString(ExitCode: ChildExitCode));
357 }
358
359 // An error was encountered running the snippet, process it
360 siginfo_t ChildSignalInfo;
361 if (ptrace(request: PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) {
362 return make_error<Failure>(Args: "Getting signal info from the child failed: " +
363 Twine(strerror(errno)));
364 }
365
366 // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
367 // handlers to run, and calling SIGTERM would mean that ptrace will force
368 // it to block in the signal-delivery-stop for the SIGSEGV/other signals,
369 // and upon exit.
370 if (kill(pid: ChildPID, SIGKILL) == -1)
371 return make_error<Failure>(Args: "Failed to kill child benchmarking proces: " +
372 Twine(strerror(errno)));
373
374 // Wait for the process to exit so that there are no zombie processes left
375 // around.
376 if (waitpid(pid: ChildPID, NULL, options: 0) == -1)
377 return make_error<Failure>(Args: "Failed to wait for process to die: " +
378 Twine(strerror(errno)));
379
380 if (ChildSignalInfo.si_signo == SIGSEGV)
381 return make_error<SnippetSegmentationFault>(
382 Args: reinterpret_cast<intptr_t>(ChildSignalInfo.si_addr));
383
384 return make_error<SnippetSignal>(Args&: ChildSignalInfo.si_signo);
385 }
386
387 Error createSubProcessAndRunBenchmark(
388 StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
389 ArrayRef<const char *> ValidationCounters,
390 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
391 int PipeFiles[2];
392 int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, protocol: 0, fds: PipeFiles);
393 if (PipeSuccessOrErr != 0) {
394 return make_error<Failure>(
395 Args: "Failed to create a pipe for interprocess communication between "
396 "llvm-exegesis and the benchmarking subprocess: " +
397 Twine(strerror(errno)));
398 }
399
400 SubprocessMemory SPMemory;
401 Error MemoryInitError = SPMemory.initializeSubprocessMemory(ProcessID: getpid());
402 if (MemoryInitError)
403 return MemoryInitError;
404
405 Error AddMemDefError =
406 SPMemory.addMemoryDefinition(MemoryDefinitions: Key.MemoryValues, ProcessID: getpid());
407 if (AddMemDefError)
408 return AddMemDefError;
409
410 long ParentTID = SubprocessMemory::getCurrentTID();
411 pid_t ParentOrChildPID = fork();
412
413 if (ParentOrChildPID == -1) {
414 return make_error<Failure>(Args: "Failed to create child process: " +
415 Twine(strerror(errno)));
416 }
417
418 if (ParentOrChildPID == 0) {
419 // We are in the child process, close the write end of the pipe.
420 close(fd: PipeFiles[1]);
421 // Unregister handlers, signal handling is now handled through ptrace in
422 // the host process.
423 sys::unregisterHandlers();
424 runChildSubprocess(Pipe: PipeFiles[0], Key, ParentTID);
425 // The child process terminates in the above function, so we should never
426 // get to this point.
427 llvm_unreachable("Child process didn't exit when expected.");
428 }
429
430 // Close the read end of the pipe as we only need to write to the subprocess
431 // from the parent process.
432 close(fd: PipeFiles[0]);
433 return runParentProcess(ChildPID: ParentOrChildPID, WriteFD: PipeFiles[1], CounterName,
434 CounterValues, ValidationCounters,
435 ValidationCounterValues);
436 }
437
438 void disableCoreDumps() const {
439 struct rlimit rlim;
440
441 rlim.rlim_cur = 0;
442 setrlimit(RLIMIT_CORE, rlimits: &rlim);
443 }
444
445 [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
446 long ParentTID) const {
447 // Disable core dumps in the child process as otherwise everytime we
448 // encounter an execution failure like a segmentation fault, we will create
449 // a core dump. We report the information directly rather than require the
450 // user inspect a core dump.
451 disableCoreDumps();
452
453 // The following occurs within the benchmarking subprocess.
454 pid_t ParentPID = getppid();
455
456 Expected<int> CounterFileDescriptorOrError =
457 getFileDescriptorFromSocket(SocketFD: Pipe);
458
459 if (!CounterFileDescriptorOrError)
460 exit(status: ChildProcessExitCodeE::CounterFDReadFailed);
461
462 int CounterFileDescriptor = *CounterFileDescriptorOrError;
463
464// Glibc versions greater than 2.35 automatically call rseq during
465// initialization. Unmapping the region that glibc sets up for this causes
466// segfaults in the program. Unregister the rseq region so that we can safely
467// unmap it later
468#ifdef GLIBC_INITS_RSEQ
469 unsigned int RseqStructSize = __rseq_size;
470
471 // Glibc v2.40 (the change is also expected to be backported to v2.35)
472 // changes the definition of __rseq_size to be the usable area of the struct
473 // rather than the actual size of the struct. v2.35 uses only 20 bytes of
474 // the 32 byte struct. For now, it should be safe to assume that if the
475 // usable size is less than 32, the actual size of the struct will be 32
476 // bytes given alignment requirements.
477 if (__rseq_size < 32)
478 RseqStructSize = 32;
479
480 long RseqDisableOutput =
481 syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset,
482 RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
483 if (RseqDisableOutput != 0)
484 exit(status: ChildProcessExitCodeE::RSeqDisableFailed);
485#endif // GLIBC_INITS_RSEQ
486
487 // The frontend that generates the memory annotation structures should
488 // validate that the address to map the snippet in at is a multiple of
489 // the page size. Assert that this is true here.
490 assert(Key.SnippetAddress % getpagesize() == 0 &&
491 "The snippet address needs to be aligned to a page boundary.");
492
493 size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
494 void *MapAddress = NULL;
495 int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
496
497 if (Key.SnippetAddress != 0) {
498 MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
499 MapFlags |= MAP_FIXED_NOREPLACE;
500 }
501
502 char *FunctionDataCopy =
503 (char *)mmap(addr: MapAddress, len: FunctionDataCopySize, PROT_READ | PROT_WRITE,
504 flags: MapFlags, fd: 0, offset: 0);
505 if ((intptr_t)FunctionDataCopy == -1)
506 exit(status: ChildProcessExitCodeE::FunctionDataMappingFailed);
507
508 memcpy(dest: FunctionDataCopy, src: this->Function.FunctionBytes.data(),
509 n: this->Function.FunctionBytes.size());
510 mprotect(addr: FunctionDataCopy, len: FunctionDataCopySize, PROT_READ | PROT_EXEC);
511
512 Expected<int> AuxMemFDOrError =
513 SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
514 MemoryDefinitions: Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
515 if (!AuxMemFDOrError)
516 exit(status: ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
517
518 ((void (*)(size_t, int))(intptr_t)FunctionDataCopy)(FunctionDataCopySize,
519 *AuxMemFDOrError);
520
521 exit(status: 0);
522 }
523
524 Expected<SmallVector<int64_t, 4>> runWithCounter(
525 StringRef CounterName, ArrayRef<const char *> ValidationCounters,
526 SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
527 SmallVector<int64_t, 4> Value(1, 0);
528 Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
529 CounterName, CounterValues&: Value, ValidationCounters, ValidationCounterValues);
530
531 if (PossibleBenchmarkError)
532 return std::move(PossibleBenchmarkError);
533
534 return Value;
535 }
536
537 const LLVMState &State;
538 const ExecutableFunction Function;
539 const BenchmarkKey &Key;
540};
541#endif // __linux__
542} // namespace
543
544Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
545 const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
546 unsigned MinInstructions, unsigned LoopBodySize,
547 bool GenerateMemoryInstructions) const {
548 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
549 SmallString<0> Buffer;
550 raw_svector_ostream OS(Buffer);
551 if (Error E = assembleToStream(
552 ET: State.getExegesisTarget(), TM: State.createTargetMachine(), LiveIns: BC.LiveIns,
553 Fill: Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
554 CleanupMemory: GenerateMemoryInstructions),
555 AsmStreamm&: OS, Key: BC.Key, GenerateMemoryInstructions)) {
556 return std::move(E);
557 }
558 return Buffer;
559}
560
561Expected<BenchmarkRunner::RunnableConfiguration>
562BenchmarkRunner::getRunnableConfiguration(
563 const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
564 const SnippetRepetitor &Repetitor) const {
565 RunnableConfiguration RC;
566
567 Benchmark &BenchmarkResult = RC.BenchmarkResult;
568 BenchmarkResult.Mode = Mode;
569 BenchmarkResult.CpuName =
570 std::string(State.getTargetMachine().getTargetCPU());
571 BenchmarkResult.LLVMTriple =
572 State.getTargetMachine().getTargetTriple().normalize();
573 BenchmarkResult.MinInstructions = MinInstructions;
574 BenchmarkResult.Info = BC.Info;
575
576 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
577
578 bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
579
580 BenchmarkResult.Key = BC.Key;
581
582 // Assemble at least kMinInstructionsForSnippet instructions by repeating
583 // the snippet for debug/analysis. This is so that the user clearly
584 // understands that the inside instructions are repeated.
585 if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
586 const int MinInstructionsForSnippet = 4 * Instructions.size();
587 const int LoopBodySizeForSnippet = 2 * Instructions.size();
588 auto Snippet =
589 assembleSnippet(BC, Repetitor, MinInstructions: MinInstructionsForSnippet,
590 LoopBodySize: LoopBodySizeForSnippet, GenerateMemoryInstructions);
591 if (Error E = Snippet.takeError())
592 return std::move(E);
593
594 if (auto Err = getBenchmarkFunctionBytes(InputData: *Snippet,
595 Bytes&: BenchmarkResult.AssembledSnippet))
596 return std::move(Err);
597 }
598
599 // Assemble enough repetitions of the snippet so we have at least
600 // MinInstructions instructions.
601 if (BenchmarkPhaseSelector >
602 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
603 auto Snippet =
604 assembleSnippet(BC, Repetitor, MinInstructions: BenchmarkResult.MinInstructions,
605 LoopBodySize, GenerateMemoryInstructions);
606 if (Error E = Snippet.takeError())
607 return std::move(E);
608 RC.ObjectFile = getObjectFromBuffer(Buffer: *Snippet);
609 }
610
611 return std::move(RC);
612}
613
614Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
615BenchmarkRunner::createFunctionExecutor(
616 object::OwningBinary<object::ObjectFile> ObjectFile,
617 const BenchmarkKey &Key) const {
618 switch (ExecutionMode) {
619 case ExecutionModeE::InProcess: {
620 auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
621 State, Obj: std::move(ObjectFile), Scratch: Scratch.get());
622 if (!InProcessExecutorOrErr)
623 return InProcessExecutorOrErr.takeError();
624
625 return std::move(*InProcessExecutorOrErr);
626 }
627 case ExecutionModeE::SubProcess: {
628#ifdef __linux__
629 auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
630 State, Obj: std::move(ObjectFile), Key);
631 if (!SubProcessExecutorOrErr)
632 return SubProcessExecutorOrErr.takeError();
633
634 return std::move(*SubProcessExecutorOrErr);
635#else
636 return make_error<Failure>(
637 "The subprocess execution mode is only supported on Linux");
638#endif
639 }
640 }
641 llvm_unreachable("ExecutionMode is outside expected range");
642}
643
644std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
645 RunnableConfiguration &&RC,
646 const std::optional<StringRef> &DumpFile) const {
647 Benchmark &BenchmarkResult = RC.BenchmarkResult;
648 object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
649
650 if (DumpFile && BenchmarkPhaseSelector >
651 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
652 auto ObjectFilePath =
653 writeObjectFile(Buffer: ObjectFile.getBinary()->getData(), FileName: *DumpFile);
654 if (Error E = ObjectFilePath.takeError()) {
655 return {std::move(E), std::move(BenchmarkResult)};
656 }
657 outs() << "Check generated assembly with: /usr/bin/objdump -d "
658 << *ObjectFilePath << "\n";
659 }
660
661 if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
662 BenchmarkResult.Error = "actual measurements skipped.";
663 return {Error::success(), std::move(BenchmarkResult)};
664 }
665
666 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
667 createFunctionExecutor(ObjectFile: std::move(ObjectFile), Key: RC.BenchmarkResult.Key);
668 if (!Executor)
669 return {Executor.takeError(), std::move(BenchmarkResult)};
670 auto NewMeasurements = runMeasurements(Executor: **Executor);
671
672 if (Error E = NewMeasurements.takeError()) {
673 return {std::move(E), std::move(BenchmarkResult)};
674 }
675 assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions");
676 for (BenchmarkMeasure &BM : *NewMeasurements) {
677 // Scale the measurements by the number of instructions.
678 BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
679 // Scale the measurements by the number of times the entire snippet is
680 // repeated.
681 BM.PerSnippetValue /=
682 std::ceil(x: BenchmarkResult.MinInstructions /
683 static_cast<double>(BenchmarkResult.Key.Instructions.size()));
684 }
685 BenchmarkResult.Measurements = std::move(*NewMeasurements);
686
687 return {Error::success(), std::move(BenchmarkResult)};
688}
689
690Expected<std::string>
691BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
692 int ResultFD = 0;
693 SmallString<256> ResultPath = FileName;
694 if (Error E = errorCodeToError(
695 EC: FileName.empty() ? sys::fs::createTemporaryFile(Prefix: "snippet", Suffix: "o",
696 ResultFD, ResultPath)
697 : sys::fs::openFileForReadWrite(
698 Name: FileName, ResultFD, Disp: sys::fs::CD_CreateAlways,
699 Flags: sys::fs::OF_None)))
700 return std::move(E);
701 raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
702 OFS.write(Ptr: Buffer.data(), Size: Buffer.size());
703 OFS.flush();
704 return std::string(ResultPath);
705}
706
707static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
708 const ValidationEvent RHS) {
709 return static_cast<int>(LHS.first) < static_cast<int>(RHS);
710}
711
712Error BenchmarkRunner::getValidationCountersToRun(
713 SmallVector<const char *> &ValCountersToRun) const {
714 const PfmCountersInfo &PCI = State.getPfmCounters();
715 ValCountersToRun.reserve(N: ValidationCounters.size());
716
717 ValCountersToRun.reserve(N: ValidationCounters.size());
718 ArrayRef TargetValidationEvents(PCI.ValidationEvents,
719 PCI.NumValidationEvents);
720 for (const ValidationEvent RequestedValEvent : ValidationCounters) {
721 auto ValCounterIt =
722 lower_bound(Range&: TargetValidationEvents, Value: RequestedValEvent, C: EventLessThan);
723 if (ValCounterIt == TargetValidationEvents.end() ||
724 ValCounterIt->first != RequestedValEvent)
725 return make_error<Failure>(Args: "Cannot create validation counter");
726
727 assert(ValCounterIt->first == RequestedValEvent &&
728 "The array of validation events from the target should be sorted");
729 ValCountersToRun.push_back(Elt: ValCounterIt->second);
730 }
731
732 return Error::success();
733}
734
735BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
736
737} // namespace exegesis
738} // namespace llvm
739