1 | //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include <cmath> |
10 | #include <memory> |
11 | #include <string> |
12 | |
13 | #include "Assembler.h" |
14 | #include "BenchmarkRunner.h" |
15 | #include "Error.h" |
16 | #include "MCInstrDescView.h" |
17 | #include "MmapUtils.h" |
18 | #include "PerfHelper.h" |
19 | #include "SubprocessMemory.h" |
20 | #include "Target.h" |
21 | #include "llvm/ADT/ScopeExit.h" |
22 | #include "llvm/ADT/StringExtras.h" |
23 | #include "llvm/ADT/StringRef.h" |
24 | #include "llvm/ADT/Twine.h" |
25 | #include "llvm/Support/CrashRecoveryContext.h" |
26 | #include "llvm/Support/Error.h" |
27 | #include "llvm/Support/FileSystem.h" |
28 | #include "llvm/Support/MemoryBuffer.h" |
29 | #include "llvm/Support/Program.h" |
30 | #include "llvm/Support/Signals.h" |
31 | #include "llvm/Support/SystemZ/zOSSupport.h" |
32 | |
33 | #ifdef __linux__ |
34 | #ifdef HAVE_LIBPFM |
35 | #include <perfmon/perf_event.h> |
36 | #endif |
37 | #include <sys/mman.h> |
38 | #include <sys/ptrace.h> |
39 | #include <sys/resource.h> |
40 | #include <sys/socket.h> |
41 | #include <sys/syscall.h> |
42 | #include <sys/wait.h> |
43 | #include <unistd.h> |
44 | |
45 | #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER) |
46 | #include <sys/rseq.h> |
47 | #if defined(RSEQ_SIG) && defined(SYS_rseq) |
48 | #define GLIBC_INITS_RSEQ |
49 | #endif |
50 | #endif |
51 | #endif // __linux__ |
52 | |
53 | namespace llvm { |
54 | namespace exegesis { |
55 | |
56 | BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode, |
57 | BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
58 | ExecutionModeE ExecutionMode, |
59 | ArrayRef<ValidationEvent> ValCounters) |
60 | : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector), |
61 | ExecutionMode(ExecutionMode), ValidationCounters(ValCounters), |
62 | Scratch(std::make_unique<ScratchSpace>()) {} |
63 | |
64 | BenchmarkRunner::~BenchmarkRunner() = default; |
65 | |
66 | void BenchmarkRunner::FunctionExecutor::accumulateCounterValues( |
67 | const SmallVectorImpl<int64_t> &NewValues, |
68 | SmallVectorImpl<int64_t> *Result) { |
69 | const size_t NumValues = std::max(a: NewValues.size(), b: Result->size()); |
70 | if (NumValues > Result->size()) |
71 | Result->resize(N: NumValues, NV: 0); |
72 | for (size_t I = 0, End = NewValues.size(); I < End; ++I) |
73 | (*Result)[I] += NewValues[I]; |
74 | } |
75 | |
76 | Expected<SmallVector<int64_t, 4>> |
77 | BenchmarkRunner::FunctionExecutor::runAndSample( |
78 | const char *Counters, ArrayRef<const char *> ValidationCounters, |
79 | SmallVectorImpl<int64_t> &ValidationCounterValues) const { |
80 | // We sum counts when there are several counters for a single ProcRes |
81 | // (e.g. P23 on SandyBridge). |
82 | SmallVector<int64_t, 4> CounterValues; |
83 | SmallVector<StringRef, 2> CounterNames; |
84 | StringRef(Counters).split(A&: CounterNames, Separator: '+'); |
85 | for (auto &CounterName : CounterNames) { |
86 | CounterName = CounterName.trim(); |
87 | Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter( |
88 | CounterName, ValidationCounters, ValidationCounterValues); |
89 | if (!ValueOrError) |
90 | return ValueOrError.takeError(); |
91 | accumulateCounterValues(NewValues: ValueOrError.get(), Result: &CounterValues); |
92 | } |
93 | return CounterValues; |
94 | } |
95 | |
96 | namespace { |
97 | class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { |
98 | public: |
99 | static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>> |
100 | create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj, |
101 | BenchmarkRunner::ScratchSpace *Scratch) { |
102 | Expected<ExecutableFunction> EF = |
103 | ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj)); |
104 | |
105 | if (!EF) |
106 | return EF.takeError(); |
107 | |
108 | return std::unique_ptr<InProcessFunctionExecutorImpl>( |
109 | new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch)); |
110 | } |
111 | |
112 | private: |
113 | InProcessFunctionExecutorImpl(const LLVMState &State, |
114 | ExecutableFunction Function, |
115 | BenchmarkRunner::ScratchSpace *Scratch) |
116 | : State(State), Function(std::move(Function)), Scratch(Scratch) {} |
117 | |
118 | static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues, |
119 | SmallVector<int64_t, 4> *Result) { |
120 | const size_t NumValues = std::max(a: NewValues.size(), b: Result->size()); |
121 | if (NumValues > Result->size()) |
122 | Result->resize(N: NumValues, NV: 0); |
123 | for (size_t I = 0, End = NewValues.size(); I < End; ++I) |
124 | (*Result)[I] += NewValues[I]; |
125 | } |
126 | |
127 | Expected<SmallVector<int64_t, 4>> runWithCounter( |
128 | StringRef CounterName, ArrayRef<const char *> ValidationCounters, |
129 | SmallVectorImpl<int64_t> &ValidationCounterValues) const override { |
130 | const ExegesisTarget &ET = State.getExegesisTarget(); |
131 | char *const ScratchPtr = Scratch->ptr(); |
132 | auto CounterOrError = |
133 | ET.createCounter(CounterName, State, ValidationCounters); |
134 | |
135 | if (!CounterOrError) |
136 | return CounterOrError.takeError(); |
137 | |
138 | pfm::CounterGroup *Counter = CounterOrError.get().get(); |
139 | Scratch->clear(); |
140 | { |
141 | auto PS = ET.withSavedState(); |
142 | CrashRecoveryContext CRC; |
143 | CrashRecoveryContext::Enable(); |
144 | const bool Crashed = !CRC.RunSafely(Fn: [this, Counter, ScratchPtr]() { |
145 | Counter->start(); |
146 | this->Function(ScratchPtr); |
147 | Counter->stop(); |
148 | }); |
149 | CrashRecoveryContext::Disable(); |
150 | PS.reset(); |
151 | if (Crashed) { |
152 | #ifdef LLVM_ON_UNIX |
153 | // See "Exit Status for Commands": |
154 | // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html |
155 | constexpr const int kSigOffset = 128; |
156 | return make_error<SnippetSignal>(Args: CRC.RetCode - kSigOffset); |
157 | #else |
158 | // The exit code of the process on windows is not meaningful as a |
159 | // signal, so simply pass in -1 as the signal into the error. |
160 | return make_error<SnippetSignal>(-1); |
161 | #endif // LLVM_ON_UNIX |
162 | } |
163 | } |
164 | |
165 | auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); |
166 | if (!ValidationValuesOrErr) |
167 | return ValidationValuesOrErr.takeError(); |
168 | |
169 | ArrayRef RealValidationValues = *ValidationValuesOrErr; |
170 | for (size_t I = 0; I < RealValidationValues.size(); ++I) |
171 | ValidationCounterValues[I] = RealValidationValues[I]; |
172 | |
173 | return Counter->readOrError(FunctionBytes: Function.getFunctionBytes()); |
174 | } |
175 | |
176 | const LLVMState &State; |
177 | const ExecutableFunction Function; |
178 | BenchmarkRunner::ScratchSpace *const Scratch; |
179 | }; |
180 | |
181 | #ifdef __linux__ |
182 | // The following class implements a function executor that executes the |
183 | // benchmark code within a subprocess rather than within the main llvm-exegesis |
184 | // process. This allows for much more control over the execution context of the |
185 | // snippet, particularly with regard to memory. This class performs all the |
186 | // necessary functions to create the subprocess, execute the snippet in the |
187 | // subprocess, and report results/handle errors. |
188 | class SubProcessFunctionExecutorImpl |
189 | : public BenchmarkRunner::FunctionExecutor { |
190 | public: |
191 | static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>> |
192 | create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj, |
193 | const BenchmarkKey &Key) { |
194 | Expected<ExecutableFunction> EF = |
195 | ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj)); |
196 | if (!EF) |
197 | return EF.takeError(); |
198 | |
199 | return std::unique_ptr<SubProcessFunctionExecutorImpl>( |
200 | new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key)); |
201 | } |
202 | |
203 | private: |
204 | SubProcessFunctionExecutorImpl(const LLVMState &State, |
205 | ExecutableFunction Function, |
206 | const BenchmarkKey &Key) |
207 | : State(State), Function(std::move(Function)), Key(Key) {} |
208 | |
209 | enum ChildProcessExitCodeE { |
210 | CounterFDReadFailed = 1, |
211 | RSeqDisableFailed, |
212 | FunctionDataMappingFailed, |
213 | AuxiliaryMemorySetupFailed |
214 | }; |
215 | |
216 | StringRef childProcessExitCodeToString(int ExitCode) const { |
217 | switch (ExitCode) { |
218 | case ChildProcessExitCodeE::CounterFDReadFailed: |
219 | return "Counter file descriptor read failed" ; |
220 | case ChildProcessExitCodeE::RSeqDisableFailed: |
221 | return "Disabling restartable sequences failed" ; |
222 | case ChildProcessExitCodeE::FunctionDataMappingFailed: |
223 | return "Failed to map memory for assembled snippet" ; |
224 | case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed: |
225 | return "Failed to setup auxiliary memory" ; |
226 | default: |
227 | return "Child process returned with unknown exit code" ; |
228 | } |
229 | } |
230 | |
231 | Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const { |
232 | struct msghdr Message = {}; |
233 | char Buffer[CMSG_SPACE(sizeof(FD))]; |
234 | memset(s: Buffer, c: 0, n: sizeof(Buffer)); |
235 | Message.msg_control = Buffer; |
236 | Message.msg_controllen = sizeof(Buffer); |
237 | |
238 | struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); |
239 | ControlMessage->cmsg_level = SOL_SOCKET; |
240 | ControlMessage->cmsg_type = SCM_RIGHTS; |
241 | ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD)); |
242 | |
243 | memcpy(CMSG_DATA(ControlMessage), src: &FD, n: sizeof(FD)); |
244 | |
245 | Message.msg_controllen = CMSG_SPACE(sizeof(FD)); |
246 | |
247 | ssize_t BytesWritten = sendmsg(fd: SocketFD, message: &Message, flags: 0); |
248 | |
249 | if (BytesWritten < 0) |
250 | return make_error<Failure>(Args: "Failed to write FD to socket: " + |
251 | Twine(strerror(errno))); |
252 | |
253 | return Error::success(); |
254 | } |
255 | |
256 | Expected<int> getFileDescriptorFromSocket(int SocketFD) const { |
257 | struct msghdr Message = {}; |
258 | |
259 | char ControlBuffer[256]; |
260 | Message.msg_control = ControlBuffer; |
261 | Message.msg_controllen = sizeof(ControlBuffer); |
262 | |
263 | ssize_t BytesRead = recvmsg(fd: SocketFD, message: &Message, flags: 0); |
264 | |
265 | if (BytesRead < 0) |
266 | return make_error<Failure>(Args: "Failed to read FD from socket: " + |
267 | Twine(strerror(errno))); |
268 | |
269 | struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); |
270 | |
271 | int FD; |
272 | |
273 | if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD))) |
274 | return make_error<Failure>(Args: "Failed to get correct number of bytes for " |
275 | "file descriptor from socket." ); |
276 | |
277 | memcpy(dest: &FD, CMSG_DATA(ControlMessage), n: sizeof(FD)); |
278 | |
279 | return FD; |
280 | } |
281 | |
282 | Error |
283 | runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName, |
284 | SmallVectorImpl<int64_t> &CounterValues, |
285 | ArrayRef<const char *> ValidationCounters, |
286 | SmallVectorImpl<int64_t> &ValidationCounterValues) const { |
287 | auto WriteFDClose = make_scope_exit(F: [WriteFD]() { close(fd: WriteFD); }); |
288 | const ExegesisTarget &ET = State.getExegesisTarget(); |
289 | auto CounterOrError = |
290 | ET.createCounter(CounterName, State, ValidationCounters, ProcessID: ChildPID); |
291 | |
292 | if (!CounterOrError) |
293 | return CounterOrError.takeError(); |
294 | |
295 | pfm::CounterGroup *Counter = CounterOrError.get().get(); |
296 | |
297 | // Make sure to attach to the process (and wait for the sigstop to be |
298 | // delivered and for the process to continue) before we write to the counter |
299 | // file descriptor. Attaching to the process before writing to the socket |
300 | // ensures that the subprocess at most has blocked on the read call. If we |
301 | // attach afterwards, the subprocess might exit before we get to the attach |
302 | // call due to effects like scheduler contention, introducing transient |
303 | // failures. |
304 | if (ptrace(request: PTRACE_ATTACH, ChildPID, NULL, NULL) != 0) |
305 | return make_error<Failure>(Args: "Failed to attach to the child process: " + |
306 | Twine(strerror(errno))); |
307 | |
308 | if (waitpid(pid: ChildPID, NULL, options: 0) == -1) { |
309 | return make_error<Failure>( |
310 | Args: "Failed to wait for child process to stop after attaching: " + |
311 | Twine(strerror(errno))); |
312 | } |
313 | |
314 | if (ptrace(request: PTRACE_CONT, ChildPID, NULL, NULL) != 0) |
315 | return make_error<Failure>( |
316 | Args: "Failed to continue execution of the child process: " + |
317 | Twine(strerror(errno))); |
318 | |
319 | int CounterFileDescriptor = Counter->getFileDescriptor(); |
320 | Error SendError = |
321 | sendFileDescriptorThroughSocket(SocketFD: WriteFD, FD: CounterFileDescriptor); |
322 | |
323 | if (SendError) |
324 | return SendError; |
325 | |
326 | int ChildStatus; |
327 | if (waitpid(pid: ChildPID, stat_loc: &ChildStatus, options: 0) == -1) { |
328 | return make_error<Failure>( |
329 | Args: "Waiting for the child process to complete failed: " + |
330 | Twine(strerror(errno))); |
331 | } |
332 | |
333 | if (WIFEXITED(ChildStatus)) { |
334 | int ChildExitCode = WEXITSTATUS(ChildStatus); |
335 | if (ChildExitCode == 0) { |
336 | // The child exited succesfully, read counter values and return |
337 | // success. |
338 | auto CounterValueOrErr = Counter->readOrError(); |
339 | if (!CounterValueOrErr) |
340 | return CounterValueOrErr.takeError(); |
341 | CounterValues = std::move(*CounterValueOrErr); |
342 | |
343 | auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); |
344 | if (!ValidationValuesOrErr) |
345 | return ValidationValuesOrErr.takeError(); |
346 | |
347 | ArrayRef RealValidationValues = *ValidationValuesOrErr; |
348 | for (size_t I = 0; I < RealValidationValues.size(); ++I) |
349 | ValidationCounterValues[I] = RealValidationValues[I]; |
350 | |
351 | return Error::success(); |
352 | } |
353 | // The child exited, but not successfully. |
354 | return make_error<Failure>( |
355 | Args: "Child benchmarking process exited with non-zero exit code: " + |
356 | childProcessExitCodeToString(ExitCode: ChildExitCode)); |
357 | } |
358 | |
359 | // An error was encountered running the snippet, process it |
360 | siginfo_t ChildSignalInfo; |
361 | if (ptrace(request: PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) { |
362 | return make_error<Failure>(Args: "Getting signal info from the child failed: " + |
363 | Twine(strerror(errno))); |
364 | } |
365 | |
366 | // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM |
367 | // handlers to run, and calling SIGTERM would mean that ptrace will force |
368 | // it to block in the signal-delivery-stop for the SIGSEGV/other signals, |
369 | // and upon exit. |
370 | if (kill(pid: ChildPID, SIGKILL) == -1) |
371 | return make_error<Failure>(Args: "Failed to kill child benchmarking proces: " + |
372 | Twine(strerror(errno))); |
373 | |
374 | // Wait for the process to exit so that there are no zombie processes left |
375 | // around. |
376 | if (waitpid(pid: ChildPID, NULL, options: 0) == -1) |
377 | return make_error<Failure>(Args: "Failed to wait for process to die: " + |
378 | Twine(strerror(errno))); |
379 | |
380 | if (ChildSignalInfo.si_signo == SIGSEGV) |
381 | return make_error<SnippetSegmentationFault>( |
382 | Args: reinterpret_cast<intptr_t>(ChildSignalInfo.si_addr)); |
383 | |
384 | return make_error<SnippetSignal>(Args&: ChildSignalInfo.si_signo); |
385 | } |
386 | |
387 | Error createSubProcessAndRunBenchmark( |
388 | StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues, |
389 | ArrayRef<const char *> ValidationCounters, |
390 | SmallVectorImpl<int64_t> &ValidationCounterValues) const { |
391 | int PipeFiles[2]; |
392 | int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, protocol: 0, fds: PipeFiles); |
393 | if (PipeSuccessOrErr != 0) { |
394 | return make_error<Failure>( |
395 | Args: "Failed to create a pipe for interprocess communication between " |
396 | "llvm-exegesis and the benchmarking subprocess: " + |
397 | Twine(strerror(errno))); |
398 | } |
399 | |
400 | SubprocessMemory SPMemory; |
401 | Error MemoryInitError = SPMemory.initializeSubprocessMemory(ProcessID: getpid()); |
402 | if (MemoryInitError) |
403 | return MemoryInitError; |
404 | |
405 | Error AddMemDefError = |
406 | SPMemory.addMemoryDefinition(MemoryDefinitions: Key.MemoryValues, ProcessID: getpid()); |
407 | if (AddMemDefError) |
408 | return AddMemDefError; |
409 | |
410 | long ParentTID = SubprocessMemory::getCurrentTID(); |
411 | pid_t ParentOrChildPID = fork(); |
412 | |
413 | if (ParentOrChildPID == -1) { |
414 | return make_error<Failure>(Args: "Failed to create child process: " + |
415 | Twine(strerror(errno))); |
416 | } |
417 | |
418 | if (ParentOrChildPID == 0) { |
419 | // We are in the child process, close the write end of the pipe. |
420 | close(fd: PipeFiles[1]); |
421 | // Unregister handlers, signal handling is now handled through ptrace in |
422 | // the host process. |
423 | sys::unregisterHandlers(); |
424 | runChildSubprocess(Pipe: PipeFiles[0], Key, ParentTID); |
425 | // The child process terminates in the above function, so we should never |
426 | // get to this point. |
427 | llvm_unreachable("Child process didn't exit when expected." ); |
428 | } |
429 | |
430 | // Close the read end of the pipe as we only need to write to the subprocess |
431 | // from the parent process. |
432 | close(fd: PipeFiles[0]); |
433 | return runParentProcess(ChildPID: ParentOrChildPID, WriteFD: PipeFiles[1], CounterName, |
434 | CounterValues, ValidationCounters, |
435 | ValidationCounterValues); |
436 | } |
437 | |
438 | void disableCoreDumps() const { |
439 | struct rlimit rlim; |
440 | |
441 | rlim.rlim_cur = 0; |
442 | setrlimit(RLIMIT_CORE, rlimits: &rlim); |
443 | } |
444 | |
445 | [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key, |
446 | long ParentTID) const { |
447 | // Disable core dumps in the child process as otherwise everytime we |
448 | // encounter an execution failure like a segmentation fault, we will create |
449 | // a core dump. We report the information directly rather than require the |
450 | // user inspect a core dump. |
451 | disableCoreDumps(); |
452 | |
453 | // The following occurs within the benchmarking subprocess. |
454 | pid_t ParentPID = getppid(); |
455 | |
456 | Expected<int> CounterFileDescriptorOrError = |
457 | getFileDescriptorFromSocket(SocketFD: Pipe); |
458 | |
459 | if (!CounterFileDescriptorOrError) |
460 | exit(status: ChildProcessExitCodeE::CounterFDReadFailed); |
461 | |
462 | int CounterFileDescriptor = *CounterFileDescriptorOrError; |
463 | |
464 | // Glibc versions greater than 2.35 automatically call rseq during |
465 | // initialization. Unmapping the region that glibc sets up for this causes |
466 | // segfaults in the program. Unregister the rseq region so that we can safely |
467 | // unmap it later |
468 | #ifdef GLIBC_INITS_RSEQ |
469 | unsigned int RseqStructSize = __rseq_size; |
470 | |
471 | // Glibc v2.40 (the change is also expected to be backported to v2.35) |
472 | // changes the definition of __rseq_size to be the usable area of the struct |
473 | // rather than the actual size of the struct. v2.35 uses only 20 bytes of |
474 | // the 32 byte struct. For now, it should be safe to assume that if the |
475 | // usable size is less than 32, the actual size of the struct will be 32 |
476 | // bytes given alignment requirements. |
477 | if (__rseq_size < 32) |
478 | RseqStructSize = 32; |
479 | |
480 | long RseqDisableOutput = |
481 | syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset, |
482 | RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG); |
483 | if (RseqDisableOutput != 0) |
484 | exit(status: ChildProcessExitCodeE::RSeqDisableFailed); |
485 | #endif // GLIBC_INITS_RSEQ |
486 | |
487 | // The frontend that generates the memory annotation structures should |
488 | // validate that the address to map the snippet in at is a multiple of |
489 | // the page size. Assert that this is true here. |
490 | assert(Key.SnippetAddress % getpagesize() == 0 && |
491 | "The snippet address needs to be aligned to a page boundary." ); |
492 | |
493 | size_t FunctionDataCopySize = this->Function.FunctionBytes.size(); |
494 | void *MapAddress = NULL; |
495 | int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS; |
496 | |
497 | if (Key.SnippetAddress != 0) { |
498 | MapAddress = reinterpret_cast<void *>(Key.SnippetAddress); |
499 | MapFlags |= MAP_FIXED_NOREPLACE; |
500 | } |
501 | |
502 | char *FunctionDataCopy = |
503 | (char *)mmap(addr: MapAddress, len: FunctionDataCopySize, PROT_READ | PROT_WRITE, |
504 | flags: MapFlags, fd: 0, offset: 0); |
505 | if ((intptr_t)FunctionDataCopy == -1) |
506 | exit(status: ChildProcessExitCodeE::FunctionDataMappingFailed); |
507 | |
508 | memcpy(dest: FunctionDataCopy, src: this->Function.FunctionBytes.data(), |
509 | n: this->Function.FunctionBytes.size()); |
510 | mprotect(addr: FunctionDataCopy, len: FunctionDataCopySize, PROT_READ | PROT_EXEC); |
511 | |
512 | Expected<int> AuxMemFDOrError = |
513 | SubprocessMemory::setupAuxiliaryMemoryInSubprocess( |
514 | MemoryDefinitions: Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor); |
515 | if (!AuxMemFDOrError) |
516 | exit(status: ChildProcessExitCodeE::AuxiliaryMemorySetupFailed); |
517 | |
518 | ((void (*)(size_t, int))(intptr_t)FunctionDataCopy)(FunctionDataCopySize, |
519 | *AuxMemFDOrError); |
520 | |
521 | exit(status: 0); |
522 | } |
523 | |
524 | Expected<SmallVector<int64_t, 4>> runWithCounter( |
525 | StringRef CounterName, ArrayRef<const char *> ValidationCounters, |
526 | SmallVectorImpl<int64_t> &ValidationCounterValues) const override { |
527 | SmallVector<int64_t, 4> Value(1, 0); |
528 | Error PossibleBenchmarkError = createSubProcessAndRunBenchmark( |
529 | CounterName, CounterValues&: Value, ValidationCounters, ValidationCounterValues); |
530 | |
531 | if (PossibleBenchmarkError) |
532 | return std::move(PossibleBenchmarkError); |
533 | |
534 | return Value; |
535 | } |
536 | |
537 | const LLVMState &State; |
538 | const ExecutableFunction Function; |
539 | const BenchmarkKey &Key; |
540 | }; |
541 | #endif // __linux__ |
542 | } // namespace |
543 | |
544 | Expected<SmallString<0>> BenchmarkRunner::assembleSnippet( |
545 | const BenchmarkCode &BC, const SnippetRepetitor &Repetitor, |
546 | unsigned MinInstructions, unsigned LoopBodySize, |
547 | bool GenerateMemoryInstructions) const { |
548 | const std::vector<MCInst> &Instructions = BC.Key.Instructions; |
549 | SmallString<0> Buffer; |
550 | raw_svector_ostream OS(Buffer); |
551 | if (Error E = assembleToStream( |
552 | ET: State.getExegesisTarget(), TM: State.createTargetMachine(), LiveIns: BC.LiveIns, |
553 | Fill: Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize, |
554 | CleanupMemory: GenerateMemoryInstructions), |
555 | AsmStreamm&: OS, Key: BC.Key, GenerateMemoryInstructions)) { |
556 | return std::move(E); |
557 | } |
558 | return Buffer; |
559 | } |
560 | |
561 | Expected<BenchmarkRunner::RunnableConfiguration> |
562 | BenchmarkRunner::getRunnableConfiguration( |
563 | const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize, |
564 | const SnippetRepetitor &Repetitor) const { |
565 | RunnableConfiguration RC; |
566 | |
567 | Benchmark &BenchmarkResult = RC.BenchmarkResult; |
568 | BenchmarkResult.Mode = Mode; |
569 | BenchmarkResult.CpuName = |
570 | std::string(State.getTargetMachine().getTargetCPU()); |
571 | BenchmarkResult.LLVMTriple = |
572 | State.getTargetMachine().getTargetTriple().normalize(); |
573 | BenchmarkResult.MinInstructions = MinInstructions; |
574 | BenchmarkResult.Info = BC.Info; |
575 | |
576 | const std::vector<MCInst> &Instructions = BC.Key.Instructions; |
577 | |
578 | bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess; |
579 | |
580 | BenchmarkResult.Key = BC.Key; |
581 | |
582 | // Assemble at least kMinInstructionsForSnippet instructions by repeating |
583 | // the snippet for debug/analysis. This is so that the user clearly |
584 | // understands that the inside instructions are repeated. |
585 | if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) { |
586 | const int MinInstructionsForSnippet = 4 * Instructions.size(); |
587 | const int LoopBodySizeForSnippet = 2 * Instructions.size(); |
588 | auto Snippet = |
589 | assembleSnippet(BC, Repetitor, MinInstructions: MinInstructionsForSnippet, |
590 | LoopBodySize: LoopBodySizeForSnippet, GenerateMemoryInstructions); |
591 | if (Error E = Snippet.takeError()) |
592 | return std::move(E); |
593 | |
594 | if (auto Err = getBenchmarkFunctionBytes(InputData: *Snippet, |
595 | Bytes&: BenchmarkResult.AssembledSnippet)) |
596 | return std::move(Err); |
597 | } |
598 | |
599 | // Assemble enough repetitions of the snippet so we have at least |
600 | // MinInstructions instructions. |
601 | if (BenchmarkPhaseSelector > |
602 | BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { |
603 | auto Snippet = |
604 | assembleSnippet(BC, Repetitor, MinInstructions: BenchmarkResult.MinInstructions, |
605 | LoopBodySize, GenerateMemoryInstructions); |
606 | if (Error E = Snippet.takeError()) |
607 | return std::move(E); |
608 | RC.ObjectFile = getObjectFromBuffer(Buffer: *Snippet); |
609 | } |
610 | |
611 | return std::move(RC); |
612 | } |
613 | |
614 | Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> |
615 | BenchmarkRunner::createFunctionExecutor( |
616 | object::OwningBinary<object::ObjectFile> ObjectFile, |
617 | const BenchmarkKey &Key) const { |
618 | switch (ExecutionMode) { |
619 | case ExecutionModeE::InProcess: { |
620 | auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create( |
621 | State, Obj: std::move(ObjectFile), Scratch: Scratch.get()); |
622 | if (!InProcessExecutorOrErr) |
623 | return InProcessExecutorOrErr.takeError(); |
624 | |
625 | return std::move(*InProcessExecutorOrErr); |
626 | } |
627 | case ExecutionModeE::SubProcess: { |
628 | #ifdef __linux__ |
629 | auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create( |
630 | State, Obj: std::move(ObjectFile), Key); |
631 | if (!SubProcessExecutorOrErr) |
632 | return SubProcessExecutorOrErr.takeError(); |
633 | |
634 | return std::move(*SubProcessExecutorOrErr); |
635 | #else |
636 | return make_error<Failure>( |
637 | "The subprocess execution mode is only supported on Linux" ); |
638 | #endif |
639 | } |
640 | } |
641 | llvm_unreachable("ExecutionMode is outside expected range" ); |
642 | } |
643 | |
644 | std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration( |
645 | RunnableConfiguration &&RC, |
646 | const std::optional<StringRef> &DumpFile) const { |
647 | Benchmark &BenchmarkResult = RC.BenchmarkResult; |
648 | object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile; |
649 | |
650 | if (DumpFile && BenchmarkPhaseSelector > |
651 | BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { |
652 | auto ObjectFilePath = |
653 | writeObjectFile(Buffer: ObjectFile.getBinary()->getData(), FileName: *DumpFile); |
654 | if (Error E = ObjectFilePath.takeError()) { |
655 | return {std::move(E), std::move(BenchmarkResult)}; |
656 | } |
657 | outs() << "Check generated assembly with: /usr/bin/objdump -d " |
658 | << *ObjectFilePath << "\n" ; |
659 | } |
660 | |
661 | if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) { |
662 | BenchmarkResult.Error = "actual measurements skipped." ; |
663 | return {Error::success(), std::move(BenchmarkResult)}; |
664 | } |
665 | |
666 | Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor = |
667 | createFunctionExecutor(ObjectFile: std::move(ObjectFile), Key: RC.BenchmarkResult.Key); |
668 | if (!Executor) |
669 | return {Executor.takeError(), std::move(BenchmarkResult)}; |
670 | auto NewMeasurements = runMeasurements(Executor: **Executor); |
671 | |
672 | if (Error E = NewMeasurements.takeError()) { |
673 | return {std::move(E), std::move(BenchmarkResult)}; |
674 | } |
675 | assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions" ); |
676 | for (BenchmarkMeasure &BM : *NewMeasurements) { |
677 | // Scale the measurements by the number of instructions. |
678 | BM.PerInstructionValue /= BenchmarkResult.MinInstructions; |
679 | // Scale the measurements by the number of times the entire snippet is |
680 | // repeated. |
681 | BM.PerSnippetValue /= |
682 | std::ceil(x: BenchmarkResult.MinInstructions / |
683 | static_cast<double>(BenchmarkResult.Key.Instructions.size())); |
684 | } |
685 | BenchmarkResult.Measurements = std::move(*NewMeasurements); |
686 | |
687 | return {Error::success(), std::move(BenchmarkResult)}; |
688 | } |
689 | |
690 | Expected<std::string> |
691 | BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const { |
692 | int ResultFD = 0; |
693 | SmallString<256> ResultPath = FileName; |
694 | if (Error E = errorCodeToError( |
695 | EC: FileName.empty() ? sys::fs::createTemporaryFile(Prefix: "snippet" , Suffix: "o" , |
696 | ResultFD, ResultPath) |
697 | : sys::fs::openFileForReadWrite( |
698 | Name: FileName, ResultFD, Disp: sys::fs::CD_CreateAlways, |
699 | Flags: sys::fs::OF_None))) |
700 | return std::move(E); |
701 | raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); |
702 | OFS.write(Ptr: Buffer.data(), Size: Buffer.size()); |
703 | OFS.flush(); |
704 | return std::string(ResultPath); |
705 | } |
706 | |
707 | static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS, |
708 | const ValidationEvent RHS) { |
709 | return static_cast<int>(LHS.first) < static_cast<int>(RHS); |
710 | } |
711 | |
712 | Error BenchmarkRunner::getValidationCountersToRun( |
713 | SmallVector<const char *> &ValCountersToRun) const { |
714 | const PfmCountersInfo &PCI = State.getPfmCounters(); |
715 | ValCountersToRun.reserve(N: ValidationCounters.size()); |
716 | |
717 | ValCountersToRun.reserve(N: ValidationCounters.size()); |
718 | ArrayRef TargetValidationEvents(PCI.ValidationEvents, |
719 | PCI.NumValidationEvents); |
720 | for (const ValidationEvent RequestedValEvent : ValidationCounters) { |
721 | auto ValCounterIt = |
722 | lower_bound(Range&: TargetValidationEvents, Value: RequestedValEvent, C: EventLessThan); |
723 | if (ValCounterIt == TargetValidationEvents.end() || |
724 | ValCounterIt->first != RequestedValEvent) |
725 | return make_error<Failure>(Args: "Cannot create validation counter" ); |
726 | |
727 | assert(ValCounterIt->first == RequestedValEvent && |
728 | "The array of validation events from the target should be sorted" ); |
729 | ValCountersToRun.push_back(Elt: ValCounterIt->second); |
730 | } |
731 | |
732 | return Error::success(); |
733 | } |
734 | |
735 | BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {} |
736 | |
737 | } // namespace exegesis |
738 | } // namespace llvm |
739 | |