| 1 | //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 |  | 
|---|
| 9 | #include "BenchmarkRunner.h" | 
|---|
| 10 | #include "Assembler.h" | 
|---|
| 11 | #include "Error.h" | 
|---|
| 12 | #include "MCInstrDescView.h" | 
|---|
| 13 | #include "MmapUtils.h" | 
|---|
| 14 | #include "PerfHelper.h" | 
|---|
| 15 | #include "SubprocessMemory.h" | 
|---|
| 16 | #include "Target.h" | 
|---|
| 17 | #include "llvm/ADT/ScopeExit.h" | 
|---|
| 18 | #include "llvm/ADT/StringExtras.h" | 
|---|
| 19 | #include "llvm/ADT/StringRef.h" | 
|---|
| 20 | #include "llvm/ADT/Twine.h" | 
|---|
| 21 | #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX | 
|---|
| 22 | #include "llvm/Support/CrashRecoveryContext.h" | 
|---|
| 23 | #include "llvm/Support/Error.h" | 
|---|
| 24 | #include "llvm/Support/FileSystem.h" | 
|---|
| 25 | #include "llvm/Support/MemoryBuffer.h" | 
|---|
| 26 | #include "llvm/Support/Program.h" | 
|---|
| 27 | #include "llvm/Support/Signals.h" | 
|---|
| 28 | #include "llvm/Support/SystemZ/zOSSupport.h" | 
|---|
| 29 | #include <cmath> | 
|---|
| 30 | #include <memory> | 
|---|
| 31 | #include <string> | 
|---|
| 32 |  | 
|---|
| 33 | #ifdef __linux__ | 
|---|
| 34 | #ifdef HAVE_LIBPFM | 
|---|
| 35 | #include <perfmon/perf_event.h> | 
|---|
| 36 | #endif | 
|---|
| 37 | #include <sys/mman.h> | 
|---|
| 38 | #include <sys/ptrace.h> | 
|---|
| 39 | #include <sys/resource.h> | 
|---|
| 40 | #include <sys/socket.h> | 
|---|
| 41 | #include <sys/syscall.h> | 
|---|
| 42 | #include <sys/wait.h> | 
|---|
| 43 | #include <unistd.h> | 
|---|
| 44 |  | 
|---|
| 45 | #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER) | 
|---|
| 46 | #include <sys/rseq.h> | 
|---|
| 47 | #if defined(RSEQ_SIG) && defined(SYS_rseq) | 
|---|
| 48 | #define GLIBC_INITS_RSEQ | 
|---|
| 49 | #endif | 
|---|
| 50 | #endif | 
|---|
| 51 | #endif // __linux__ | 
|---|
| 52 |  | 
|---|
| 53 | namespace llvm { | 
|---|
| 54 | namespace exegesis { | 
|---|
| 55 |  | 
|---|
| 56 | BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode, | 
|---|
| 57 | BenchmarkPhaseSelectorE BenchmarkPhaseSelector, | 
|---|
| 58 | ExecutionModeE ExecutionMode, | 
|---|
| 59 | ArrayRef<ValidationEvent> ValCounters) | 
|---|
| 60 | : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector), | 
|---|
| 61 | ExecutionMode(ExecutionMode), ValidationCounters(ValCounters), | 
|---|
| 62 | Scratch(std::make_unique<ScratchSpace>()) {} | 
|---|
| 63 |  | 
|---|
| 64 | BenchmarkRunner::~BenchmarkRunner() = default; | 
|---|
| 65 |  | 
|---|
| 66 | void BenchmarkRunner::FunctionExecutor::accumulateCounterValues( | 
|---|
| 67 | const SmallVectorImpl<int64_t> &NewValues, | 
|---|
| 68 | SmallVectorImpl<int64_t> *Result) { | 
|---|
| 69 | const size_t NumValues = std::max(a: NewValues.size(), b: Result->size()); | 
|---|
| 70 | if (NumValues > Result->size()) | 
|---|
| 71 | Result->resize(N: NumValues, NV: 0); | 
|---|
| 72 | for (size_t I = 0, End = NewValues.size(); I < End; ++I) | 
|---|
| 73 | (*Result)[I] += NewValues[I]; | 
|---|
| 74 | } | 
|---|
| 75 |  | 
|---|
| 76 | Expected<SmallVector<int64_t, 4>> | 
|---|
| 77 | BenchmarkRunner::FunctionExecutor::runAndSample( | 
|---|
| 78 | const char *Counters, ArrayRef<const char *> ValidationCounters, | 
|---|
| 79 | SmallVectorImpl<int64_t> &ValidationCounterValues) const { | 
|---|
| 80 | // We sum counts when there are several counters for a single ProcRes | 
|---|
| 81 | // (e.g. P23 on SandyBridge). | 
|---|
| 82 | SmallVector<int64_t, 4> CounterValues; | 
|---|
| 83 | SmallVector<StringRef, 2> CounterNames; | 
|---|
| 84 | StringRef(Counters).split(A&: CounterNames, Separator: '+'); | 
|---|
| 85 | for (auto &CounterName : CounterNames) { | 
|---|
| 86 | CounterName = CounterName.trim(); | 
|---|
| 87 | Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter( | 
|---|
| 88 | CounterName, ValidationCounters, ValidationCounterValues); | 
|---|
| 89 | if (!ValueOrError) | 
|---|
| 90 | return ValueOrError.takeError(); | 
|---|
| 91 | accumulateCounterValues(NewValues: ValueOrError.get(), Result: &CounterValues); | 
|---|
| 92 | } | 
|---|
| 93 | return CounterValues; | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 | namespace { | 
|---|
| 97 | class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { | 
|---|
| 98 | public: | 
|---|
| 99 | static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>> | 
|---|
| 100 | create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj, | 
|---|
| 101 | BenchmarkRunner::ScratchSpace *Scratch, | 
|---|
| 102 | std::optional<int> BenchmarkProcessCPU) { | 
|---|
| 103 | Expected<ExecutableFunction> EF = | 
|---|
| 104 | ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj)); | 
|---|
| 105 |  | 
|---|
| 106 | if (!EF) | 
|---|
| 107 | return EF.takeError(); | 
|---|
| 108 |  | 
|---|
| 109 | return std::unique_ptr<InProcessFunctionExecutorImpl>( | 
|---|
| 110 | new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch)); | 
|---|
| 111 | } | 
|---|
| 112 |  | 
|---|
| 113 | private: | 
|---|
| 114 | InProcessFunctionExecutorImpl(const LLVMState &State, | 
|---|
| 115 | ExecutableFunction Function, | 
|---|
| 116 | BenchmarkRunner::ScratchSpace *Scratch) | 
|---|
| 117 | : State(State), Function(std::move(Function)), Scratch(Scratch) {} | 
|---|
| 118 |  | 
|---|
| 119 | static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues, | 
|---|
| 120 | SmallVector<int64_t, 4> *Result) { | 
|---|
| 121 | const size_t NumValues = std::max(a: NewValues.size(), b: Result->size()); | 
|---|
| 122 | if (NumValues > Result->size()) | 
|---|
| 123 | Result->resize(N: NumValues, NV: 0); | 
|---|
| 124 | for (size_t I = 0, End = NewValues.size(); I < End; ++I) | 
|---|
| 125 | (*Result)[I] += NewValues[I]; | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | Expected<SmallVector<int64_t, 4>> runWithCounter( | 
|---|
| 129 | StringRef CounterName, ArrayRef<const char *> ValidationCounters, | 
|---|
| 130 | SmallVectorImpl<int64_t> &ValidationCounterValues) const override { | 
|---|
| 131 | const ExegesisTarget &ET = State.getExegesisTarget(); | 
|---|
| 132 | char *const ScratchPtr = Scratch->ptr(); | 
|---|
| 133 | auto CounterOrError = | 
|---|
| 134 | ET.createCounter(CounterName, State, ValidationCounters); | 
|---|
| 135 |  | 
|---|
| 136 | if (!CounterOrError) | 
|---|
| 137 | return CounterOrError.takeError(); | 
|---|
| 138 |  | 
|---|
| 139 | pfm::CounterGroup *Counter = CounterOrError.get().get(); | 
|---|
| 140 | Scratch->clear(); | 
|---|
| 141 | { | 
|---|
| 142 | auto PS = ET.withSavedState(); | 
|---|
| 143 | CrashRecoveryContext CRC; | 
|---|
| 144 | CrashRecoveryContext::Enable(); | 
|---|
| 145 | const bool Crashed = !CRC.RunSafely(Fn: [this, Counter, ScratchPtr]() { | 
|---|
| 146 | Counter->start(); | 
|---|
| 147 | this->Function(ScratchPtr); | 
|---|
| 148 | Counter->stop(); | 
|---|
| 149 | }); | 
|---|
| 150 | CrashRecoveryContext::Disable(); | 
|---|
| 151 | PS.reset(); | 
|---|
| 152 | if (Crashed) { | 
|---|
| 153 | #ifdef LLVM_ON_UNIX | 
|---|
| 154 | // See "Exit Status for Commands": | 
|---|
| 155 | // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html | 
|---|
| 156 | constexpr const int kSigOffset = 128; | 
|---|
| 157 | return make_error<SnippetSignal>(Args: CRC.RetCode - kSigOffset); | 
|---|
| 158 | #else | 
|---|
| 159 | // The exit code of the process on windows is not meaningful as a | 
|---|
| 160 | // signal, so simply pass in -1 as the signal into the error. | 
|---|
| 161 | return make_error<SnippetSignal>(-1); | 
|---|
| 162 | #endif // LLVM_ON_UNIX | 
|---|
| 163 | } | 
|---|
| 164 | } | 
|---|
| 165 |  | 
|---|
| 166 | auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); | 
|---|
| 167 | if (!ValidationValuesOrErr) | 
|---|
| 168 | return ValidationValuesOrErr.takeError(); | 
|---|
| 169 |  | 
|---|
| 170 | ArrayRef RealValidationValues = *ValidationValuesOrErr; | 
|---|
| 171 | for (size_t I = 0; I < RealValidationValues.size(); ++I) | 
|---|
| 172 | ValidationCounterValues[I] = RealValidationValues[I]; | 
|---|
| 173 |  | 
|---|
| 174 | return Counter->readOrError(FunctionBytes: Function.getFunctionBytes()); | 
|---|
| 175 | } | 
|---|
| 176 |  | 
|---|
| 177 | const LLVMState &State; | 
|---|
| 178 | const ExecutableFunction Function; | 
|---|
| 179 | BenchmarkRunner::ScratchSpace *const Scratch; | 
|---|
| 180 | }; | 
|---|
| 181 |  | 
|---|
| 182 | #ifdef __linux__ | 
|---|
| 183 | // The following class implements a function executor that executes the | 
|---|
| 184 | // benchmark code within a subprocess rather than within the main llvm-exegesis | 
|---|
| 185 | // process. This allows for much more control over the execution context of the | 
|---|
| 186 | // snippet, particularly with regard to memory. This class performs all the | 
|---|
| 187 | // necessary functions to create the subprocess, execute the snippet in the | 
|---|
| 188 | // subprocess, and report results/handle errors. | 
|---|
| 189 | class SubProcessFunctionExecutorImpl | 
|---|
| 190 | : public BenchmarkRunner::FunctionExecutor { | 
|---|
| 191 | public: | 
|---|
| 192 | static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>> | 
|---|
| 193 | create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj, | 
|---|
| 194 | const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) { | 
|---|
| 195 | Expected<ExecutableFunction> EF = | 
|---|
| 196 | ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj)); | 
|---|
| 197 | if (!EF) | 
|---|
| 198 | return EF.takeError(); | 
|---|
| 199 |  | 
|---|
| 200 | return std::unique_ptr<SubProcessFunctionExecutorImpl>( | 
|---|
| 201 | new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key, | 
|---|
| 202 | BenchmarkProcessCPU)); | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | private: | 
|---|
| 206 | SubProcessFunctionExecutorImpl(const LLVMState &State, | 
|---|
| 207 | ExecutableFunction Function, | 
|---|
| 208 | const BenchmarkKey &Key, | 
|---|
| 209 | std::optional<int> BenchmarkCPU) | 
|---|
| 210 | : State(State), Function(std::move(Function)), Key(Key), | 
|---|
| 211 | BenchmarkProcessCPU(BenchmarkCPU) {} | 
|---|
| 212 |  | 
|---|
// Exit codes the benchmarking child process uses to tell the parent which
// setup step failed. Zero is not listed: the child exits with 0 on success.
enum ChildProcessExitCodeE {
  CounterFDReadFailed = 1,
  RSeqDisableFailed = 2,
  FunctionDataMappingFailed = 3,
  AuxiliaryMemorySetupFailed = 4,
  SetCPUAffinityFailed = 5
};
|---|
| 220 |  | 
|---|
| 221 | StringRef childProcessExitCodeToString(int ExitCode) const { | 
|---|
| 222 | switch (ExitCode) { | 
|---|
| 223 | case ChildProcessExitCodeE::CounterFDReadFailed: | 
|---|
| 224 | return "Counter file descriptor read failed"; | 
|---|
| 225 | case ChildProcessExitCodeE::RSeqDisableFailed: | 
|---|
| 226 | return "Disabling restartable sequences failed"; | 
|---|
| 227 | case ChildProcessExitCodeE::FunctionDataMappingFailed: | 
|---|
| 228 | return "Failed to map memory for assembled snippet"; | 
|---|
| 229 | case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed: | 
|---|
| 230 | return "Failed to setup auxiliary memory"; | 
|---|
| 231 | case ChildProcessExitCodeE::SetCPUAffinityFailed: | 
|---|
| 232 | return "Failed to set CPU affinity of the benchmarking process"; | 
|---|
| 233 | default: | 
|---|
| 234 | return "Child process returned with unknown exit code"; | 
|---|
| 235 | } | 
|---|
| 236 | } | 
|---|
| 237 |  | 
|---|
| 238 | Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const { | 
|---|
| 239 | struct msghdr Message = {}; | 
|---|
| 240 | char Buffer[CMSG_SPACE(sizeof(FD))]; | 
|---|
| 241 | memset(s: Buffer, c: 0, n: sizeof(Buffer)); | 
|---|
| 242 | Message.msg_control = Buffer; | 
|---|
| 243 | Message.msg_controllen = sizeof(Buffer); | 
|---|
| 244 |  | 
|---|
| 245 | struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); | 
|---|
| 246 | ControlMessage->cmsg_level = SOL_SOCKET; | 
|---|
| 247 | ControlMessage->cmsg_type = SCM_RIGHTS; | 
|---|
| 248 | ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD)); | 
|---|
| 249 |  | 
|---|
| 250 | memcpy(CMSG_DATA(ControlMessage), src: &FD, n: sizeof(FD)); | 
|---|
| 251 |  | 
|---|
| 252 | Message.msg_controllen = CMSG_SPACE(sizeof(FD)); | 
|---|
| 253 |  | 
|---|
| 254 | ssize_t BytesWritten = sendmsg(fd: SocketFD, message: &Message, flags: 0); | 
|---|
| 255 |  | 
|---|
| 256 | if (BytesWritten < 0) | 
|---|
| 257 | return make_error<Failure>(Args: "Failed to write FD to socket: "+ | 
|---|
| 258 | Twine(strerror(errno))); | 
|---|
| 259 |  | 
|---|
| 260 | return Error::success(); | 
|---|
| 261 | } | 
|---|
| 262 |  | 
|---|
| 263 | Expected<int> getFileDescriptorFromSocket(int SocketFD) const { | 
|---|
| 264 | struct msghdr Message = {}; | 
|---|
| 265 |  | 
|---|
| 266 | char ControlBuffer[256]; | 
|---|
| 267 | Message.msg_control = ControlBuffer; | 
|---|
| 268 | Message.msg_controllen = sizeof(ControlBuffer); | 
|---|
| 269 |  | 
|---|
| 270 | ssize_t BytesRead = recvmsg(fd: SocketFD, message: &Message, flags: 0); | 
|---|
| 271 |  | 
|---|
| 272 | if (BytesRead < 0) | 
|---|
| 273 | return make_error<Failure>(Args: "Failed to read FD from socket: "+ | 
|---|
| 274 | Twine(strerror(errno))); | 
|---|
| 275 |  | 
|---|
| 276 | struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); | 
|---|
| 277 |  | 
|---|
| 278 | int FD; | 
|---|
| 279 |  | 
|---|
| 280 | if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD))) | 
|---|
| 281 | return make_error<Failure>(Args: "Failed to get correct number of bytes for " | 
|---|
| 282 | "file descriptor from socket."); | 
|---|
| 283 |  | 
|---|
| 284 | memcpy(dest: &FD, CMSG_DATA(ControlMessage), n: sizeof(FD)); | 
|---|
| 285 |  | 
|---|
| 286 | return FD; | 
|---|
| 287 | } | 
|---|
| 288 |  | 
|---|
| 289 | Error | 
|---|
| 290 | runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName, | 
|---|
| 291 | SmallVectorImpl<int64_t> &CounterValues, | 
|---|
| 292 | ArrayRef<const char *> ValidationCounters, | 
|---|
| 293 | SmallVectorImpl<int64_t> &ValidationCounterValues) const { | 
|---|
| 294 | auto WriteFDClose = make_scope_exit(F: [WriteFD]() { close(fd: WriteFD); }); | 
|---|
| 295 | const ExegesisTarget &ET = State.getExegesisTarget(); | 
|---|
| 296 | auto CounterOrError = | 
|---|
| 297 | ET.createCounter(CounterName, State, ValidationCounters, ProcessID: ChildPID); | 
|---|
| 298 |  | 
|---|
| 299 | if (!CounterOrError) | 
|---|
| 300 | return CounterOrError.takeError(); | 
|---|
| 301 |  | 
|---|
| 302 | pfm::CounterGroup *Counter = CounterOrError.get().get(); | 
|---|
| 303 |  | 
|---|
| 304 | // Make sure to attach to the process (and wait for the sigstop to be | 
|---|
| 305 | // delivered and for the process to continue) before we write to the counter | 
|---|
| 306 | // file descriptor. Attaching to the process before writing to the socket | 
|---|
| 307 | // ensures that the subprocess at most has blocked on the read call. If we | 
|---|
| 308 | // attach afterwards, the subprocess might exit before we get to the attach | 
|---|
| 309 | // call due to effects like scheduler contention, introducing transient | 
|---|
| 310 | // failures. | 
|---|
| 311 | if (ptrace(request: PTRACE_ATTACH, ChildPID, NULL, NULL) != 0) | 
|---|
| 312 | return make_error<Failure>(Args: "Failed to attach to the child process: "+ | 
|---|
| 313 | Twine(strerror(errno))); | 
|---|
| 314 |  | 
|---|
| 315 | if (waitpid(pid: ChildPID, NULL, options: 0) == -1) { | 
|---|
| 316 | return make_error<Failure>( | 
|---|
| 317 | Args: "Failed to wait for child process to stop after attaching: "+ | 
|---|
| 318 | Twine(strerror(errno))); | 
|---|
| 319 | } | 
|---|
| 320 |  | 
|---|
| 321 | if (ptrace(request: PTRACE_CONT, ChildPID, NULL, NULL) != 0) | 
|---|
| 322 | return make_error<Failure>( | 
|---|
| 323 | Args: "Failed to continue execution of the child process: "+ | 
|---|
| 324 | Twine(strerror(errno))); | 
|---|
| 325 |  | 
|---|
| 326 | int CounterFileDescriptor = Counter->getFileDescriptor(); | 
|---|
| 327 | Error SendError = | 
|---|
| 328 | sendFileDescriptorThroughSocket(SocketFD: WriteFD, FD: CounterFileDescriptor); | 
|---|
| 329 |  | 
|---|
| 330 | if (SendError) | 
|---|
| 331 | return SendError; | 
|---|
| 332 |  | 
|---|
| 333 | int ChildStatus; | 
|---|
| 334 | if (waitpid(pid: ChildPID, stat_loc: &ChildStatus, options: 0) == -1) { | 
|---|
| 335 | return make_error<Failure>( | 
|---|
| 336 | Args: "Waiting for the child process to complete failed: "+ | 
|---|
| 337 | Twine(strerror(errno))); | 
|---|
| 338 | } | 
|---|
| 339 |  | 
|---|
| 340 | if (WIFEXITED(ChildStatus)) { | 
|---|
| 341 | int ChildExitCode = WEXITSTATUS(ChildStatus); | 
|---|
| 342 | if (ChildExitCode == 0) { | 
|---|
| 343 | // The child exited succesfully, read counter values and return | 
|---|
| 344 | // success. | 
|---|
| 345 | auto CounterValueOrErr = Counter->readOrError(); | 
|---|
| 346 | if (!CounterValueOrErr) | 
|---|
| 347 | return CounterValueOrErr.takeError(); | 
|---|
| 348 | CounterValues = std::move(*CounterValueOrErr); | 
|---|
| 349 |  | 
|---|
| 350 | auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); | 
|---|
| 351 | if (!ValidationValuesOrErr) | 
|---|
| 352 | return ValidationValuesOrErr.takeError(); | 
|---|
| 353 |  | 
|---|
| 354 | ArrayRef RealValidationValues = *ValidationValuesOrErr; | 
|---|
| 355 | for (size_t I = 0; I < RealValidationValues.size(); ++I) | 
|---|
| 356 | ValidationCounterValues[I] = RealValidationValues[I]; | 
|---|
| 357 |  | 
|---|
| 358 | return Error::success(); | 
|---|
| 359 | } | 
|---|
| 360 | // The child exited, but not successfully. | 
|---|
| 361 | return make_error<Failure>( | 
|---|
| 362 | Args: "Child benchmarking process exited with non-zero exit code: "+ | 
|---|
| 363 | childProcessExitCodeToString(ExitCode: ChildExitCode)); | 
|---|
| 364 | } | 
|---|
| 365 |  | 
|---|
| 366 | // An error was encountered running the snippet, process it | 
|---|
| 367 | siginfo_t ChildSignalInfo; | 
|---|
| 368 | if (ptrace(request: PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) { | 
|---|
| 369 | return make_error<Failure>(Args: "Getting signal info from the child failed: "+ | 
|---|
| 370 | Twine(strerror(errno))); | 
|---|
| 371 | } | 
|---|
| 372 |  | 
|---|
| 373 | // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM | 
|---|
| 374 | // handlers to run, and calling SIGTERM would mean that ptrace will force | 
|---|
| 375 | // it to block in the signal-delivery-stop for the SIGSEGV/other signals, | 
|---|
| 376 | // and upon exit. | 
|---|
| 377 | if (kill(pid: ChildPID, SIGKILL) == -1) | 
|---|
| 378 | return make_error<Failure>(Args: "Failed to kill child benchmarking proces: "+ | 
|---|
| 379 | Twine(strerror(errno))); | 
|---|
| 380 |  | 
|---|
| 381 | // Wait for the process to exit so that there are no zombie processes left | 
|---|
| 382 | // around. | 
|---|
| 383 | if (waitpid(pid: ChildPID, NULL, options: 0) == -1) | 
|---|
| 384 | return make_error<Failure>(Args: "Failed to wait for process to die: "+ | 
|---|
| 385 | Twine(strerror(errno))); | 
|---|
| 386 |  | 
|---|
| 387 | if (ChildSignalInfo.si_signo == SIGSEGV) | 
|---|
| 388 | return make_error<SnippetSegmentationFault>( | 
|---|
| 389 | Args: reinterpret_cast<uintptr_t>(ChildSignalInfo.si_addr)); | 
|---|
| 390 |  | 
|---|
| 391 | return make_error<SnippetSignal>(Args&: ChildSignalInfo.si_signo); | 
|---|
| 392 | } | 
|---|
| 393 |  | 
|---|
| 394 | static void setCPUAffinityIfRequested(int CPUToUse) { | 
|---|
| 395 | // Special case this function for x86_64 for now as certain more esoteric | 
|---|
| 396 | // platforms have different definitions for some of the libc functions that | 
|---|
| 397 | // cause buildtime failures. Additionally, the subprocess executor mode (the | 
|---|
| 398 | // sole mode where this is supported) currently only supports x86_64. | 
|---|
| 399 |  | 
|---|
| 400 | // Also check that we have the SYS_getcpu macro defined, meaning the syscall | 
|---|
| 401 | // actually exists within the build environment. We manually use the syscall | 
|---|
| 402 | // rather than the libc wrapper given the wrapper for getcpu is only available | 
|---|
| 403 | // in glibc 2.29 and later. | 
|---|
| 404 | #if defined(__x86_64__) && defined(SYS_getcpu) | 
|---|
| 405 | // Set the CPU affinity for the child process, so that we ensure that if | 
|---|
| 406 | // the user specified a CPU the process should run on, the benchmarking | 
|---|
| 407 | // process is running on that CPU. | 
|---|
| 408 | cpu_set_t CPUMask; | 
|---|
| 409 | CPU_ZERO(&CPUMask); | 
|---|
| 410 | CPU_SET(CPUToUse, &CPUMask); | 
|---|
| 411 | // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they | 
|---|
| 412 | // are available. | 
|---|
| 413 | int SetAffinityReturn = sched_setaffinity(pid: 0, cpusetsize: sizeof(CPUMask), cpuset: &CPUMask); | 
|---|
| 414 | if (SetAffinityReturn == -1) { | 
|---|
| 415 | exit(status: ChildProcessExitCodeE::SetCPUAffinityFailed); | 
|---|
| 416 | } | 
|---|
| 417 |  | 
|---|
| 418 | // Check (if assertions are enabled) that we are actually running on the | 
|---|
| 419 | // CPU that was specified by the user. | 
|---|
| 420 | [[maybe_unused]] unsigned int CurrentCPU; | 
|---|
| 421 | assert(syscall(SYS_getcpu, &CurrentCPU, nullptr) == 0 && | 
|---|
| 422 | "Expected getcpu call to succeed."); | 
|---|
| 423 | assert(static_cast<int>(CurrentCPU) == CPUToUse && | 
|---|
| 424 | "Expected current CPU to equal the CPU requested by the user"); | 
|---|
| 425 | #else | 
|---|
| 426 | exit(ChildProcessExitCodeE::SetCPUAffinityFailed); | 
|---|
| 427 | #endif // defined(__x86_64__) && defined(SYS_getcpu) | 
|---|
| 428 | } | 
|---|
| 429 |  | 
|---|
| 430 | Error createSubProcessAndRunBenchmark( | 
|---|
| 431 | StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues, | 
|---|
| 432 | ArrayRef<const char *> ValidationCounters, | 
|---|
| 433 | SmallVectorImpl<int64_t> &ValidationCounterValues) const { | 
|---|
| 434 | int PipeFiles[2]; | 
|---|
| 435 | int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, protocol: 0, fds: PipeFiles); | 
|---|
| 436 | if (PipeSuccessOrErr != 0) { | 
|---|
| 437 | return make_error<Failure>( | 
|---|
| 438 | Args: "Failed to create a pipe for interprocess communication between " | 
|---|
| 439 | "llvm-exegesis and the benchmarking subprocess: "+ | 
|---|
| 440 | Twine(strerror(errno))); | 
|---|
| 441 | } | 
|---|
| 442 |  | 
|---|
| 443 | SubprocessMemory SPMemory; | 
|---|
| 444 | Error MemoryInitError = SPMemory.initializeSubprocessMemory(ProcessID: getpid()); | 
|---|
| 445 | if (MemoryInitError) | 
|---|
| 446 | return MemoryInitError; | 
|---|
| 447 |  | 
|---|
| 448 | Error AddMemDefError = | 
|---|
| 449 | SPMemory.addMemoryDefinition(MemoryDefinitions: Key.MemoryValues, ProcessID: getpid()); | 
|---|
| 450 | if (AddMemDefError) | 
|---|
| 451 | return AddMemDefError; | 
|---|
| 452 |  | 
|---|
| 453 | long ParentTID = SubprocessMemory::getCurrentTID(); | 
|---|
| 454 | pid_t ParentOrChildPID = fork(); | 
|---|
| 455 |  | 
|---|
| 456 | if (ParentOrChildPID == -1) { | 
|---|
| 457 | return make_error<Failure>(Args: "Failed to create child process: "+ | 
|---|
| 458 | Twine(strerror(errno))); | 
|---|
| 459 | } | 
|---|
| 460 |  | 
|---|
| 461 | if (ParentOrChildPID == 0) { | 
|---|
| 462 | if (BenchmarkProcessCPU.has_value()) { | 
|---|
| 463 | setCPUAffinityIfRequested(*BenchmarkProcessCPU); | 
|---|
| 464 | } | 
|---|
| 465 |  | 
|---|
| 466 | // We are in the child process, close the write end of the pipe. | 
|---|
| 467 | close(fd: PipeFiles[1]); | 
|---|
| 468 | // Unregister handlers, signal handling is now handled through ptrace in | 
|---|
| 469 | // the host process. | 
|---|
| 470 | sys::unregisterHandlers(); | 
|---|
| 471 | runChildSubprocess(Pipe: PipeFiles[0], Key, ParentTID); | 
|---|
| 472 | // The child process terminates in the above function, so we should never | 
|---|
| 473 | // get to this point. | 
|---|
| 474 | llvm_unreachable( "Child process didn't exit when expected."); | 
|---|
| 475 | } | 
|---|
| 476 |  | 
|---|
| 477 | // Close the read end of the pipe as we only need to write to the subprocess | 
|---|
| 478 | // from the parent process. | 
|---|
| 479 | close(fd: PipeFiles[0]); | 
|---|
| 480 | return runParentProcess(ChildPID: ParentOrChildPID, WriteFD: PipeFiles[1], CounterName, | 
|---|
| 481 | CounterValues, ValidationCounters, | 
|---|
| 482 | ValidationCounterValues); | 
|---|
| 483 | } | 
|---|
| 484 |  | 
|---|
| 485 | void disableCoreDumps() const { | 
|---|
| 486 | struct rlimit rlim; | 
|---|
| 487 |  | 
|---|
| 488 | rlim.rlim_cur = 0; | 
|---|
| 489 | setrlimit(RLIMIT_CORE, rlimits: &rlim); | 
|---|
| 490 | } | 
|---|
| 491 |  | 
|---|
| 492 | [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key, | 
|---|
| 493 | long ParentTID) const { | 
|---|
| 494 | // Disable core dumps in the child process as otherwise everytime we | 
|---|
| 495 | // encounter an execution failure like a segmentation fault, we will create | 
|---|
| 496 | // a core dump. We report the information directly rather than require the | 
|---|
| 497 | // user inspect a core dump. | 
|---|
| 498 | disableCoreDumps(); | 
|---|
| 499 |  | 
|---|
| 500 | // The following occurs within the benchmarking subprocess. | 
|---|
| 501 | pid_t ParentPID = getppid(); | 
|---|
| 502 |  | 
|---|
| 503 | Expected<int> CounterFileDescriptorOrError = | 
|---|
| 504 | getFileDescriptorFromSocket(SocketFD: Pipe); | 
|---|
| 505 |  | 
|---|
| 506 | if (!CounterFileDescriptorOrError) | 
|---|
| 507 | exit(status: ChildProcessExitCodeE::CounterFDReadFailed); | 
|---|
| 508 |  | 
|---|
| 509 | int CounterFileDescriptor = *CounterFileDescriptorOrError; | 
|---|
| 510 |  | 
|---|
| 511 | // Glibc versions greater than 2.35 automatically call rseq during | 
|---|
| 512 | // initialization. Unmapping the region that glibc sets up for this causes | 
|---|
| 513 | // segfaults in the program. Unregister the rseq region so that we can safely | 
|---|
| 514 | // unmap it later | 
|---|
| 515 | #ifdef GLIBC_INITS_RSEQ | 
|---|
| 516 | unsigned int RseqStructSize = __rseq_size; | 
|---|
| 517 |  | 
|---|
| 518 | // Glibc v2.40 (the change is also expected to be backported to v2.35) | 
|---|
| 519 | // changes the definition of __rseq_size to be the usable area of the struct | 
|---|
| 520 | // rather than the actual size of the struct. v2.35 uses only 20 bytes of | 
|---|
| 521 | // the 32 byte struct. For now, it should be safe to assume that if the | 
|---|
| 522 | // usable size is less than 32, the actual size of the struct will be 32 | 
|---|
| 523 | // bytes given alignment requirements. | 
|---|
| 524 | if (__rseq_size < 32) | 
|---|
| 525 | RseqStructSize = 32; | 
|---|
| 526 |  | 
|---|
| 527 | long RseqDisableOutput = syscall( | 
|---|
| 528 | SYS_rseq, | 
|---|
| 529 | reinterpret_cast<uintptr_t>(__builtin_thread_pointer()) + __rseq_offset, | 
|---|
| 530 | RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG); | 
|---|
| 531 | if (RseqDisableOutput != 0) | 
|---|
| 532 | exit(status: ChildProcessExitCodeE::RSeqDisableFailed); | 
|---|
| 533 | #endif // GLIBC_INITS_RSEQ | 
|---|
| 534 |  | 
|---|
| 535 | // The frontend that generates the memory annotation structures should | 
|---|
| 536 | // validate that the address to map the snippet in at is a multiple of | 
|---|
| 537 | // the page size. Assert that this is true here. | 
|---|
| 538 | assert(Key.SnippetAddress % getpagesize() == 0 && | 
|---|
| 539 | "The snippet address needs to be aligned to a page boundary."); | 
|---|
| 540 |  | 
|---|
| 541 | size_t FunctionDataCopySize = this->Function.FunctionBytes.size(); | 
|---|
| 542 | void *MapAddress = NULL; | 
|---|
| 543 | int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS; | 
|---|
| 544 |  | 
|---|
| 545 | if (Key.SnippetAddress != 0) { | 
|---|
| 546 | MapAddress = reinterpret_cast<void *>(Key.SnippetAddress); | 
|---|
| 547 | MapFlags |= MAP_FIXED_NOREPLACE; | 
|---|
| 548 | } | 
|---|
| 549 |  | 
|---|
| 550 | char *FunctionDataCopy = | 
|---|
| 551 | (char *)mmap(addr: MapAddress, len: FunctionDataCopySize, PROT_READ | PROT_WRITE, | 
|---|
| 552 | flags: MapFlags, fd: 0, offset: 0); | 
|---|
| 553 | if (reinterpret_cast<intptr_t>(FunctionDataCopy) == -1) | 
|---|
| 554 | exit(status: ChildProcessExitCodeE::FunctionDataMappingFailed); | 
|---|
| 555 |  | 
|---|
| 556 | memcpy(dest: FunctionDataCopy, src: this->Function.FunctionBytes.data(), | 
|---|
| 557 | n: this->Function.FunctionBytes.size()); | 
|---|
| 558 | mprotect(addr: FunctionDataCopy, len: FunctionDataCopySize, PROT_READ | PROT_EXEC); | 
|---|
| 559 |  | 
|---|
| 560 | Expected<int> AuxMemFDOrError = | 
|---|
| 561 | SubprocessMemory::setupAuxiliaryMemoryInSubprocess( | 
|---|
| 562 | MemoryDefinitions: Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor); | 
|---|
| 563 | if (!AuxMemFDOrError) | 
|---|
| 564 | exit(status: ChildProcessExitCodeE::AuxiliaryMemorySetupFailed); | 
|---|
| 565 |  | 
|---|
| 566 | ((void (*)(size_t, int))(uintptr_t)FunctionDataCopy)(FunctionDataCopySize, | 
|---|
| 567 | *AuxMemFDOrError); | 
|---|
| 568 |  | 
|---|
| 569 | exit(status: 0); | 
|---|
| 570 | } | 
|---|
| 571 |  | 
|---|
| 572 | Expected<SmallVector<int64_t, 4>> runWithCounter( | 
|---|
| 573 | StringRef CounterName, ArrayRef<const char *> ValidationCounters, | 
|---|
| 574 | SmallVectorImpl<int64_t> &ValidationCounterValues) const override { | 
|---|
| 575 | SmallVector<int64_t, 4> Value(1, 0); | 
|---|
| 576 | Error PossibleBenchmarkError = createSubProcessAndRunBenchmark( | 
|---|
| 577 | CounterName, CounterValues&: Value, ValidationCounters, ValidationCounterValues); | 
|---|
| 578 |  | 
|---|
| 579 | if (PossibleBenchmarkError) | 
|---|
| 580 | return std::move(PossibleBenchmarkError); | 
|---|
| 581 |  | 
|---|
| 582 | return Value; | 
|---|
| 583 | } | 
|---|
| 584 |  | 
|---|
| 585 | const LLVMState &State; | 
|---|
| 586 | const ExecutableFunction Function; | 
|---|
| 587 | const BenchmarkKey &Key; | 
|---|
| 588 | const std::optional<int> BenchmarkProcessCPU; | 
|---|
| 589 | }; | 
|---|
| 590 | #endif // __linux__ | 
|---|
| 591 | } // namespace | 
|---|
| 592 |  | 
|---|
| 593 | Expected<SmallString<0>> BenchmarkRunner::assembleSnippet( | 
|---|
| 594 | const BenchmarkCode &BC, const SnippetRepetitor &Repetitor, | 
|---|
| 595 | unsigned MinInstructions, unsigned LoopBodySize, | 
|---|
| 596 | bool GenerateMemoryInstructions) const { | 
|---|
| 597 | const std::vector<MCInst> &Instructions = BC.Key.Instructions; | 
|---|
| 598 | SmallString<0> Buffer; | 
|---|
| 599 | raw_svector_ostream OS(Buffer); | 
|---|
| 600 | if (Error E = assembleToStream( | 
|---|
| 601 | ET: State.getExegesisTarget(), TM: State.createTargetMachine(), LiveIns: BC.LiveIns, | 
|---|
| 602 | Fill: Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize, | 
|---|
| 603 | CleanupMemory: GenerateMemoryInstructions), | 
|---|
| 604 | AsmStreamm&: OS, Key: BC.Key, GenerateMemoryInstructions)) { | 
|---|
| 605 | return std::move(E); | 
|---|
| 606 | } | 
|---|
| 607 | return Buffer; | 
|---|
| 608 | } | 
|---|
| 609 |  | 
|---|
| 610 | Expected<BenchmarkRunner::RunnableConfiguration> | 
|---|
| 611 | BenchmarkRunner::getRunnableConfiguration( | 
|---|
| 612 | const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize, | 
|---|
| 613 | const SnippetRepetitor &Repetitor) const { | 
|---|
| 614 | RunnableConfiguration RC; | 
|---|
| 615 |  | 
|---|
| 616 | Benchmark &BenchmarkResult = RC.BenchmarkResult; | 
|---|
| 617 | BenchmarkResult.Mode = Mode; | 
|---|
| 618 | BenchmarkResult.CpuName = | 
|---|
| 619 | std::string(State.getTargetMachine().getTargetCPU()); | 
|---|
| 620 | BenchmarkResult.LLVMTriple = | 
|---|
| 621 | State.getTargetMachine().getTargetTriple().normalize(); | 
|---|
| 622 | BenchmarkResult.MinInstructions = MinInstructions; | 
|---|
| 623 | BenchmarkResult.Info = BC.Info; | 
|---|
| 624 |  | 
|---|
| 625 | const std::vector<MCInst> &Instructions = BC.Key.Instructions; | 
|---|
| 626 |  | 
|---|
| 627 | bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess; | 
|---|
| 628 |  | 
|---|
| 629 | BenchmarkResult.Key = BC.Key; | 
|---|
| 630 |  | 
|---|
| 631 | // Assemble at least kMinInstructionsForSnippet instructions by repeating | 
|---|
| 632 | // the snippet for debug/analysis. This is so that the user clearly | 
|---|
| 633 | // understands that the inside instructions are repeated. | 
|---|
| 634 | if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) { | 
|---|
| 635 | const int MinInstructionsForSnippet = 4 * Instructions.size(); | 
|---|
| 636 | const int LoopBodySizeForSnippet = 2 * Instructions.size(); | 
|---|
| 637 | auto Snippet = | 
|---|
| 638 | assembleSnippet(BC, Repetitor, MinInstructions: MinInstructionsForSnippet, | 
|---|
| 639 | LoopBodySize: LoopBodySizeForSnippet, GenerateMemoryInstructions); | 
|---|
| 640 | if (Error E = Snippet.takeError()) | 
|---|
| 641 | return std::move(E); | 
|---|
| 642 |  | 
|---|
| 643 | if (auto Err = getBenchmarkFunctionBytes(InputData: *Snippet, | 
|---|
| 644 | Bytes&: BenchmarkResult.AssembledSnippet)) | 
|---|
| 645 | return std::move(Err); | 
|---|
| 646 | } | 
|---|
| 647 |  | 
|---|
| 648 | // Assemble enough repetitions of the snippet so we have at least | 
|---|
| 649 | // MinInstructions instructions. | 
|---|
| 650 | if (BenchmarkPhaseSelector > | 
|---|
| 651 | BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { | 
|---|
| 652 | auto Snippet = | 
|---|
| 653 | assembleSnippet(BC, Repetitor, MinInstructions: BenchmarkResult.MinInstructions, | 
|---|
| 654 | LoopBodySize, GenerateMemoryInstructions); | 
|---|
| 655 | if (Error E = Snippet.takeError()) | 
|---|
| 656 | return std::move(E); | 
|---|
| 657 | RC.ObjectFile = getObjectFromBuffer(Buffer: *Snippet); | 
|---|
| 658 | } | 
|---|
| 659 |  | 
|---|
| 660 | return std::move(RC); | 
|---|
| 661 | } | 
|---|
| 662 |  | 
|---|
| 663 | Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> | 
|---|
| 664 | BenchmarkRunner::createFunctionExecutor( | 
|---|
| 665 | object::OwningBinary<object::ObjectFile> ObjectFile, | 
|---|
| 666 | const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const { | 
|---|
| 667 | switch (ExecutionMode) { | 
|---|
| 668 | case ExecutionModeE::InProcess: { | 
|---|
| 669 | if (BenchmarkProcessCPU.has_value()) | 
|---|
| 670 | return make_error<Failure>(Args: "The inprocess execution mode does not " | 
|---|
| 671 | "support benchmark core pinning."); | 
|---|
| 672 |  | 
|---|
| 673 | auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create( | 
|---|
| 674 | State, Obj: std::move(ObjectFile), Scratch: Scratch.get(), BenchmarkProcessCPU); | 
|---|
| 675 | if (!InProcessExecutorOrErr) | 
|---|
| 676 | return InProcessExecutorOrErr.takeError(); | 
|---|
| 677 |  | 
|---|
| 678 | return std::move(*InProcessExecutorOrErr); | 
|---|
| 679 | } | 
|---|
| 680 | case ExecutionModeE::SubProcess: { | 
|---|
| 681 | #ifdef __linux__ | 
|---|
| 682 | auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create( | 
|---|
| 683 | State, Obj: std::move(ObjectFile), Key, BenchmarkProcessCPU); | 
|---|
| 684 | if (!SubProcessExecutorOrErr) | 
|---|
| 685 | return SubProcessExecutorOrErr.takeError(); | 
|---|
| 686 |  | 
|---|
| 687 | return std::move(*SubProcessExecutorOrErr); | 
|---|
| 688 | #else | 
|---|
| 689 | return make_error<Failure>( | 
|---|
| 690 | "The subprocess execution mode is only supported on Linux"); | 
|---|
| 691 | #endif | 
|---|
| 692 | } | 
|---|
| 693 | } | 
|---|
| 694 | llvm_unreachable( "ExecutionMode is outside expected range"); | 
|---|
| 695 | } | 
|---|
| 696 |  | 
|---|
| 697 | std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration( | 
|---|
| 698 | RunnableConfiguration &&RC, const std::optional<StringRef> &DumpFile, | 
|---|
| 699 | std::optional<int> BenchmarkProcessCPU) const { | 
|---|
| 700 | Benchmark &BenchmarkResult = RC.BenchmarkResult; | 
|---|
| 701 | object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile; | 
|---|
| 702 |  | 
|---|
| 703 | if (DumpFile && BenchmarkPhaseSelector > | 
|---|
| 704 | BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { | 
|---|
| 705 | auto ObjectFilePath = | 
|---|
| 706 | writeObjectFile(Buffer: ObjectFile.getBinary()->getData(), FileName: *DumpFile); | 
|---|
| 707 | if (Error E = ObjectFilePath.takeError()) { | 
|---|
| 708 | return {std::move(E), std::move(BenchmarkResult)}; | 
|---|
| 709 | } | 
|---|
| 710 | outs() << "Check generated assembly with: /usr/bin/objdump -d " | 
|---|
| 711 | << *ObjectFilePath << "\n"; | 
|---|
| 712 | } | 
|---|
| 713 |  | 
|---|
| 714 | if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) { | 
|---|
| 715 | BenchmarkResult.Error = "actual measurements skipped."; | 
|---|
| 716 | return {Error::success(), std::move(BenchmarkResult)}; | 
|---|
| 717 | } | 
|---|
| 718 |  | 
|---|
| 719 | Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor = | 
|---|
| 720 | createFunctionExecutor(ObjectFile: std::move(ObjectFile), Key: RC.BenchmarkResult.Key, | 
|---|
| 721 | BenchmarkProcessCPU); | 
|---|
| 722 | if (!Executor) | 
|---|
| 723 | return {Executor.takeError(), std::move(BenchmarkResult)}; | 
|---|
| 724 | auto NewMeasurements = runMeasurements(Executor: **Executor); | 
|---|
| 725 |  | 
|---|
| 726 | if (Error E = NewMeasurements.takeError()) { | 
|---|
| 727 | return {std::move(E), std::move(BenchmarkResult)}; | 
|---|
| 728 | } | 
|---|
| 729 | assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions"); | 
|---|
| 730 | for (BenchmarkMeasure &BM : *NewMeasurements) { | 
|---|
| 731 | // Scale the measurements by the number of instructions. | 
|---|
| 732 | BM.PerInstructionValue /= BenchmarkResult.MinInstructions; | 
|---|
| 733 | // Scale the measurements by the number of times the entire snippet is | 
|---|
| 734 | // repeated. | 
|---|
| 735 | BM.PerSnippetValue /= | 
|---|
| 736 | std::ceil(x: BenchmarkResult.MinInstructions / | 
|---|
| 737 | static_cast<double>(BenchmarkResult.Key.Instructions.size())); | 
|---|
| 738 | } | 
|---|
| 739 | BenchmarkResult.Measurements = std::move(*NewMeasurements); | 
|---|
| 740 |  | 
|---|
| 741 | return {Error::success(), std::move(BenchmarkResult)}; | 
|---|
| 742 | } | 
|---|
| 743 |  | 
|---|
| 744 | Expected<std::string> | 
|---|
| 745 | BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const { | 
|---|
| 746 | int ResultFD = 0; | 
|---|
| 747 | SmallString<256> ResultPath = FileName; | 
|---|
| 748 | if (Error E = errorCodeToError( | 
|---|
| 749 | EC: FileName.empty() ? sys::fs::createTemporaryFile(Prefix: "snippet", Suffix: "o", | 
|---|
| 750 | ResultFD, ResultPath) | 
|---|
| 751 | : sys::fs::openFileForReadWrite( | 
|---|
| 752 | Name: FileName, ResultFD, Disp: sys::fs::CD_CreateAlways, | 
|---|
| 753 | Flags: sys::fs::OF_None))) | 
|---|
| 754 | return std::move(E); | 
|---|
| 755 | raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); | 
|---|
| 756 | OFS.write(Ptr: Buffer.data(), Size: Buffer.size()); | 
|---|
| 757 | OFS.flush(); | 
|---|
| 758 | return std::string(ResultPath); | 
|---|
| 759 | } | 
|---|
| 760 |  | 
|---|
| 761 | static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS, | 
|---|
| 762 | const ValidationEvent RHS) { | 
|---|
| 763 | return static_cast<int>(LHS.first) < static_cast<int>(RHS); | 
|---|
| 764 | } | 
|---|
| 765 |  | 
|---|
| 766 | Error BenchmarkRunner::getValidationCountersToRun( | 
|---|
| 767 | SmallVector<const char *> &ValCountersToRun) const { | 
|---|
| 768 | const PfmCountersInfo &PCI = State.getPfmCounters(); | 
|---|
| 769 | ValCountersToRun.reserve(N: ValidationCounters.size()); | 
|---|
| 770 |  | 
|---|
| 771 | ValCountersToRun.reserve(N: ValidationCounters.size()); | 
|---|
| 772 | ArrayRef TargetValidationEvents(PCI.ValidationEvents, | 
|---|
| 773 | PCI.NumValidationEvents); | 
|---|
| 774 | for (const ValidationEvent RequestedValEvent : ValidationCounters) { | 
|---|
| 775 | auto ValCounterIt = | 
|---|
| 776 | lower_bound(Range&: TargetValidationEvents, Value: RequestedValEvent, C: EventLessThan); | 
|---|
| 777 | if (ValCounterIt == TargetValidationEvents.end() || | 
|---|
| 778 | ValCounterIt->first != RequestedValEvent) | 
|---|
| 779 | return make_error<Failure>(Args: "Cannot create validation counter"); | 
|---|
| 780 |  | 
|---|
| 781 | assert(ValCounterIt->first == RequestedValEvent && | 
|---|
| 782 | "The array of validation events from the target should be sorted"); | 
|---|
| 783 | ValCountersToRun.push_back(Elt: ValCounterIt->second); | 
|---|
| 784 | } | 
|---|
| 785 |  | 
|---|
| 786 | return Error::success(); | 
|---|
| 787 | } | 
|---|
| 788 |  | 
|---|
| 789 | BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {} | 
|---|
| 790 |  | 
|---|
| 791 | } // namespace exegesis | 
|---|
| 792 | } // namespace llvm | 
|---|
| 793 |  | 
|---|