| 1 | //===-- Target.h ------------------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// |
| 11 | /// Classes that handle the creation of target-specific objects. This is |
| 12 | /// similar to Target/TargetRegistry. |
| 13 | /// |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #ifndef LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H |
| 17 | #define LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H |
| 18 | |
| 19 | #include "BenchmarkResult.h" |
| 20 | #include "BenchmarkRunner.h" |
| 21 | #include "Error.h" |
| 22 | #include "LlvmState.h" |
| 23 | #include "PerfHelper.h" |
| 24 | #include "SnippetGenerator.h" |
| 25 | #include "ValidationEvent.h" |
| 26 | #include "llvm/CodeGen/TargetPassConfig.h" |
| 27 | #include "llvm/IR/CallingConv.h" |
| 28 | #include "llvm/IR/LegacyPassManager.h" |
| 29 | #include "llvm/MC/MCInst.h" |
| 30 | #include "llvm/MC/MCRegisterInfo.h" |
| 31 | #include "llvm/Support/CommandLine.h" |
| 32 | #include "llvm/Support/Error.h" |
| 33 | #include "llvm/TargetParser/SubtargetFeature.h" |
| 34 | #include "llvm/TargetParser/Triple.h" |
| 35 | |
| 36 | namespace llvm { |
| 37 | namespace exegesis { |
| 38 | |
| 39 | extern cl::OptionCategory Options; |
| 40 | extern cl::OptionCategory BenchmarkOptions; |
| 41 | extern cl::OptionCategory AnalysisOptions; |
| 42 | |
| 43 | struct PfmCountersInfo { |
| 44 | // An optional name of a performance counter that can be used to measure |
| 45 | // cycles. |
| 46 | const char *CycleCounter; |
| 47 | |
| 48 | // An optional name of a performance counter that can be used to measure |
| 49 | // uops. |
| 50 | const char *UopsCounter; |
| 51 | |
| 52 | // An IssueCounter specifies how to measure uops issued to specific proc |
| 53 | // resources. |
| 54 | struct IssueCounter { |
| 55 | const char *Counter; |
| 56 | // The name of the ProcResource that this counter measures. |
| 57 | const char *ProcResName; |
| 58 | }; |
| 59 | // An optional list of IssueCounters. |
| 60 | const IssueCounter *IssueCounters; |
| 61 | unsigned NumIssueCounters; |
| 62 | |
| 63 | const std::pair<ValidationEvent, const char *> *ValidationEvents; |
| 64 | unsigned NumValidationEvents; |
| 65 | |
| 66 | static const PfmCountersInfo Default; |
| 67 | static const PfmCountersInfo Dummy; |
| 68 | }; |
| 69 | |
| 70 | struct CpuAndPfmCounters { |
| 71 | const char *CpuName; |
| 72 | const PfmCountersInfo *PCI; |
| 73 | bool operator<(StringRef S) const { return StringRef(CpuName) < S; } |
| 74 | }; |
| 75 | |
| 76 | class ExegesisTarget { |
| 77 | public: |
| 78 | typedef bool (*OpcodeAvailabilityChecker)(unsigned, const FeatureBitset &); |
| 79 | ExegesisTarget(ArrayRef<CpuAndPfmCounters> CpuPfmCounters, |
| 80 | OpcodeAvailabilityChecker IsOpcodeAvailable) |
| 81 | : CpuPfmCounters(CpuPfmCounters), IsOpcodeAvailable(IsOpcodeAvailable) {} |
| 82 | |
| 83 | // Targets can use this to create target-specific perf counters. |
| 84 | virtual Expected<std::unique_ptr<pfm::CounterGroup>> |
| 85 | createCounter(StringRef CounterName, const LLVMState &State, |
| 86 | ArrayRef<const char *> ValidationCounters, |
| 87 | const pid_t ProcessID = 0) const; |
| 88 | |
| 89 | // Targets can use this to add target-specific passes in assembleToStream(); |
| 90 | virtual void addTargetSpecificPasses(PassManagerBase &PM) const {} |
| 91 | |
| 92 | // Generates code to move a constant into a the given register. |
| 93 | // Precondition: Value must fit into Reg. |
| 94 | virtual std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, |
| 95 | MCRegister Reg, |
| 96 | const APInt &Value) const = 0; |
| 97 | |
| 98 | // Generates the code for the lower munmap call. The code generated by this |
| 99 | // function may clobber registers. |
| 100 | virtual void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const { |
| 101 | report_fatal_error( |
| 102 | reason: "generateLowerMunmap is not implemented on the current architecture" ); |
| 103 | } |
| 104 | |
| 105 | // Generates the upper munmap call. The code generated by this function may |
| 106 | // clobber registers. |
| 107 | virtual void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const { |
| 108 | report_fatal_error( |
| 109 | reason: "generateUpperMunmap is not implemented on the current architecture" ); |
| 110 | } |
| 111 | |
| 112 | // Generates the code for an exit syscall. The code generated by this function |
| 113 | // may clobber registers. |
| 114 | virtual std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const { |
| 115 | report_fatal_error( |
| 116 | reason: "generateExitSyscall is not implemented on the current architecture" ); |
| 117 | } |
| 118 | |
| 119 | // Generates the code to mmap a region of code. The code generated by this |
| 120 | // function may clobber registers. |
| 121 | virtual std::vector<MCInst> |
| 122 | generateMmap(uintptr_t Address, size_t Length, |
| 123 | uintptr_t FileDescriptorAddress) const { |
| 124 | report_fatal_error( |
| 125 | reason: "generateMmap is not implemented on the current architecture" ); |
| 126 | } |
| 127 | |
| 128 | // Generates the mmap code for the aux memory. The code generated by this |
| 129 | // function may clobber registers. |
| 130 | virtual void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const { |
| 131 | report_fatal_error( |
| 132 | reason: "generateMmapAuxMem is not implemented on the current architecture\n" ); |
| 133 | } |
| 134 | |
| 135 | // Moves argument registers into other registers that won't get clobbered |
| 136 | // while making syscalls. The code generated by this function may clobber |
| 137 | // registers. |
| 138 | virtual void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const { |
| 139 | report_fatal_error(reason: "moveArgumentRegisters is not implemented on the " |
| 140 | "current architecture\n" ); |
| 141 | } |
| 142 | |
| 143 | // Generates code to move argument registers, unmap memory above and below the |
| 144 | // snippet, and map the auxiliary memory into the subprocess. The code |
| 145 | // generated by this function may clobber registers. |
| 146 | virtual std::vector<MCInst> generateMemoryInitialSetup() const { |
| 147 | report_fatal_error(reason: "generateMemoryInitialSetup is not supported on the " |
| 148 | "current architecture\n" ); |
| 149 | } |
| 150 | |
| 151 | // Returns true if all features are available that are required by Opcode. |
| 152 | virtual bool isOpcodeAvailable(unsigned Opcode, |
| 153 | const FeatureBitset &Features) const { |
| 154 | return IsOpcodeAvailable(Opcode, Features); |
| 155 | } |
| 156 | |
| 157 | virtual const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, |
| 158 | unsigned Opcode) const; |
| 159 | |
| 160 | // Sets the stack register to the auxiliary memory so that operations |
| 161 | // requiring the stack can be formed (e.g., setting large registers). The code |
| 162 | // generated by this function may clobber registers. |
| 163 | virtual std::vector<MCInst> setStackRegisterToAuxMem() const { |
| 164 | report_fatal_error(reason: "setStackRegisterToAuxMem is not implemented on the " |
| 165 | "current architectures" ); |
| 166 | } |
| 167 | |
| 168 | virtual uintptr_t getAuxiliaryMemoryStartAddress() const { |
| 169 | report_fatal_error(reason: "getAuxiliaryMemoryStartAddress is not implemented on " |
| 170 | "the current architecture" ); |
| 171 | } |
| 172 | |
| 173 | // Generates the necessary ioctl system calls to configure the perf counters. |
| 174 | // The code generated by this function preserves all registers if the |
| 175 | // parameter SaveRegisters is set to true. |
| 176 | virtual std::vector<MCInst> configurePerfCounter(long Request, |
| 177 | bool SaveRegisters) const { |
| 178 | report_fatal_error( |
| 179 | reason: "configurePerfCounter is not implemented on the current architecture" ); |
| 180 | } |
| 181 | |
| 182 | // Gets the ABI dependent registers that are used to pass arguments in a |
| 183 | // function call. |
| 184 | virtual std::vector<MCRegister> getArgumentRegisters() const { |
| 185 | report_fatal_error( |
| 186 | reason: "getArgumentRegisters is not implemented on the current architecture" ); |
| 187 | }; |
| 188 | |
| 189 | // Gets the registers that might potentially need to be saved by while |
| 190 | // the setup in the test harness executes. |
| 191 | virtual std::vector<MCRegister> getRegistersNeedSaving() const { |
| 192 | report_fatal_error(reason: "getRegistersNeedSaving is not implemented on the " |
| 193 | "current architecture" ); |
| 194 | }; |
| 195 | |
| 196 | // Returns the register pointing to scratch memory, or 0 if this target |
| 197 | // does not support memory operands. The benchmark function uses the |
| 198 | // default calling convention. |
| 199 | virtual MCRegister getScratchMemoryRegister(const Triple &) const { |
| 200 | return MCRegister(); |
| 201 | } |
| 202 | |
| 203 | // Fills memory operands with references to the address at [Reg] + Offset. |
| 204 | virtual void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg, |
| 205 | unsigned Offset) const { |
| 206 | llvm_unreachable( |
| 207 | "fillMemoryOperands() requires getScratchMemoryRegister() > 0" ); |
| 208 | } |
| 209 | |
| 210 | // Returns a counter usable as a loop counter. |
| 211 | virtual MCRegister getDefaultLoopCounterRegister(const Triple &) const { |
| 212 | return MCRegister(); |
| 213 | } |
| 214 | |
| 215 | // Adds the code to decrement the loop counter and |
| 216 | virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB, |
| 217 | MachineBasicBlock &TargetMBB, |
| 218 | const MCInstrInfo &MII, |
| 219 | MCRegister LoopRegister) const { |
| 220 | llvm_unreachable("decrementLoopCounterAndBranch() requires " |
| 221 | "getLoopCounterRegister() > 0" ); |
| 222 | } |
| 223 | |
| 224 | // Returns a list of unavailable registers. |
| 225 | // Targets can use this to prevent some registers to be automatically selected |
| 226 | // for use in snippets. |
| 227 | virtual ArrayRef<MCPhysReg> getUnavailableRegisters() const { return {}; } |
| 228 | |
| 229 | // Returns the maximum number of bytes a load/store instruction can access at |
| 230 | // once. This is typically the size of the largest register available on the |
| 231 | // processor. Note that this only used as a hint to generate independant |
| 232 | // load/stores to/from memory, so the exact returned value does not really |
| 233 | // matter as long as it's large enough. |
| 234 | virtual unsigned getMaxMemoryAccessSize() const { return 0; } |
| 235 | |
| 236 | // Assigns a random operand of the right type to variable Var. |
| 237 | // The target is responsible for handling any operand starting from |
| 238 | // OPERAND_FIRST_TARGET. |
| 239 | virtual Error randomizeTargetMCOperand(const Instruction &Instr, |
| 240 | const Variable &Var, |
| 241 | MCOperand &AssignedValue, |
| 242 | const BitVector &ForbiddenRegs) const { |
| 243 | return make_error<Failure>( |
| 244 | Args: "targets with target-specific operands should implement this" ); |
| 245 | } |
| 246 | |
| 247 | // Returns true if this instruction is supported as a back-to-back |
| 248 | // instructions. |
| 249 | // FIXME: Eventually we should discover this dynamically. |
| 250 | virtual bool allowAsBackToBack(const Instruction &Instr) const { |
| 251 | return true; |
| 252 | } |
| 253 | |
| 254 | // For some instructions, it is interesting to measure how it's performance |
| 255 | // characteristics differ depending on it's operands. |
| 256 | // This allows us to produce all the interesting variants. |
| 257 | virtual std::vector<InstructionTemplate> |
| 258 | generateInstructionVariants(const Instruction &Instr, |
| 259 | unsigned MaxConfigsPerOpcode) const { |
| 260 | // By default, we're happy with whatever randomizer will give us. |
| 261 | return {&Instr}; |
| 262 | } |
| 263 | |
| 264 | // Checks hardware and software support for current benchmark mode. |
| 265 | // Returns an error if the target host does not have support to run the |
| 266 | // benchmark. |
| 267 | virtual Error checkFeatureSupport() const { return Error::success(); } |
| 268 | |
| 269 | // Creates a snippet generator for the given mode. |
| 270 | std::unique_ptr<SnippetGenerator> |
| 271 | createSnippetGenerator(Benchmark::ModeE Mode, |
| 272 | const LLVMState &State, |
| 273 | const SnippetGenerator::Options &Opts) const; |
| 274 | // Creates a benchmark runner for the given mode. |
| 275 | Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner( |
| 276 | Benchmark::ModeE Mode, const LLVMState &State, |
| 277 | BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
| 278 | BenchmarkRunner::ExecutionModeE ExecutionMode, |
| 279 | unsigned BenchmarkRepeatCount, |
| 280 | ArrayRef<ValidationEvent> ValidationCounters, |
| 281 | Benchmark::ResultAggregationModeE ResultAggMode = Benchmark::Min) const; |
| 282 | |
| 283 | // Returns the ExegesisTarget for the given triple or nullptr if the target |
| 284 | // does not exist. |
| 285 | static const ExegesisTarget *lookup(Triple TT); |
| 286 | // Returns the default (unspecialized) ExegesisTarget. |
| 287 | static const ExegesisTarget &getDefault(); |
| 288 | // Registers a target. Not thread safe. |
| 289 | static void registerTarget(ExegesisTarget *T); |
| 290 | |
| 291 | virtual ~ExegesisTarget(); |
| 292 | |
| 293 | // Returns the Pfm counters for the given CPU (or the default if no pfm |
| 294 | // counters are defined for this CPU). |
| 295 | const PfmCountersInfo &getPfmCounters(StringRef CpuName) const; |
| 296 | |
| 297 | // Returns dummy Pfm counters which can be used to execute generated snippet |
| 298 | // without access to performance counters. |
| 299 | const PfmCountersInfo &getDummyPfmCounters() const; |
| 300 | |
| 301 | // Saves the CPU state that needs to be preserved when running a benchmark, |
| 302 | // and returns and RAII object that restores the state on destruction. |
| 303 | // By default no state is preserved. |
| 304 | struct SavedState { |
| 305 | virtual ~SavedState(); |
| 306 | }; |
| 307 | virtual std::unique_ptr<SavedState> withSavedState() const { |
| 308 | return std::make_unique<SavedState>(); |
| 309 | } |
| 310 | |
| 311 | private: |
| 312 | virtual bool matchesArch(Triple::ArchType Arch) const = 0; |
| 313 | |
| 314 | // Targets can implement their own snippet generators/benchmarks runners by |
| 315 | // implementing these. |
| 316 | std::unique_ptr<SnippetGenerator> virtual createSerialSnippetGenerator( |
| 317 | const LLVMState &State, const SnippetGenerator::Options &Opts) const; |
| 318 | std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator( |
| 319 | const LLVMState &State, const SnippetGenerator::Options &Opts) const; |
| 320 | std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner( |
| 321 | const LLVMState &State, Benchmark::ModeE Mode, |
| 322 | BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
| 323 | Benchmark::ResultAggregationModeE ResultAggMode, |
| 324 | BenchmarkRunner::ExecutionModeE ExecutionMode, |
| 325 | ArrayRef<ValidationEvent> ValidationCounters, |
| 326 | unsigned BenchmarkRepeatCount) const; |
| 327 | std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner( |
| 328 | const LLVMState &State, BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
| 329 | Benchmark::ResultAggregationModeE ResultAggMode, |
| 330 | BenchmarkRunner::ExecutionModeE ExecutionMode, |
| 331 | ArrayRef<ValidationEvent> ValidationCounters) const; |
| 332 | |
| 333 | const ExegesisTarget *Next = nullptr; |
| 334 | const ArrayRef<CpuAndPfmCounters> CpuPfmCounters; |
| 335 | const OpcodeAvailabilityChecker IsOpcodeAvailable; |
| 336 | }; |
| 337 | |
| 338 | } // namespace exegesis |
| 339 | } // namespace llvm |
| 340 | |
| 341 | #endif // LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H |
| 342 | |