//===-- Target.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// Classes that handle the creation of target-specific objects. This is
/// similar to Target/TargetRegistry.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H
#define LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H

#include "BenchmarkResult.h"
#include "BenchmarkRunner.h"
#include "Error.h"
#include "LlvmState.h"
#include "PerfHelper.h"
#include "SnippetGenerator.h"
#include "ValidationEvent.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/TargetParser/Triple.h"

namespace llvm {
namespace exegesis {

extern cl::OptionCategory Options;
extern cl::OptionCategory BenchmarkOptions;
extern cl::OptionCategory AnalysisOptions;

struct PfmCountersInfo {
  // An optional name of a performance counter that can be used to measure
  // cycles.
  const char *CycleCounter;

  // An optional name of a performance counter that can be used to measure
  // uops.
  const char *UopsCounter;

  // An IssueCounter specifies how to measure uops issued to specific proc
  // resources.
  struct IssueCounter {
    const char *Counter;
    // The name of the ProcResource that this counter measures.
    const char *ProcResName;
  };
  // An optional list of IssueCounters.
  const IssueCounter *IssueCounters;
  unsigned NumIssueCounters;

  // An optional list of validation events, each paired with the name of the
  // performance counter that measures it.
  const std::pair<ValidationEvent, const char *> *ValidationEvents;
  unsigned NumValidationEvents;

  static const PfmCountersInfo Default;
  static const PfmCountersInfo Dummy;
};
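// Illustrative sketch (not part of the API): a target typically provides a
// table of PfmCountersInfo entries, usually generated from its tablegen
// PfmCounters definitions. The counter names below are hypothetical.
//
//   static const PfmCountersInfo::IssueCounter MyIssueCounters[] = {
//       {"hypothetical_port0_uops", "MyPort0"},
//       {"hypothetical_port1_uops", "MyPort1"}};
//
//   static const PfmCountersInfo MyCpuPfmCounters = {
//       "hypothetical_cycles_counter", // CycleCounter
//       "hypothetical_uops_counter",   // UopsCounter
//       MyIssueCounters,
//       2,       // NumIssueCounters
//       nullptr, // ValidationEvents
//       0};      // NumValidationEvents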
struct CpuAndPfmCounters {
  const char *CpuName;
  const PfmCountersInfo *PCI;
  // Allows binary-searching tables sorted by CpuName (see getPfmCounters()).
  bool operator<(StringRef S) const { return StringRef(CpuName) < S; }
};

class ExegesisTarget {
public:
  typedef bool (*OpcodeAvailabilityChecker)(unsigned, const FeatureBitset &);
  ExegesisTarget(ArrayRef<CpuAndPfmCounters> CpuPfmCounters,
                 OpcodeAvailabilityChecker IsOpcodeAvailable)
      : CpuPfmCounters(CpuPfmCounters), IsOpcodeAvailable(IsOpcodeAvailable) {}

  // Targets can use this to create target-specific perf counters.
  virtual Expected<std::unique_ptr<pfm::CounterGroup>>
  createCounter(StringRef CounterName, const LLVMState &State,
                ArrayRef<const char *> ValidationCounters,
                const pid_t ProcessID = 0) const;

  // Targets can use this to add target-specific passes in assembleToStream().
  virtual void addTargetSpecificPasses(PassManagerBase &PM) const {}

  // Generates code to move a constant into the given register.
  // Precondition: Value must fit into Reg.
  virtual std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI,
                                       MCRegister Reg,
                                       const APInt &Value) const = 0;
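  // A minimal sketch of a possible override for a 64-bit target with a
  // move-immediate instruction (the MyTarget opcode name is hypothetical):
  //
  //   std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
  //                                const APInt &Value) const override {
  //     return {MCInstBuilder(MyTarget::MOV64ri)
  //                 .addReg(Reg)
  //                 .addImm(Value.getZExtValue())};
  //   }
  //
  // Real targets usually dispatch on the register class (GPR, vector, flags,
  // ...) and may need several instructions, e.g. to materialize wide vector
  // constants through memory.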
  // Generates the code for the lower munmap call. The code generated by this
  // function may clobber registers.
  virtual void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const {
    report_fatal_error(
        "generateLowerMunmap is not implemented on the current architecture");
  }

  // Generates the upper munmap call. The code generated by this function may
  // clobber registers.
  virtual void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const {
    report_fatal_error(
        "generateUpperMunmap is not implemented on the current architecture");
  }

  // Generates the code for an exit syscall. The code generated by this
  // function may clobber registers.
  virtual std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const {
    report_fatal_error(
        "generateExitSyscall is not implemented on the current architecture");
  }

  // Generates the code to mmap a region of memory. The code generated by this
  // function may clobber registers.
  virtual std::vector<MCInst>
  generateMmap(uintptr_t Address, size_t Length,
               uintptr_t FileDescriptorAddress) const {
    report_fatal_error(
        "generateMmap is not implemented on the current architecture");
  }

  // Generates the mmap code for the auxiliary memory. The code generated by
  // this function may clobber registers.
  virtual void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const {
    report_fatal_error(
        "generateMmapAuxMem is not implemented on the current architecture");
  }

  // Moves argument registers into other registers that won't get clobbered
  // while making syscalls. The code generated by this function may clobber
  // registers.
  virtual void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const {
    report_fatal_error("moveArgumentRegisters is not implemented on the "
                       "current architecture");
  }

  // Generates code to move argument registers, unmap memory above and below
  // the snippet, and map the auxiliary memory into the subprocess. The code
  // generated by this function may clobber registers.
  virtual std::vector<MCInst> generateMemoryInitialSetup() const {
    report_fatal_error("generateMemoryInitialSetup is not supported on the "
                       "current architecture");
  }

  // Returns true if all features required by Opcode are available.
  virtual bool isOpcodeAvailable(unsigned Opcode,
                                 const FeatureBitset &Features) const {
    return IsOpcodeAvailable(Opcode, Features);
  }
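  // Concrete targets usually forward this to the availability checker that
  // tablegen emits for their MC layer, passed in through the constructor.
  // Sketch with hypothetical names (the X86 target, for instance, passes its
  // generated X86_MC::isOpcodeAvailable):
  //
  //   ExegesisMyTarget()
  //       : ExegesisTarget(MyTargetCpuPfmCounters,
  //                        MyTarget_MC::isOpcodeAvailable) {}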
  virtual const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
                                                   unsigned Opcode) const;

  // Sets the stack register to the auxiliary memory so that operations
  // requiring the stack can be performed (e.g., setting large registers). The
  // code generated by this function may clobber registers.
  virtual std::vector<MCInst> setStackRegisterToAuxMem() const {
    report_fatal_error("setStackRegisterToAuxMem is not implemented on the "
                       "current architecture");
  }

  virtual uintptr_t getAuxiliaryMemoryStartAddress() const {
    report_fatal_error("getAuxiliaryMemoryStartAddress is not implemented on "
                       "the current architecture");
  }

  // Generates the necessary ioctl system calls to configure the perf counters.
  // The code generated by this function preserves all registers if the
  // parameter SaveRegisters is set to true.
  virtual std::vector<MCInst> configurePerfCounter(long Request,
                                                   bool SaveRegisters) const {
    report_fatal_error(
        "configurePerfCounter is not implemented on the current architecture");
  }

  // Gets the ABI-dependent registers that are used to pass arguments in a
  // function call.
  virtual std::vector<MCRegister> getArgumentRegisters() const {
    report_fatal_error(
        "getArgumentRegisters is not implemented on the current architecture");
  }

  // Gets the registers that might need to be saved while the setup code in the
  // test harness executes.
  virtual std::vector<MCRegister> getRegistersNeedSaving() const {
    report_fatal_error("getRegistersNeedSaving is not implemented on the "
                       "current architecture");
  }

  // Returns the register pointing to scratch memory, or an invalid register if
  // this target does not support memory operands. The benchmark function uses
  // the default calling convention.
  virtual MCRegister getScratchMemoryRegister(const Triple &) const {
    return MCRegister();
  }
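  // Sketch of a possible override, loosely modeled on the X86 target, where
  // the scratch register is the first integer argument register of the
  // default calling convention:
  //
  //   MCRegister getScratchMemoryRegister(const Triple &TT) const override {
  //     if (!TT.isArch64Bit())
  //       return MCRegister(); // Scratch memory only supported in 64-bit mode.
  //     return TT.isOSWindows() ? X86::RCX : X86::RDI;
  //   }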
  // Fills memory operands with references to the address at [Reg] + Offset.
  virtual void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
                                  unsigned Offset) const {
    llvm_unreachable("fillMemoryOperands() requires "
                     "getScratchMemoryRegister() to return a valid register");
  }

  // Returns a register that can be used as a loop counter.
  virtual MCRegister getDefaultLoopCounterRegister(const Triple &) const {
    return MCRegister();
  }

  // Adds the code to decrement the loop counter and jump to TargetMBB if the
  // counter is not yet zero.
  virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
                                           MachineBasicBlock &TargetMBB,
                                           const MCInstrInfo &MII,
                                           MCRegister LoopRegister) const {
    llvm_unreachable("decrementLoopCounterAndJump() requires "
                     "getDefaultLoopCounterRegister() to return a valid "
                     "register");
  }

  // Returns a list of unavailable registers.
  // Targets can use this to prevent some registers from being automatically
  // selected for use in snippets.
  virtual ArrayRef<MCPhysReg> getUnavailableRegisters() const { return {}; }

  // Returns the maximum number of bytes a load/store instruction can access at
  // once. This is typically the size of the largest register available on the
  // processor. Note that this is only used as a hint to generate independent
  // loads/stores to/from memory, so the exact returned value does not really
  // matter as long as it's large enough.
  virtual unsigned getMaxMemoryAccessSize() const { return 0; }

  // Assigns a random operand of the right type to variable Var.
  // The target is responsible for handling any operand starting from
  // OPERAND_FIRST_TARGET.
  virtual Error randomizeTargetMCOperand(const Instruction &Instr,
                                         const Variable &Var,
                                         MCOperand &AssignedValue,
                                         const BitVector &ForbiddenRegs) const {
    return make_error<Failure>(
        "targets with target-specific operands should implement this");
  }

  // Returns true if this instruction is supported as a back-to-back
  // instruction.
  // FIXME: Eventually we should discover this dynamically.
  virtual bool allowAsBackToBack(const Instruction &Instr) const {
    return true;
  }

  // For some instructions, it is interesting to measure how their performance
  // characteristics differ depending on their operands.
  // This allows us to produce all the interesting variants.
  virtual std::vector<InstructionTemplate>
  generateInstructionVariants(const Instruction &Instr,
                              unsigned MaxConfigsPerOpcode) const {
    // By default, we're happy with whatever the randomizer gives us.
    return {&Instr};
  }
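  // A target that wants to benchmark an instruction once per interesting
  // immediate value could override this roughly as follows (sketch only;
  // assumes the instruction's first variable is the immediate operand):
  //
  //   std::vector<InstructionTemplate>
  //   generateInstructionVariants(const Instruction &Instr,
  //                               unsigned MaxConfigsPerOpcode) const override {
  //     std::vector<InstructionTemplate> Variants;
  //     for (unsigned Imm = 0;
  //          Imm < 4 && Variants.size() < MaxConfigsPerOpcode; ++Imm) {
  //       InstructionTemplate IT(&Instr);
  //       IT.getValueFor(Instr.Variables[0]) = MCOperand::createImm(Imm);
  //       Variants.push_back(std::move(IT));
  //     }
  //     return Variants;
  //   }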
  // Checks hardware and software support for the current benchmark mode.
  // Returns an error if the target host does not have support to run the
  // benchmark.
  virtual Error checkFeatureSupport() const { return Error::success(); }

  // Creates a snippet generator for the given mode.
  std::unique_ptr<SnippetGenerator>
  createSnippetGenerator(Benchmark::ModeE Mode, const LLVMState &State,
                         const SnippetGenerator::Options &Opts) const;

  // Creates a benchmark runner for the given mode.
  Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner(
      Benchmark::ModeE Mode, const LLVMState &State,
      BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
      BenchmarkRunner::ExecutionModeE ExecutionMode,
      unsigned BenchmarkRepeatCount,
      ArrayRef<ValidationEvent> ValidationCounters,
      Benchmark::ResultAggregationModeE ResultAggMode = Benchmark::Min) const;

  // Returns the ExegesisTarget for the given triple, or nullptr if the target
  // does not exist.
  static const ExegesisTarget *lookup(Triple TT);
  // Returns the default (unspecialized) ExegesisTarget.
  static const ExegesisTarget &getDefault();
  // Registers a target. Not thread safe.
  static void registerTarget(ExegesisTarget *T);
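  // Registration follows the usual LLVM pattern: each target defines an
  // InitializeXXXExegesisTarget() entry point (names below are illustrative)
  // that hands a singleton to registerTarget() when the tool starts up,
  // before lookup() is used.
  //
  //   namespace {
  //   class ExegesisMyTarget : public ExegesisTarget { /* overrides */ };
  //   } // namespace
  //
  //   void InitializeMyTargetExegesisTarget() {
  //     static ExegesisMyTarget TheTarget;
  //     ExegesisTarget::registerTarget(&TheTarget);
  //   }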
  virtual ~ExegesisTarget();

  // Returns the Pfm counters for the given CPU (or the default if no pfm
  // counters are defined for this CPU).
  const PfmCountersInfo &getPfmCounters(StringRef CpuName) const;

  // Returns dummy Pfm counters which can be used to execute a generated
  // snippet without access to performance counters.
  const PfmCountersInfo &getDummyPfmCounters() const;

  // Saves the CPU state that needs to be preserved when running a benchmark,
  // and returns an RAII object that restores the state on destruction.
  // By default no state is preserved.
  struct SavedState {
    virtual ~SavedState();
  };
  virtual std::unique_ptr<SavedState> withSavedState() const {
    return std::make_unique<SavedState>();
  }
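  // A target that must preserve mutable CPU state around a benchmark (for
  // example, floating-point control and status registers) can subclass
  // SavedState. Sketch with hypothetical helpers readFPControl() and
  // writeFPControl():
  //
  //   struct MySavedState : ExegesisTarget::SavedState {
  //     MySavedState() : Saved(readFPControl()) {}          // capture on entry
  //     ~MySavedState() override { writeFPControl(Saved); } // restore on exit
  //     unsigned Saved;
  //   };
  //
  //   std::unique_ptr<SavedState> withSavedState() const override {
  //     return std::make_unique<MySavedState>();
  //   }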
private:
  virtual bool matchesArch(Triple::ArchType Arch) const = 0;

  // Targets can implement their own snippet generators/benchmark runners by
  // overriding these.
  virtual std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
      const LLVMState &State, const SnippetGenerator::Options &Opts) const;
  virtual std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
      const LLVMState &State, const SnippetGenerator::Options &Opts) const;
  virtual std::unique_ptr<BenchmarkRunner> createLatencyBenchmarkRunner(
      const LLVMState &State, Benchmark::ModeE Mode,
      BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
      Benchmark::ResultAggregationModeE ResultAggMode,
      BenchmarkRunner::ExecutionModeE ExecutionMode,
      ArrayRef<ValidationEvent> ValidationCounters,
      unsigned BenchmarkRepeatCount) const;
  virtual std::unique_ptr<BenchmarkRunner> createUopsBenchmarkRunner(
      const LLVMState &State, BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
      Benchmark::ResultAggregationModeE ResultAggMode,
      BenchmarkRunner::ExecutionModeE ExecutionMode,
      ArrayRef<ValidationEvent> ValidationCounters) const;

  const ExegesisTarget *Next = nullptr;
  const ArrayRef<CpuAndPfmCounters> CpuPfmCounters;
  const OpcodeAvailabilityChecker IsOpcodeAvailable;
};

} // namespace exegesis
} // namespace llvm

#endif // LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H