1 | //===-- Target.h ------------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// |
11 | /// Classes that handle the creation of target-specific objects. This is |
12 | /// similar to Target/TargetRegistry. |
13 | /// |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H |
17 | #define LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H |
18 | |
19 | #include "BenchmarkResult.h" |
20 | #include "BenchmarkRunner.h" |
21 | #include "Error.h" |
22 | #include "LlvmState.h" |
23 | #include "PerfHelper.h" |
24 | #include "SnippetGenerator.h" |
25 | #include "ValidationEvent.h" |
26 | #include "llvm/CodeGen/TargetPassConfig.h" |
27 | #include "llvm/IR/CallingConv.h" |
28 | #include "llvm/IR/LegacyPassManager.h" |
29 | #include "llvm/MC/MCInst.h" |
30 | #include "llvm/MC/MCRegisterInfo.h" |
31 | #include "llvm/Support/CommandLine.h" |
32 | #include "llvm/Support/Error.h" |
33 | #include "llvm/TargetParser/SubtargetFeature.h" |
34 | #include "llvm/TargetParser/Triple.h" |
35 | |
36 | namespace llvm { |
37 | namespace exegesis { |
38 | |
// Command-line option categories. They are only declared here (`extern`); the
// definitions live in another translation unit. Judging by their names they
// group the general, benchmark-mode and analysis-mode options respectively --
// NOTE(review): confirm against the definitions.
extern cl::OptionCategory Options;
extern cl::OptionCategory BenchmarkOptions;
extern cl::OptionCategory AnalysisOptions;
42 | |
// Plain-data description of the performance counters available for a CPU.
// All strings and arrays are held through raw pointers; this struct does not
// release them.
struct PfmCountersInfo {
  // An optional name of a performance counter that can be used to measure
  // cycles.
  const char *CycleCounter;

  // An optional name of a performance counter that can be used to measure
  // uops.
  const char *UopsCounter;

  // An IssueCounter specifies how to measure uops issued to specific proc
  // resources.
  struct IssueCounter {
    // The name of the performance counter.
    const char *Counter;
    // The name of the ProcResource that this counter measures.
    const char *ProcResName;
  };
  // An optional list of IssueCounters, together with its length.
  const IssueCounter *IssueCounters;
  unsigned NumIssueCounters;

  // A list of (validation event, string) pairs, together with its length.
  // The string is presumably the name of the performance counter that
  // measures the event -- TODO confirm against the code that fills this in.
  const std::pair<ValidationEvent, const char *> *ValidationEvents;
  unsigned NumValidationEvents;

  // Predefined instances; only declared here, defined elsewhere.
  static const PfmCountersInfo Default;
  static const PfmCountersInfo Dummy;
};
69 | |
70 | struct CpuAndPfmCounters { |
71 | const char *CpuName; |
72 | const PfmCountersInfo *PCI; |
73 | bool operator<(StringRef S) const { return StringRef(CpuName) < S; } |
74 | }; |
75 | |
76 | class ExegesisTarget { |
77 | public: |
78 | typedef bool (*OpcodeAvailabilityChecker)(unsigned, const FeatureBitset &); |
79 | ExegesisTarget(ArrayRef<CpuAndPfmCounters> CpuPfmCounters, |
80 | OpcodeAvailabilityChecker IsOpcodeAvailable) |
81 | : CpuPfmCounters(CpuPfmCounters), IsOpcodeAvailable(IsOpcodeAvailable) {} |
82 | |
83 | // Targets can use this to create target-specific perf counters. |
84 | virtual Expected<std::unique_ptr<pfm::CounterGroup>> |
85 | createCounter(StringRef CounterName, const LLVMState &State, |
86 | ArrayRef<const char *> ValidationCounters, |
87 | const pid_t ProcessID = 0) const; |
88 | |
89 | // Targets can use this to add target-specific passes in assembleToStream(); |
90 | virtual void addTargetSpecificPasses(PassManagerBase &PM) const {} |
91 | |
92 | // Generates code to move a constant into a the given register. |
93 | // Precondition: Value must fit into Reg. |
94 | virtual std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg, |
95 | const APInt &Value) const = 0; |
96 | |
97 | // Generates the code for the lower munmap call. The code generated by this |
98 | // function may clobber registers. |
99 | virtual void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const { |
100 | report_fatal_error( |
101 | reason: "generateLowerMunmap is not implemented on the current architecture" ); |
102 | } |
103 | |
104 | // Generates the upper munmap call. The code generated by this function may |
105 | // clobber registers. |
106 | virtual void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const { |
107 | report_fatal_error( |
108 | reason: "generateUpperMunmap is not implemented on the current architecture" ); |
109 | } |
110 | |
111 | // Generates the code for an exit syscall. The code generated by this function |
112 | // may clobber registers. |
113 | virtual std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const { |
114 | report_fatal_error( |
115 | reason: "generateExitSyscall is not implemented on the current architecture" ); |
116 | } |
117 | |
118 | // Generates the code to mmap a region of code. The code generated by this |
119 | // function may clobber registers. |
120 | virtual std::vector<MCInst> |
121 | generateMmap(intptr_t Address, size_t Length, |
122 | intptr_t FileDescriptorAddress) const { |
123 | report_fatal_error( |
124 | reason: "generateMmap is not implemented on the current architecture" ); |
125 | } |
126 | |
127 | // Generates the mmap code for the aux memory. The code generated by this |
128 | // function may clobber registers. |
129 | virtual void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const { |
130 | report_fatal_error( |
131 | reason: "generateMmapAuxMem is not implemented on the current architecture\n" ); |
132 | } |
133 | |
134 | // Moves argument registers into other registers that won't get clobbered |
135 | // while making syscalls. The code generated by this function may clobber |
136 | // registers. |
137 | virtual void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const { |
138 | report_fatal_error(reason: "moveArgumentRegisters is not implemented on the " |
139 | "current architecture\n" ); |
140 | } |
141 | |
142 | // Generates code to move argument registers, unmap memory above and below the |
143 | // snippet, and map the auxiliary memory into the subprocess. The code |
144 | // generated by this function may clobber registers. |
145 | virtual std::vector<MCInst> generateMemoryInitialSetup() const { |
146 | report_fatal_error(reason: "generateMemoryInitialSetup is not supported on the " |
147 | "current architecture\n" ); |
148 | } |
149 | |
150 | // Returns true if all features are available that are required by Opcode. |
151 | virtual bool isOpcodeAvailable(unsigned Opcode, |
152 | const FeatureBitset &Features) const { |
153 | return IsOpcodeAvailable(Opcode, Features); |
154 | } |
155 | |
156 | // Sets the stack register to the auxiliary memory so that operations |
157 | // requiring the stack can be formed (e.g., setting large registers). The code |
158 | // generated by this function may clobber registers. |
159 | virtual std::vector<MCInst> setStackRegisterToAuxMem() const { |
160 | report_fatal_error(reason: "setStackRegisterToAuxMem is not implemented on the " |
161 | "current architectures" ); |
162 | } |
163 | |
164 | virtual intptr_t getAuxiliaryMemoryStartAddress() const { |
165 | report_fatal_error(reason: "getAuxiliaryMemoryStartAddress is not implemented on " |
166 | "the current architecture" ); |
167 | } |
168 | |
169 | // Generates the necessary ioctl system calls to configure the perf counters. |
170 | // The code generated by this function preserves all registers if the |
171 | // parameter SaveRegisters is set to true. |
172 | virtual std::vector<MCInst> configurePerfCounter(long Request, |
173 | bool SaveRegisters) const { |
174 | report_fatal_error( |
175 | reason: "configurePerfCounter is not implemented on the current architecture" ); |
176 | } |
177 | |
178 | // Gets the ABI dependent registers that are used to pass arguments in a |
179 | // function call. |
180 | virtual std::vector<unsigned> getArgumentRegisters() const { |
181 | report_fatal_error( |
182 | reason: "getArgumentRegisters is not implemented on the current architecture" ); |
183 | }; |
184 | |
185 | // Gets the registers that might potentially need to be saved by while |
186 | // the setup in the test harness executes. |
187 | virtual std::vector<unsigned> getRegistersNeedSaving() const { |
188 | report_fatal_error(reason: "getRegistersNeedSaving is not implemented on the " |
189 | "current architecture" ); |
190 | }; |
191 | |
192 | // Returns the register pointing to scratch memory, or 0 if this target |
193 | // does not support memory operands. The benchmark function uses the |
194 | // default calling convention. |
195 | virtual unsigned getScratchMemoryRegister(const Triple &) const { return 0; } |
196 | |
197 | // Fills memory operands with references to the address at [Reg] + Offset. |
198 | virtual void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg, |
199 | unsigned Offset) const { |
200 | llvm_unreachable( |
201 | "fillMemoryOperands() requires getScratchMemoryRegister() > 0" ); |
202 | } |
203 | |
204 | // Returns a counter usable as a loop counter. |
205 | virtual unsigned getDefaultLoopCounterRegister(const Triple &) const { |
206 | return 0; |
207 | } |
208 | |
209 | // Adds the code to decrement the loop counter and |
210 | virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB, |
211 | MachineBasicBlock &TargetMBB, |
212 | const MCInstrInfo &MII, |
213 | unsigned LoopRegister) const { |
214 | llvm_unreachable("decrementLoopCounterAndBranch() requires " |
215 | "getLoopCounterRegister() > 0" ); |
216 | } |
217 | |
218 | // Returns a list of unavailable registers. |
219 | // Targets can use this to prevent some registers to be automatically selected |
220 | // for use in snippets. |
221 | virtual ArrayRef<unsigned> getUnavailableRegisters() const { return {}; } |
222 | |
223 | // Returns the maximum number of bytes a load/store instruction can access at |
224 | // once. This is typically the size of the largest register available on the |
225 | // processor. Note that this only used as a hint to generate independant |
226 | // load/stores to/from memory, so the exact returned value does not really |
227 | // matter as long as it's large enough. |
228 | virtual unsigned getMaxMemoryAccessSize() const { return 0; } |
229 | |
230 | // Assigns a random operand of the right type to variable Var. |
231 | // The target is responsible for handling any operand starting from |
232 | // OPERAND_FIRST_TARGET. |
233 | virtual Error randomizeTargetMCOperand(const Instruction &Instr, |
234 | const Variable &Var, |
235 | MCOperand &AssignedValue, |
236 | const BitVector &ForbiddenRegs) const { |
237 | return make_error<Failure>( |
238 | Args: "targets with target-specific operands should implement this" ); |
239 | } |
240 | |
241 | // Returns true if this instruction is supported as a back-to-back |
242 | // instructions. |
243 | // FIXME: Eventually we should discover this dynamically. |
244 | virtual bool allowAsBackToBack(const Instruction &Instr) const { |
245 | return true; |
246 | } |
247 | |
248 | // For some instructions, it is interesting to measure how it's performance |
249 | // characteristics differ depending on it's operands. |
250 | // This allows us to produce all the interesting variants. |
251 | virtual std::vector<InstructionTemplate> |
252 | generateInstructionVariants(const Instruction &Instr, |
253 | unsigned MaxConfigsPerOpcode) const { |
254 | // By default, we're happy with whatever randomizer will give us. |
255 | return {&Instr}; |
256 | } |
257 | |
258 | // Checks hardware and software support for current benchmark mode. |
259 | // Returns an error if the target host does not have support to run the |
260 | // benchmark. |
261 | virtual Error checkFeatureSupport() const { return Error::success(); } |
262 | |
263 | // Creates a snippet generator for the given mode. |
264 | std::unique_ptr<SnippetGenerator> |
265 | createSnippetGenerator(Benchmark::ModeE Mode, |
266 | const LLVMState &State, |
267 | const SnippetGenerator::Options &Opts) const; |
268 | // Creates a benchmark runner for the given mode. |
269 | Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner( |
270 | Benchmark::ModeE Mode, const LLVMState &State, |
271 | BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
272 | BenchmarkRunner::ExecutionModeE ExecutionMode, |
273 | unsigned BenchmarkRepeatCount, |
274 | ArrayRef<ValidationEvent> ValidationCounters, |
275 | Benchmark::ResultAggregationModeE ResultAggMode = Benchmark::Min) const; |
276 | |
277 | // Returns the ExegesisTarget for the given triple or nullptr if the target |
278 | // does not exist. |
279 | static const ExegesisTarget *lookup(Triple TT); |
280 | // Returns the default (unspecialized) ExegesisTarget. |
281 | static const ExegesisTarget &getDefault(); |
282 | // Registers a target. Not thread safe. |
283 | static void registerTarget(ExegesisTarget *T); |
284 | |
285 | virtual ~ExegesisTarget(); |
286 | |
287 | // Returns the Pfm counters for the given CPU (or the default if no pfm |
288 | // counters are defined for this CPU). |
289 | const PfmCountersInfo &getPfmCounters(StringRef CpuName) const; |
290 | |
291 | // Returns dummy Pfm counters which can be used to execute generated snippet |
292 | // without access to performance counters. |
293 | const PfmCountersInfo &getDummyPfmCounters() const; |
294 | |
295 | // Saves the CPU state that needs to be preserved when running a benchmark, |
296 | // and returns and RAII object that restores the state on destruction. |
297 | // By default no state is preserved. |
298 | struct SavedState { |
299 | virtual ~SavedState(); |
300 | }; |
301 | virtual std::unique_ptr<SavedState> withSavedState() const { |
302 | return std::make_unique<SavedState>(); |
303 | } |
304 | |
305 | private: |
306 | virtual bool matchesArch(Triple::ArchType Arch) const = 0; |
307 | |
308 | // Targets can implement their own snippet generators/benchmarks runners by |
309 | // implementing these. |
310 | std::unique_ptr<SnippetGenerator> virtual createSerialSnippetGenerator( |
311 | const LLVMState &State, const SnippetGenerator::Options &Opts) const; |
312 | std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator( |
313 | const LLVMState &State, const SnippetGenerator::Options &Opts) const; |
314 | std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner( |
315 | const LLVMState &State, Benchmark::ModeE Mode, |
316 | BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
317 | Benchmark::ResultAggregationModeE ResultAggMode, |
318 | BenchmarkRunner::ExecutionModeE ExecutionMode, |
319 | ArrayRef<ValidationEvent> ValidationCounters, |
320 | unsigned BenchmarkRepeatCount) const; |
321 | std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner( |
322 | const LLVMState &State, BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
323 | Benchmark::ResultAggregationModeE ResultAggMode, |
324 | BenchmarkRunner::ExecutionModeE ExecutionMode, |
325 | ArrayRef<ValidationEvent> ValidationCounters) const; |
326 | |
327 | const ExegesisTarget *Next = nullptr; |
328 | const ArrayRef<CpuAndPfmCounters> CpuPfmCounters; |
329 | const OpcodeAvailabilityChecker IsOpcodeAvailable; |
330 | }; |
331 | |
332 | } // namespace exegesis |
333 | } // namespace llvm |
334 | |
335 | #endif // LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H |
336 | |