1 | //===-- Target.h ------------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// |
11 | /// Classes that handle the creation of target-specific objects. This is |
12 | /// similar to Target/TargetRegistry. |
13 | /// |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H |
17 | #define LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H |
18 | |
19 | #include "BenchmarkResult.h" |
20 | #include "BenchmarkRunner.h" |
21 | #include "Error.h" |
22 | #include "LlvmState.h" |
23 | #include "PerfHelper.h" |
24 | #include "SnippetGenerator.h" |
25 | #include "ValidationEvent.h" |
26 | #include "llvm/CodeGen/TargetPassConfig.h" |
27 | #include "llvm/IR/CallingConv.h" |
28 | #include "llvm/IR/LegacyPassManager.h" |
29 | #include "llvm/MC/MCInst.h" |
30 | #include "llvm/MC/MCRegisterInfo.h" |
31 | #include "llvm/Support/CommandLine.h" |
32 | #include "llvm/Support/Error.h" |
33 | #include "llvm/TargetParser/SubtargetFeature.h" |
34 | #include "llvm/TargetParser/Triple.h" |
35 | |
36 | namespace llvm { |
37 | namespace exegesis { |
38 | |
// Command-line option categories. They are only declared here (extern); their
// definitions live in another translation unit.
// NOTE(review): judging by the names, these presumably group the general,
// benchmark-mode and analysis-mode options respectively -- confirm against the
// definitions.
extern cl::OptionCategory Options;
extern cl::OptionCategory BenchmarkOptions;
extern cl::OptionCategory AnalysisOptions;
42 | |
43 | struct PfmCountersInfo { |
44 | // An optional name of a performance counter that can be used to measure |
45 | // cycles. |
46 | const char *CycleCounter; |
47 | |
48 | // An optional name of a performance counter that can be used to measure |
49 | // uops. |
50 | const char *UopsCounter; |
51 | |
52 | // An IssueCounter specifies how to measure uops issued to specific proc |
53 | // resources. |
54 | struct IssueCounter { |
55 | const char *Counter; |
56 | // The name of the ProcResource that this counter measures. |
57 | const char *ProcResName; |
58 | }; |
59 | // An optional list of IssueCounters. |
60 | const IssueCounter *IssueCounters; |
61 | unsigned NumIssueCounters; |
62 | |
63 | const std::pair<ValidationEvent, const char *> *ValidationEvents; |
64 | unsigned NumValidationEvents; |
65 | |
66 | static const PfmCountersInfo Default; |
67 | static const PfmCountersInfo Dummy; |
68 | }; |
69 | |
70 | struct CpuAndPfmCounters { |
71 | const char *CpuName; |
72 | const PfmCountersInfo *PCI; |
73 | bool operator<(StringRef S) const { return StringRef(CpuName) < S; } |
74 | }; |
75 | |
76 | class ExegesisTarget { |
77 | public: |
78 | typedef bool (*OpcodeAvailabilityChecker)(unsigned, const FeatureBitset &); |
79 | ExegesisTarget(ArrayRef<CpuAndPfmCounters> CpuPfmCounters, |
80 | OpcodeAvailabilityChecker IsOpcodeAvailable) |
81 | : CpuPfmCounters(CpuPfmCounters), IsOpcodeAvailable(IsOpcodeAvailable) {} |
82 | |
83 | // Targets can use this to create target-specific perf counters. |
84 | virtual Expected<std::unique_ptr<pfm::CounterGroup>> |
85 | createCounter(StringRef CounterName, const LLVMState &State, |
86 | ArrayRef<const char *> ValidationCounters, |
87 | const pid_t ProcessID = 0) const; |
88 | |
89 | // Targets can use this to add target-specific passes in assembleToStream(); |
90 | virtual void addTargetSpecificPasses(PassManagerBase &PM) const {} |
91 | |
92 | // Generates code to move a constant into a the given register. |
93 | // Precondition: Value must fit into Reg. |
94 | virtual std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, |
95 | MCRegister Reg, |
96 | const APInt &Value) const = 0; |
97 | |
98 | // Generates the code for the lower munmap call. The code generated by this |
99 | // function may clobber registers. |
100 | virtual void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const { |
101 | report_fatal_error( |
102 | reason: "generateLowerMunmap is not implemented on the current architecture" ); |
103 | } |
104 | |
105 | // Generates the upper munmap call. The code generated by this function may |
106 | // clobber registers. |
107 | virtual void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const { |
108 | report_fatal_error( |
109 | reason: "generateUpperMunmap is not implemented on the current architecture" ); |
110 | } |
111 | |
112 | // Generates the code for an exit syscall. The code generated by this function |
113 | // may clobber registers. |
114 | virtual std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const { |
115 | report_fatal_error( |
116 | reason: "generateExitSyscall is not implemented on the current architecture" ); |
117 | } |
118 | |
119 | // Generates the code to mmap a region of code. The code generated by this |
120 | // function may clobber registers. |
121 | virtual std::vector<MCInst> |
122 | generateMmap(uintptr_t Address, size_t Length, |
123 | uintptr_t FileDescriptorAddress) const { |
124 | report_fatal_error( |
125 | reason: "generateMmap is not implemented on the current architecture" ); |
126 | } |
127 | |
128 | // Generates the mmap code for the aux memory. The code generated by this |
129 | // function may clobber registers. |
130 | virtual void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const { |
131 | report_fatal_error( |
132 | reason: "generateMmapAuxMem is not implemented on the current architecture\n" ); |
133 | } |
134 | |
135 | // Moves argument registers into other registers that won't get clobbered |
136 | // while making syscalls. The code generated by this function may clobber |
137 | // registers. |
138 | virtual void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const { |
139 | report_fatal_error(reason: "moveArgumentRegisters is not implemented on the " |
140 | "current architecture\n" ); |
141 | } |
142 | |
143 | // Generates code to move argument registers, unmap memory above and below the |
144 | // snippet, and map the auxiliary memory into the subprocess. The code |
145 | // generated by this function may clobber registers. |
146 | virtual std::vector<MCInst> generateMemoryInitialSetup() const { |
147 | report_fatal_error(reason: "generateMemoryInitialSetup is not supported on the " |
148 | "current architecture\n" ); |
149 | } |
150 | |
151 | // Returns true if all features are available that are required by Opcode. |
152 | virtual bool isOpcodeAvailable(unsigned Opcode, |
153 | const FeatureBitset &Features) const { |
154 | return IsOpcodeAvailable(Opcode, Features); |
155 | } |
156 | |
157 | virtual const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, |
158 | unsigned Opcode) const; |
159 | |
160 | // Sets the stack register to the auxiliary memory so that operations |
161 | // requiring the stack can be formed (e.g., setting large registers). The code |
162 | // generated by this function may clobber registers. |
163 | virtual std::vector<MCInst> setStackRegisterToAuxMem() const { |
164 | report_fatal_error(reason: "setStackRegisterToAuxMem is not implemented on the " |
165 | "current architectures" ); |
166 | } |
167 | |
168 | virtual uintptr_t getAuxiliaryMemoryStartAddress() const { |
169 | report_fatal_error(reason: "getAuxiliaryMemoryStartAddress is not implemented on " |
170 | "the current architecture" ); |
171 | } |
172 | |
173 | // Generates the necessary ioctl system calls to configure the perf counters. |
174 | // The code generated by this function preserves all registers if the |
175 | // parameter SaveRegisters is set to true. |
176 | virtual std::vector<MCInst> configurePerfCounter(long Request, |
177 | bool SaveRegisters) const { |
178 | report_fatal_error( |
179 | reason: "configurePerfCounter is not implemented on the current architecture" ); |
180 | } |
181 | |
182 | // Gets the ABI dependent registers that are used to pass arguments in a |
183 | // function call. |
184 | virtual std::vector<MCRegister> getArgumentRegisters() const { |
185 | report_fatal_error( |
186 | reason: "getArgumentRegisters is not implemented on the current architecture" ); |
187 | }; |
188 | |
189 | // Gets the registers that might potentially need to be saved by while |
190 | // the setup in the test harness executes. |
191 | virtual std::vector<MCRegister> getRegistersNeedSaving() const { |
192 | report_fatal_error(reason: "getRegistersNeedSaving is not implemented on the " |
193 | "current architecture" ); |
194 | }; |
195 | |
196 | // Returns the register pointing to scratch memory, or 0 if this target |
197 | // does not support memory operands. The benchmark function uses the |
198 | // default calling convention. |
199 | virtual MCRegister getScratchMemoryRegister(const Triple &) const { |
200 | return MCRegister(); |
201 | } |
202 | |
203 | // Fills memory operands with references to the address at [Reg] + Offset. |
204 | virtual void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg, |
205 | unsigned Offset) const { |
206 | llvm_unreachable( |
207 | "fillMemoryOperands() requires getScratchMemoryRegister() > 0" ); |
208 | } |
209 | |
210 | // Returns a counter usable as a loop counter. |
211 | virtual MCRegister getDefaultLoopCounterRegister(const Triple &) const { |
212 | return MCRegister(); |
213 | } |
214 | |
215 | // Adds the code to decrement the loop counter and |
216 | virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB, |
217 | MachineBasicBlock &TargetMBB, |
218 | const MCInstrInfo &MII, |
219 | MCRegister LoopRegister) const { |
220 | llvm_unreachable("decrementLoopCounterAndBranch() requires " |
221 | "getLoopCounterRegister() > 0" ); |
222 | } |
223 | |
224 | // Returns a list of unavailable registers. |
225 | // Targets can use this to prevent some registers to be automatically selected |
226 | // for use in snippets. |
227 | virtual ArrayRef<MCPhysReg> getUnavailableRegisters() const { return {}; } |
228 | |
229 | // Returns the maximum number of bytes a load/store instruction can access at |
230 | // once. This is typically the size of the largest register available on the |
231 | // processor. Note that this only used as a hint to generate independant |
232 | // load/stores to/from memory, so the exact returned value does not really |
233 | // matter as long as it's large enough. |
234 | virtual unsigned getMaxMemoryAccessSize() const { return 0; } |
235 | |
236 | // Assigns a random operand of the right type to variable Var. |
237 | // The target is responsible for handling any operand starting from |
238 | // OPERAND_FIRST_TARGET. |
239 | virtual Error randomizeTargetMCOperand(const Instruction &Instr, |
240 | const Variable &Var, |
241 | MCOperand &AssignedValue, |
242 | const BitVector &ForbiddenRegs) const { |
243 | return make_error<Failure>( |
244 | Args: "targets with target-specific operands should implement this" ); |
245 | } |
246 | |
247 | // Returns true if this instruction is supported as a back-to-back |
248 | // instructions. |
249 | // FIXME: Eventually we should discover this dynamically. |
250 | virtual bool allowAsBackToBack(const Instruction &Instr) const { |
251 | return true; |
252 | } |
253 | |
254 | // For some instructions, it is interesting to measure how it's performance |
255 | // characteristics differ depending on it's operands. |
256 | // This allows us to produce all the interesting variants. |
257 | virtual std::vector<InstructionTemplate> |
258 | generateInstructionVariants(const Instruction &Instr, |
259 | unsigned MaxConfigsPerOpcode) const { |
260 | // By default, we're happy with whatever randomizer will give us. |
261 | return {&Instr}; |
262 | } |
263 | |
264 | // Checks hardware and software support for current benchmark mode. |
265 | // Returns an error if the target host does not have support to run the |
266 | // benchmark. |
267 | virtual Error checkFeatureSupport() const { return Error::success(); } |
268 | |
269 | // Creates a snippet generator for the given mode. |
270 | std::unique_ptr<SnippetGenerator> |
271 | createSnippetGenerator(Benchmark::ModeE Mode, |
272 | const LLVMState &State, |
273 | const SnippetGenerator::Options &Opts) const; |
274 | // Creates a benchmark runner for the given mode. |
275 | Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner( |
276 | Benchmark::ModeE Mode, const LLVMState &State, |
277 | BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
278 | BenchmarkRunner::ExecutionModeE ExecutionMode, |
279 | unsigned BenchmarkRepeatCount, |
280 | ArrayRef<ValidationEvent> ValidationCounters, |
281 | Benchmark::ResultAggregationModeE ResultAggMode = Benchmark::Min) const; |
282 | |
283 | // Returns the ExegesisTarget for the given triple or nullptr if the target |
284 | // does not exist. |
285 | static const ExegesisTarget *lookup(Triple TT); |
286 | // Returns the default (unspecialized) ExegesisTarget. |
287 | static const ExegesisTarget &getDefault(); |
288 | // Registers a target. Not thread safe. |
289 | static void registerTarget(ExegesisTarget *T); |
290 | |
291 | virtual ~ExegesisTarget(); |
292 | |
293 | // Returns the Pfm counters for the given CPU (or the default if no pfm |
294 | // counters are defined for this CPU). |
295 | const PfmCountersInfo &getPfmCounters(StringRef CpuName) const; |
296 | |
297 | // Returns dummy Pfm counters which can be used to execute generated snippet |
298 | // without access to performance counters. |
299 | const PfmCountersInfo &getDummyPfmCounters() const; |
300 | |
301 | // Saves the CPU state that needs to be preserved when running a benchmark, |
302 | // and returns and RAII object that restores the state on destruction. |
303 | // By default no state is preserved. |
304 | struct SavedState { |
305 | virtual ~SavedState(); |
306 | }; |
307 | virtual std::unique_ptr<SavedState> withSavedState() const { |
308 | return std::make_unique<SavedState>(); |
309 | } |
310 | |
311 | private: |
312 | virtual bool matchesArch(Triple::ArchType Arch) const = 0; |
313 | |
314 | // Targets can implement their own snippet generators/benchmarks runners by |
315 | // implementing these. |
316 | std::unique_ptr<SnippetGenerator> virtual createSerialSnippetGenerator( |
317 | const LLVMState &State, const SnippetGenerator::Options &Opts) const; |
318 | std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator( |
319 | const LLVMState &State, const SnippetGenerator::Options &Opts) const; |
320 | std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner( |
321 | const LLVMState &State, Benchmark::ModeE Mode, |
322 | BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
323 | Benchmark::ResultAggregationModeE ResultAggMode, |
324 | BenchmarkRunner::ExecutionModeE ExecutionMode, |
325 | ArrayRef<ValidationEvent> ValidationCounters, |
326 | unsigned BenchmarkRepeatCount) const; |
327 | std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner( |
328 | const LLVMState &State, BenchmarkPhaseSelectorE BenchmarkPhaseSelector, |
329 | Benchmark::ResultAggregationModeE ResultAggMode, |
330 | BenchmarkRunner::ExecutionModeE ExecutionMode, |
331 | ArrayRef<ValidationEvent> ValidationCounters) const; |
332 | |
333 | const ExegesisTarget *Next = nullptr; |
334 | const ArrayRef<CpuAndPfmCounters> CpuPfmCounters; |
335 | const OpcodeAvailabilityChecker IsOpcodeAvailable; |
336 | }; |
337 | |
338 | } // namespace exegesis |
339 | } // namespace llvm |
340 | |
341 | #endif // LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H |
342 | |