//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Measures execution properties (latencies/uops) of an instruction.
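///
/// Example invocations (illustrative; opcode names are target-specific):
///   llvm-exegesis -mode=latency -opcode-name=ADD64rr
///   llvm-exegesis -mode=uops -snippets-file=snippet.s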
///
//===----------------------------------------------------------------------===//

#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/CodeTemplate.h"
#include "lib/Error.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/ProgressMeter.h"
#include "lib/ResultAggregator.h"
#include "lib/SnippetFile.h"
#include "lib/SnippetRepetitor.h"
#include "lib/Target.h"
#include "lib/TargetSelect.h"
#include "lib/ValidationEvent.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <string>

namespace llvm {
namespace exegesis {

static cl::opt<int> OpcodeIndex(
    "opcode-index",
    cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
    cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<std::string>
    OpcodeNames("opcode-name",
                cl::desc("comma-separated list of opcodes to measure, by name"),
                cl::cat(BenchmarkOptions), cl::init(""));
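// Example: -opcode-name=ADD32rr,ADD64rr (X86 opcode names). Names are
// resolved via State.getOpcodeNameToOpcodeIdxMapping() in getOpcodesOrDie()
// below.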

static cl::opt<std::string> SnippetsFile("snippets-file",
                                         cl::desc("code snippets to measure"),
                                         cl::cat(BenchmarkOptions),
                                         cl::init(""));

static cl::opt<std::string>
    BenchmarkFile("benchmarks-file",
                  cl::desc("File to read (analysis mode) or write "
                           "(latency/uops/inverse_throughput modes) benchmark "
                           "results. “-” uses stdin/stdout."),
                  cl::cat(Options), cl::init(""));

static cl::opt<Benchmark::ModeE> BenchmarkMode(
    "mode", cl::desc("the mode to run"), cl::cat(Options),
    cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"),
               clEnumValN(Benchmark::InverseThroughput, "inverse_throughput",
                          "Instruction Inverse Throughput"),
               clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"),
               // When not asking for a specific benchmark mode,
               // we'll analyse the results.
               clEnumValN(Benchmark::Unknown, "analysis", "Analysis")));

static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode(
    "result-aggregation-mode",
    cl::desc("How to aggregate multi-value results"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"),
               clEnumValN(Benchmark::Max, "max", "Keep max reading"),
               clEnumValN(Benchmark::Mean, "mean",
                          "Compute mean of all readings"),
               clEnumValN(Benchmark::MinVariance, "min-variance",
                          "Keep readings set with min-variance")),
    cl::init(Benchmark::Min));

static cl::opt<Benchmark::RepetitionModeE> RepetitionMode(
    "repetition-mode", cl::desc("how to repeat the instruction snippet"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"),
        clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"),
        clEnumValN(Benchmark::AggregateMin, "min",
                   "All of the above and take the minimum of measurements"),
        clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate",
                   "Middle half duplicate mode"),
        clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop",
                   "Middle half loop mode")),
    cl::init(Benchmark::Duplicate));

static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
    "measurements-print-progress",
    cl::desc("Produce progress indicator when performing measurements"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
    "benchmark-phase",
    cl::desc("Stop the benchmarking process after the given phase"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
                   "Only generate the minimal instruction sequence"),
        clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
                   "prepare-and-assemble-snippet",
                   "Same as prepare-snippet, but also dumps an excerpt of the "
                   "sequence (hex encoded)"),
        clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode,
                   "assemble-measured-code",
                   "Same as prepare-and-assemble-snippet, but also creates "
                   "the full sequence that can be dumped to a file using "
                   "--dump-object-to-disk"),
        clEnumValN(
            BenchmarkPhaseSelectorE::Measure, "measure",
            "Same as assemble-measured-code, but also runs the measurement "
            "(default)")),
    cl::init(BenchmarkPhaseSelectorE::Measure));
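// Note: selecting a phase other than "measure" (e.g. --benchmark-phase=
// prepare-and-assemble-snippet) lets the tool run without functional perf
// counters; see the libpfm check in benchmarkMain() below.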

static cl::opt<bool>
    UseDummyPerfCounters("use-dummy-perf-counters",
                         cl::desc("Do not read real performance counters, use "
                                  "dummy values (for testing)"),
                         cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<unsigned>
    MinInstructions("min-instructions",
                    cl::desc("The minimum number of instructions that should "
                             "be included in the snippet"),
                    cl::cat(BenchmarkOptions), cl::init(10000));

static cl::opt<unsigned>
    LoopBodySize("loop-body-size",
                 cl::desc("when repeating the instruction snippet by looping "
                          "over it, duplicate the snippet until the loop body "
                          "contains at least this many instructions"),
                 cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<unsigned> MaxConfigsPerOpcode(
    "max-configs-per-opcode",
    cl::desc(
        "allow the snippet generator to generate at most that many configs"),
    cl::cat(BenchmarkOptions), cl::init(1));

static cl::opt<bool> IgnoreInvalidSchedClass(
    "ignore-invalid-sched-class",
    cl::desc("ignore instructions that do not define a sched class"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkFilter> AnalysisSnippetFilter(
    "analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkFilter::All, "all",
                   "Keep all benchmarks (default)"),
        clEnumValN(BenchmarkFilter::RegOnly, "reg-only",
                   "Keep only those benchmarks that do *NOT* involve memory"),
        clEnumValN(BenchmarkFilter::WithMem, "mem-only",
                   "Keep only the benchmarks that *DO* involve memory")),
    cl::init(BenchmarkFilter::All));

static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm(
    "analysis-clustering", cl::desc("the clustering algorithm to use"),
    cl::cat(AnalysisOptions),
    cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan",
                          "use DBSCAN/OPTICS algorithm"),
               clEnumValN(BenchmarkClustering::Naive, "naive",
                          "one cluster per opcode")),
    cl::init(BenchmarkClustering::Dbscan));

static cl::opt<unsigned> AnalysisDbscanNumPoints(
    "analysis-numpoints",
    cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
    cl::cat(AnalysisOptions), cl::init(3));

static cl::opt<float> AnalysisClusteringEpsilon(
    "analysis-clustering-epsilon",
    cl::desc("epsilon for benchmark point clustering"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<float> AnalysisInconsistencyEpsilon(
    "analysis-inconsistency-epsilon",
    cl::desc("epsilon for detection of when the cluster is different from the "
             "LLVM schedule profile values"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<std::string>
    AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
                               cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<std::string>
    AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
                                      cl::desc(""), cl::cat(AnalysisOptions),
                                      cl::init(""));

static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
    "analysis-display-unstable-clusters",
    cl::desc("If there is more than one benchmark for an opcode, those "
             "benchmarks may end up in different clusters when the measured "
             "performance characteristics differ. By default all such "
             "opcodes are filtered out; with this flag, only such unstable "
             "opcodes are shown"),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
    "analysis-override-benchmark-triple-and-cpu",
    cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
             "were measured for, but if you want to analyze them for some "
             "other combination (specified via -mtriple/-mcpu), you can "
             "pass this flag."),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<std::string>
    TripleName("mtriple",
               cl::desc("Target triple. See -version for available targets"),
               cl::cat(Options));

static cl::opt<std::string>
    MCPU("mcpu",
         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
         cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native"));

static cl::opt<std::string>
    DumpObjectToDisk("dump-object-to-disk",
                     cl::desc("dumps the generated benchmark object to disk "
                              "and prints a message to access it"),
                     cl::ValueOptional, cl::cat(BenchmarkOptions));
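// cl::ValueOptional: the flag may be passed with or without an explicit file
// name; see its use in runBenchmarkConfigurations() below.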

static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode(
    "execution-mode",
    cl::desc("Selects the execution mode to use for running snippets"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess,
                          "inprocess",
                          "Executes the snippets within the same process"),
               clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess,
                          "subprocess",
                          "Spawns a subprocess for each snippet execution, "
                          "allows for the use of memory annotations")),
    cl::init(BenchmarkRunner::ExecutionModeE::InProcess));

static cl::opt<unsigned> BenchmarkRepeatCount(
    "benchmark-repeat-count",
    cl::desc("The number of times to repeat measurements on the benchmark "
             "before aggregating the results"),
    cl::cat(BenchmarkOptions), cl::init(30));

static cl::list<ValidationEvent> ValidationCounters(
    "validation-counter",
    cl::desc(
        "The name of a validation counter to run concurrently with the main "
        "counter to validate benchmarking assumptions"),
    cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions());

static ExitOnError ExitOnErr("llvm-exegesis error: ");

// Helper function that logs the error(s) and exits.
template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) {
  ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...));
}

// Check Err. If it's in a failure state log the file error(s) and exit.
static void ExitOnFileError(const Twine &FileName, Error Err) {
  if (Err) {
    ExitOnErr(createFileError(FileName, std::move(Err)));
  }
}

// Check E. If it's in a success state then return the contained value.
// If it's in a failure state log the file error(s) and exit.
template <typename T>
T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
  ExitOnFileError(FileName, E.takeError());
  return std::move(*E);
}
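// For example, mirroring the use in analysisMain() below:
//   std::vector<Benchmark> Points = ExitOnFileError(
//       BenchmarkFile, Benchmark::readYamls(State, *MemoryBuffer));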

// Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is
// provided, and returns the opcode indices or {} if snippets should be read
// from `SnippetsFile`.
static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
  const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) +
                             (OpcodeIndex == 0 ? 0 : 1) +
                             (SnippetsFile.empty() ? 0 : 1);
  const auto &ET = State.getExegesisTarget();
  const auto AvailableFeatures = State.getSubtargetInfo().getFeatureBits();

  if (NumSetFlags != 1) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("please provide one and only one of 'opcode-index', "
                  "'opcode-name' or 'snippets-file'");
  }
  if (!SnippetsFile.empty())
    return {};
  if (OpcodeIndex > 0)
    return {static_cast<unsigned>(OpcodeIndex)};
  if (OpcodeIndex < 0) {
    std::vector<unsigned> Result;
    unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes();
    Result.reserve(NumOpcodes);
    for (unsigned I = 0, E = NumOpcodes; I < E; ++I) {
      if (!ET.isOpcodeAvailable(I, AvailableFeatures))
        continue;
      Result.push_back(I);
    }
    return Result;
  }
  // Resolve opcode name -> opcode.
  const auto ResolveName = [&State](StringRef OpcodeName) -> unsigned {
    const auto &Map = State.getOpcodeNameToOpcodeIdxMapping();
    auto I = Map.find(OpcodeName);
    if (I != Map.end())
      return I->getSecond();
    return 0u;
  };
  SmallVector<StringRef, 2> Pieces;
  StringRef(OpcodeNames.getValue())
      .split(Pieces, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
  std::vector<unsigned> Result;
  Result.reserve(Pieces.size());
  for (const StringRef &OpcodeName : Pieces) {
    if (unsigned Opcode = ResolveName(OpcodeName))
      Result.push_back(Opcode);
    else
      ExitWithError(Twine("unknown opcode ").concat(OpcodeName));
  }
  return Result;
}

// Generates code snippets for opcode `Opcode`.
static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
                 const BitVector &ForbiddenRegs) {
  const Instruction &Instr = State.getIC().getInstr(Opcode);
  const MCInstrDesc &InstrDesc = Instr.Description;
  // Ignore instructions that we cannot run.
  if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
    return make_error<Failure>(
        "Unsupported opcode: isPseudo/usesCustomInserter");
  if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
    return make_error<Failure>("Unsupported opcode: isBranch/isIndirectBranch");
  if (InstrDesc.isCall() || InstrDesc.isReturn())
    return make_error<Failure>("Unsupported opcode: isCall/isReturn");

  const std::vector<InstructionTemplate> InstructionVariants =
      State.getExegesisTarget().generateInstructionVariants(
          Instr, MaxConfigsPerOpcode);

  SnippetGenerator::Options SnippetOptions;
  SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
  const std::unique_ptr<SnippetGenerator> Generator =
      State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State,
                                                       SnippetOptions);
  if (!Generator)
    ExitWithError("cannot create snippet generator");

  std::vector<BenchmarkCode> Benchmarks;
  for (const InstructionTemplate &Variant : InstructionVariants) {
    if (Benchmarks.size() >= MaxConfigsPerOpcode)
      break;
    if (auto Err = Generator->generateConfigurations(Variant, Benchmarks,
                                                     ForbiddenRegs))
      return std::move(Err);
  }
  return Benchmarks;
}

static void runBenchmarkConfigurations(
    const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
    ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
    const BenchmarkRunner &Runner) {
  assert(!Configurations.empty() && "Don't have any configurations to run.");
  std::optional<raw_fd_ostream> FileOstr;
  if (BenchmarkFile != "-") {
    int ResultFD = 0;
    // Create output file or open existing file and truncate it, once.
    ExitOnErr(errorCodeToError(openFileForWrite(BenchmarkFile, ResultFD,
                                                sys::fs::CD_CreateAlways,
                                                sys::fs::OF_TextWithCRLF)));
    FileOstr.emplace(ResultFD, true /*shouldClose*/);
  }
  raw_ostream &Ostr = FileOstr ? *FileOstr : outs();

  std::optional<ProgressMeter<>> Meter;
  if (BenchmarkMeasurementsPrintProgress)
    Meter.emplace(Configurations.size());

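  // The middle-half repetition modes run each configuration at two lengths
  // (MinInstructions and twice that) so that the result aggregator can
  // subtract the two readings, cancelling constant per-run overhead.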
  SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
  if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
      RepetitionMode == Benchmark::MiddleHalfLoop)
    MinInstructionCounts.push_back(MinInstructions * 2);

  for (const BenchmarkCode &Conf : Configurations) {
    ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
    SmallVector<Benchmark, 2> AllResults;

    for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
         Repetitors) {
      for (unsigned IterationRepetitions : MinInstructionCounts) {
        auto RC = ExitOnErr(Runner.getRunnableConfiguration(
            Conf, IterationRepetitions, LoopBodySize, *Repetitor));
        std::optional<StringRef> DumpFile;
        if (DumpObjectToDisk.getNumOccurrences())
          DumpFile = DumpObjectToDisk;
        auto [Err, BenchmarkResult] =
            Runner.runConfiguration(std::move(RC), DumpFile);
        if (Err) {
          // Errors from executing the snippets are fine.
          // All other errors are a framework issue and should fail.
          if (!Err.isA<SnippetExecutionFailure>())
            ExitOnErr(std::move(Err));

          BenchmarkResult.Error = toString(std::move(Err));
        }
        AllResults.push_back(std::move(BenchmarkResult));
      }
    }

    Benchmark &Result = AllResults.front();

    // If any of our measurements failed, pretend they all have failed.
    if (AllResults.size() > 1 &&
        any_of(AllResults, [](const Benchmark &R) {
          return R.Measurements.empty();
        }))
      Result.Measurements.clear();

    std::unique_ptr<ResultAggregator> ResultAgg =
        ResultAggregator::CreateAggregator(RepetitionMode);
    ResultAgg->AggregateResults(Result,
                                ArrayRef<Benchmark>(AllResults).drop_front());

    // With dummy counters, measurements are rather meaningless,
    // so drop them altogether.
    if (UseDummyPerfCounters)
      Result.Measurements.clear();

    ExitOnFileError(BenchmarkFile, Result.writeYamlTo(State, Ostr));
  }
}

void benchmarkMain() {
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
      !UseDummyPerfCounters) {
#ifndef HAVE_LIBPFM
    ExitWithError(
        "benchmarking unavailable, LLVM was built without libpfm. You can "
        "pass --benchmark-phase=... to skip the actual benchmarking or "
        "--use-dummy-perf-counters to not query the kernel for real event "
        "counts.");
#else
    if (pfm::pfmInitialize())
      ExitWithError("cannot initialize libpfm");
#endif
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##AsmParser();
#include "llvm/Config/TargetExegesis.def"

  const LLVMState State =
      ExitOnErr(LLVMState::Create(TripleName, MCPU, "", UseDummyPerfCounters));

  // Preliminary check to ensure features needed for requested
  // benchmark mode are present on target CPU and/or OS.
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
    ExitOnErr(State.getExegesisTarget().checkFeatureSupport());

  if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&
      UseDummyPerfCounters)
    ExitWithError("Dummy perf counters are not supported in the subprocess "
                  "execution mode.");

  const std::unique_ptr<BenchmarkRunner> Runner =
      ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
          BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode,
          BenchmarkRepeatCount, ValidationCounters, ResultAggMode));
  if (!Runner) {
    ExitWithError("cannot create benchmark runner");
  }

  const auto Opcodes = getOpcodesOrDie(State);
  std::vector<BenchmarkCode> Configurations;

  unsigned LoopRegister =
      State.getExegesisTarget().getDefaultLoopCounterRegister(
          State.getTargetMachine().getTargetTriple());

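  // Snippets read from a file may carry their own loop register annotation,
  // which overrides the target default chosen above.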
  if (Opcodes.empty()) {
    Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
    for (const auto &Configuration : Configurations) {
      if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
          (Configuration.Key.MemoryMappings.size() != 0 ||
           Configuration.Key.MemoryValues.size() != 0 ||
           Configuration.Key.SnippetAddress != 0))
        ExitWithError("Memory and snippet address annotations are only "
                      "supported in subprocess execution mode");
    }
    LoopRegister = Configurations[0].Key.LoopRegister;
  }

  SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
  if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
    Repetitors.emplace_back(
        SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
  else {
    for (Benchmark::RepetitionModeE RepMode :
         {Benchmark::RepetitionModeE::Duplicate,
          Benchmark::RepetitionModeE::Loop})
      Repetitors.emplace_back(
          SnippetRepetitor::Create(RepMode, State, LoopRegister));
  }

  BitVector AllReservedRegs;
  for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
    AllReservedRegs |= Repetitor->getReservedRegs();

  if (!Opcodes.empty()) {
    for (const unsigned Opcode : Opcodes) {
      // Ignore instructions without a sched class if
      // -ignore-invalid-sched-class is passed.
      if (IgnoreInvalidSchedClass &&
          State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
        errs() << State.getInstrInfo().getName(Opcode)
               << ": ignoring instruction without sched class\n";
        continue;
      }

      auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
      if (!ConfigsForInstr) {
        logAllUnhandledErrors(
            ConfigsForInstr.takeError(), errs(),
            Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
        continue;
      }
      std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
                std::back_inserter(Configurations));
    }
  }

  if (MinInstructions == 0) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("--min-instructions must be greater than zero");
  }

  // Write to standard output if file is not set.
  if (BenchmarkFile.empty())
    BenchmarkFile = "-";

  if (!Configurations.empty())
    runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner);

  pfm::pfmTerminate();
}

// Prints the results of running analysis pass `Pass` to file `OutputFilename`
// if OutputFilename is non-empty.
template <typename Pass>
static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
                             const std::string &OutputFilename) {
  if (OutputFilename.empty())
    return;
  if (OutputFilename != "-") {
    errs() << "Printing " << Name << " results to file '" << OutputFilename
           << "'\n";
  }
  std::error_code ErrorCode;
  raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
                            sys::fs::FA_Read | sys::fs::FA_Write);
  if (ErrorCode)
    ExitOnFileError(OutputFilename, errorCodeToError(ErrorCode));
  if (auto Err = Analyzer.run<Pass>(ClustersOS))
    ExitOnFileError(OutputFilename, std::move(Err));
}

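// Applies --analysis-filter: non-matching points are marked with an error
// rather than removed, which keeps them out of the clustering below.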
static void filterPoints(MutableArrayRef<Benchmark> Points,
                         const MCInstrInfo &MCII) {
  if (AnalysisSnippetFilter == BenchmarkFilter::All)
    return;

  bool WantPointsWithMemOps = AnalysisSnippetFilter == BenchmarkFilter::WithMem;
  for (Benchmark &Point : Points) {
    if (!Point.Error.empty())
      continue;
    if (WantPointsWithMemOps ==
        any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) {
          const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
          return MCDesc.mayLoad() || MCDesc.mayStore();
        }))
      continue;
    Point.Error = "filtered out by user";
  }
}

static void analysisMain() {
  ExitOnErr.setBanner("llvm-exegesis: ");
  if (BenchmarkFile.empty())
    ExitWithError("--benchmarks-file must be set");

  if (AnalysisClustersOutputFile.empty() &&
      AnalysisInconsistenciesOutputFile.empty()) {
    ExitWithError(
        "for --mode=analysis: At least one of --analysis-clusters-output-file "
        "and --analysis-inconsistencies-output-file must be specified");
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/TargetExegesis.def"

  auto MemoryBuffer = ExitOnFileError(
      BenchmarkFile,
      errorOrToExpected(MemoryBuffer::getFile(BenchmarkFile, /*IsText=*/true)));

  const auto TriplesAndCpus = ExitOnFileError(
      BenchmarkFile, Benchmark::readTriplesAndCpusFromYamls(*MemoryBuffer));
  if (TriplesAndCpus.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  if (TriplesAndCpus.size() > 1) {
    ExitWithError("analysis file contains benchmarks from several CPUs. This "
                  "is unsupported.");
  }
  auto TripleAndCpu = *TriplesAndCpus.begin();
  if (AnalysisOverrideBenchmarksTripleAndCpu) {
    errs() << "overriding file CPU name (" << TripleAndCpu.CpuName
           << ") with the provided triple (" << TripleName
           << ") and CPU name (" << MCPU << ")\n";
    TripleAndCpu.LLVMTriple = TripleName;
    TripleAndCpu.CpuName = MCPU;
  }
  errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '"
         << TripleAndCpu.CpuName << "'\n";

  // Read benchmarks.
  const LLVMState State = ExitOnErr(
      LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName));
  std::vector<Benchmark> Points = ExitOnFileError(
      BenchmarkFile, Benchmark::readYamls(State, *MemoryBuffer));

  outs() << "Parsed " << Points.size() << " benchmark points\n";
  if (Points.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  // FIXME: Merge points from several runs (latency and uops).

  filterPoints(Points, State.getInstrInfo());

  const auto Clustering = ExitOnErr(BenchmarkClustering::create(
      Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
      AnalysisClusteringEpsilon, &State.getSubtargetInfo(),
      &State.getInstrInfo()));

  const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon,
                          AnalysisDisplayUnstableOpcodes);

  maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
                                            AnalysisClustersOutputFile);
  maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
      Analyzer, "sched class consistency analysis",
      AnalysisInconsistenciesOutputFile);
}

} // namespace exegesis
} // namespace llvm

int main(int Argc, char **Argv) {
  using namespace llvm;

  InitLLVM X(Argc, Argv);

  // Initialize targets so we can print them when flag --version is specified.
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##Target();                                        \
  LLVMInitialize##TargetName##TargetInfo();                                    \
  LLVMInitialize##TargetName##TargetMC();
#include "llvm/Config/TargetExegesis.def"

  // Register the Target and CPU printer for --version.
  cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU);

  // Enable printing of available targets when flag --version is specified.
  cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);

  cl::HideUnrelatedOptions({&exegesis::Options, &exegesis::BenchmarkOptions,
                            &exegesis::AnalysisOptions});

  cl::ParseCommandLineOptions(Argc, Argv,
                              "llvm host machine instruction characteristics "
                              "measurement and analysis.\n");

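  // Map clustering errors to a successful exit code; all other errors exit
  // with failure.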
  exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
    if (Err.isA<exegesis::ClusteringError>())
      return EXIT_SUCCESS;
    return EXIT_FAILURE;
  });

  if (exegesis::BenchmarkMode == exegesis::Benchmark::Unknown) {
    exegesis::analysisMain();
  } else {
    exegesis::benchmarkMain();
  }
  return EXIT_SUCCESS;
}