//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Measures execution properties (latencies/uops) of an instruction.
///
//===----------------------------------------------------------------------===//

#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/CodeTemplate.h"
#include "lib/Error.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/ProgressMeter.h"
#include "lib/ResultAggregator.h"
#include "lib/SnippetFile.h"
#include "lib/SnippetRepetitor.h"
#include "lib/Target.h"
#include "lib/TargetSelect.h"
#include "lib/ValidationEvent.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <string>

namespace llvm {
namespace exegesis {

static cl::opt<int> OpcodeIndex(
    "opcode-index",
    cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
    cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<std::string>
    OpcodeNames("opcode-name",
                cl::desc("comma-separated list of opcodes to measure, by name"),
                cl::cat(BenchmarkOptions), cl::init(""));

static cl::opt<std::string> SnippetsFile("snippets-file",
                                         cl::desc("code snippets to measure"),
                                         cl::cat(BenchmarkOptions),
                                         cl::init(""));

static cl::opt<std::string>
    BenchmarkFile("benchmarks-file",
                  cl::desc("File to read (analysis mode) or write "
                           "(latency/uops/inverse_throughput modes) benchmark "
                           "results. “-” uses stdin/stdout."),
                  cl::cat(Options), cl::init(""));

static cl::opt<Benchmark::ModeE> BenchmarkMode(
    "mode", cl::desc("the mode to run"), cl::cat(Options),
    cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"),
               clEnumValN(Benchmark::InverseThroughput, "inverse_throughput",
                          "Instruction Inverse Throughput"),
               clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"),
               // When not asking for a specific benchmark mode,
               // we'll analyse the results.
               clEnumValN(Benchmark::Unknown, "analysis", "Analysis")));

static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode(
    "result-aggregation-mode",
    cl::desc("How to aggregate multi-value results"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"),
               clEnumValN(Benchmark::Max, "max", "Keep max reading"),
               clEnumValN(Benchmark::Mean, "mean",
                          "Compute mean of all readings"),
               clEnumValN(Benchmark::MinVariance, "min-variance",
                          "Keep readings set with min-variance")),
    cl::init(Benchmark::Min));

static cl::opt<Benchmark::RepetitionModeE> RepetitionMode(
    "repetition-mode", cl::desc("how to repeat the instruction snippet"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"),
        clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"),
        clEnumValN(Benchmark::AggregateMin, "min",
                   "All of the above and take the minimum of measurements"),
        clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate",
                   "Middle half duplicate mode"),
        clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop",
                   "Middle half loop mode")),
    cl::init(Benchmark::Duplicate));

static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
    "measurements-print-progress",
    cl::desc("Produce progress indicator when performing measurements"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
    "benchmark-phase",
    cl::desc("Stop the benchmarking process after the given phase"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
                   "Only generate the minimal instruction sequence"),
        clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
                   "prepare-and-assemble-snippet",
                   "Same as prepare-snippet, but also dumps an excerpt of the "
                   "sequence (hex encoded)"),
        clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode,
                   "assemble-measured-code",
                   "Same as prepare-and-assemble-snippet, but also creates the "
                   "full sequence "
                   "that can be dumped to a file using --dump-object-to-disk"),
        clEnumValN(
            BenchmarkPhaseSelectorE::Measure, "measure",
            "Same as assemble-measured-code, but also runs the measurement "
            "(default)")),
    cl::init(BenchmarkPhaseSelectorE::Measure));

static cl::opt<bool>
    UseDummyPerfCounters("use-dummy-perf-counters",
                         cl::desc("Do not read real performance counters, use "
                                  "dummy values (for testing)"),
                         cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<unsigned>
    MinInstructions("min-instructions",
                    cl::desc("The minimum number of instructions that should "
                             "be included in the snippet"),
                    cl::cat(BenchmarkOptions), cl::init(10000));

static cl::opt<unsigned>
    LoopBodySize("loop-body-size",
                 cl::desc("when repeating the instruction snippet by looping "
                          "over it, duplicate the snippet until the loop body "
                          "contains at least this many instructions"),
                 cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<unsigned> MaxConfigsPerOpcode(
    "max-configs-per-opcode",
    cl::desc(
        "allow the snippet generator to generate at most that many configs"),
    cl::cat(BenchmarkOptions), cl::init(1));

static cl::opt<bool> IgnoreInvalidSchedClass(
    "ignore-invalid-sched-class",
    cl::desc("ignore instructions that do not define a sched class"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkFilter> AnalysisSnippetFilter(
    "analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkFilter::All, "all",
                   "Keep all benchmarks (default)"),
        clEnumValN(BenchmarkFilter::RegOnly, "reg-only",
                   "Keep only those benchmarks that do *NOT* involve memory"),
        clEnumValN(BenchmarkFilter::WithMem, "mem-only",
                   "Keep only the benchmarks that *DO* involve memory")),
    cl::init(BenchmarkFilter::All));

static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm(
    "analysis-clustering", cl::desc("the clustering algorithm to use"),
    cl::cat(AnalysisOptions),
    cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan",
                          "use DBSCAN/OPTICS algorithm"),
               clEnumValN(BenchmarkClustering::Naive, "naive",
                          "one cluster per opcode")),
    cl::init(BenchmarkClustering::Dbscan));

static cl::opt<unsigned> AnalysisDbscanNumPoints(
    "analysis-numpoints",
    cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
    cl::cat(AnalysisOptions), cl::init(3));

static cl::opt<float> AnalysisClusteringEpsilon(
    "analysis-clustering-epsilon",
    cl::desc("epsilon for benchmark point clustering"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<float> AnalysisInconsistencyEpsilon(
    "analysis-inconsistency-epsilon",
    cl::desc("epsilon for detection of when the cluster is different from the "
             "LLVM schedule profile values"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<std::string>
    AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
                               cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<std::string>
    AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
                                      cl::desc(""), cl::cat(AnalysisOptions),
                                      cl::init(""));

static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
    "analysis-display-unstable-clusters",
    cl::desc("if there is more than one benchmark for an opcode, said "
             "benchmarks may end up not being clustered into the same cluster "
             "if the measured performance characteristics are different. by "
             "default all such opcodes are filtered out. this flag will "
             "instead show only such unstable opcodes"),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
    "analysis-override-benchmark-triple-and-cpu",
    cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
             "were measured for, but if you want to analyze them for some "
             "other combination (specified via -mtriple/-mcpu), you can "
             "pass this flag."),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<std::string>
    TripleName("mtriple",
               cl::desc("Target triple. See -version for available targets"),
               cl::cat(Options));

static cl::opt<std::string>
    MCPU("mcpu",
         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
         cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native"));

static cl::opt<std::string>
    DumpObjectToDisk("dump-object-to-disk",
                     cl::desc("dumps the generated benchmark object to disk "
                              "and prints a message to access it"),
                     cl::ValueOptional, cl::cat(BenchmarkOptions));

static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode(
    "execution-mode",
    cl::desc("Selects the execution mode to use for running snippets"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess,
                          "inprocess",
                          "Executes the snippets within the same process"),
               clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess,
                          "subprocess",
                          "Spawns a subprocess for each snippet execution, "
                          "allows for the use of memory annotations")),
    cl::init(BenchmarkRunner::ExecutionModeE::InProcess));

static cl::opt<unsigned> BenchmarkRepeatCount(
    "benchmark-repeat-count",
    cl::desc("The number of times to repeat measurements on the benchmark "
             "before aggregating the results"),
    cl::cat(BenchmarkOptions), cl::init(30));

static cl::list<ValidationEvent> ValidationCounters(
    "validation-counter",
    cl::desc(
        "The name of a validation counter to run concurrently with the main "
        "counter to validate benchmarking assumptions"),
    cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions());

static cl::opt<int> BenchmarkProcessCPU(
    "benchmark-process-cpu",
    cl::desc("The CPU number that the benchmarking process should execute on"),
    cl::cat(BenchmarkOptions), cl::init(-1));

static cl::opt<std::string> MAttr(
    "mattr", cl::desc("comma-separated list of target architecture features"),
    cl::value_desc("+feature1,-feature2,..."), cl::cat(Options), cl::init(""));

static ExitOnError ExitOnErr("llvm-exegesis error: ");

// Helper function that logs the error(s) and exits.
template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) {
  ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...));
}

// Check Err. If it's in a failure state log the file error(s) and exit.
static void ExitOnFileError(const Twine &FileName, Error Err) {
  if (Err) {
    ExitOnErr(createFileError(FileName, std::move(Err)));
  }
}

// Check E. If it's in a success state then return the contained value.
// If it's in a failure state log the file error(s) and exit.
template <typename T>
T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
  ExitOnFileError(FileName, E.takeError());
  return std::move(*E);
}

// Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is
// provided, and returns the opcode indices or {} if snippets should be read
// from `SnippetsFile`.
static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
  const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) +
                             (OpcodeIndex == 0 ? 0 : 1) +
                             (SnippetsFile.empty() ? 0 : 1);
  const auto &ET = State.getExegesisTarget();
  const auto AvailableFeatures = State.getSubtargetInfo().getFeatureBits();

  if (NumSetFlags != 1) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("please provide one and only one of 'opcode-index', "
                  "'opcode-name' or 'snippets-file'");
  }
  if (!SnippetsFile.empty())
    return {};
  if (OpcodeIndex > 0)
    return {static_cast<unsigned>(OpcodeIndex)};
  if (OpcodeIndex < 0) {
    std::vector<unsigned> Result;
    unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes();
    Result.reserve(NumOpcodes);
    for (unsigned I = 0, E = NumOpcodes; I < E; ++I) {
      if (!ET.isOpcodeAvailable(I, AvailableFeatures))
        continue;
      Result.push_back(I);
    }
    return Result;
  }
  // Resolve opcode name -> opcode.
  const auto ResolveName = [&State](StringRef OpcodeName) -> unsigned {
    const auto &Map = State.getOpcodeNameToOpcodeIdxMapping();
    auto I = Map.find(OpcodeName);
    if (I != Map.end())
      return I->getSecond();
    return 0u;
  };

  SmallVector<StringRef, 2> Pieces;
  StringRef(OpcodeNames.getValue())
      .split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
  std::vector<unsigned> Result;
  Result.reserve(Pieces.size());
  for (const StringRef &OpcodeName : Pieces) {
    if (unsigned Opcode = ResolveName(OpcodeName))
      Result.push_back(Opcode);
    else
      ExitWithError(Twine("unknown opcode ").concat(OpcodeName));
  }
  return Result;
}

// Generates code snippets for opcode `Opcode`.
static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
                 const BitVector &ForbiddenRegs) {
  // Ignore instructions that we cannot run.
  if (const char *Reason =
          State.getExegesisTarget().getIgnoredOpcodeReasonOrNull(State, Opcode))
    return make_error<Failure>(Reason);

  const Instruction &Instr = State.getIC().getInstr(Opcode);
  const std::vector<InstructionTemplate> InstructionVariants =
      State.getExegesisTarget().generateInstructionVariants(
          Instr, MaxConfigsPerOpcode);

  SnippetGenerator::Options SnippetOptions;
  SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
  const std::unique_ptr<SnippetGenerator> Generator =
      State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State,
                                                       SnippetOptions);
  if (!Generator)
    ExitWithError("cannot create snippet generator");

  std::vector<BenchmarkCode> Benchmarks;
  for (const InstructionTemplate &Variant : InstructionVariants) {
    if (Benchmarks.size() >= MaxConfigsPerOpcode)
      break;
    if (auto Err = Generator->generateConfigurations(Variant, Benchmarks,
                                                     ForbiddenRegs))
      return std::move(Err);
  }
  return Benchmarks;
}

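// Runs each configuration with every selected repetitor, aggregates the
// per-run results, and writes them as YAML to the benchmarks file (or stdout).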
static void runBenchmarkConfigurations(
    const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
    ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
    const BenchmarkRunner &Runner) {
  assert(!Configurations.empty() && "Don't have any configurations to run.");
  std::optional<raw_fd_ostream> FileOstr;
  if (BenchmarkFile != "-") {
    int ResultFD = 0;
    // Create output file or open existing file and truncate it, once.
    ExitOnErr(errorCodeToError(openFileForWrite(BenchmarkFile, ResultFD,
                                                sys::fs::CD_CreateAlways,
                                                sys::fs::OF_TextWithCRLF)));
    FileOstr.emplace(ResultFD, true /*shouldClose*/);
  }
  raw_ostream &Ostr = FileOstr ? *FileOstr : outs();

  std::optional<ProgressMeter<>> Meter;
  if (BenchmarkMeasurementsPrintProgress)
    Meter.emplace(Configurations.size());

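  // The middle-half repetition modes run each snippet twice, at MinInstructions
  // and at 2 * MinInstructions, so that the aggregator can take the difference
  // between the two runs and discard constant overhead.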
  SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
  if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
      RepetitionMode == Benchmark::MiddleHalfLoop)
    MinInstructionCounts.push_back(MinInstructions * 2);

  for (const BenchmarkCode &Conf : Configurations) {
    ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
    SmallVector<Benchmark, 2> AllResults;

    for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
         Repetitors) {
      for (unsigned IterationRepetitions : MinInstructionCounts) {
        auto RC = ExitOnErr(Runner.getRunnableConfiguration(
            Conf, IterationRepetitions, LoopBodySize, *Repetitor));
        std::optional<StringRef> DumpFile;
        if (DumpObjectToDisk.getNumOccurrences())
          DumpFile = DumpObjectToDisk;
        const std::optional<int> BenchmarkCPU =
            BenchmarkProcessCPU == -1
                ? std::nullopt
                : std::optional(BenchmarkProcessCPU.getValue());
        auto [Err, BenchmarkResult] =
            Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU);
        if (Err) {
          // Errors from executing the snippets are fine.
          // All other errors are a framework issue and should fail.
          if (!Err.isA<SnippetExecutionFailure>())
            ExitOnErr(std::move(Err));

          BenchmarkResult.Error = toString(std::move(Err));
        }
        AllResults.push_back(std::move(BenchmarkResult));
      }
    }

    Benchmark &Result = AllResults.front();

    // If any of our measurements failed, pretend they all have failed.
    if (AllResults.size() > 1 &&
        any_of(AllResults, [](const Benchmark &R) {
          return R.Measurements.empty();
        }))
      Result.Measurements.clear();

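    // Fold the measurements from the extra runs (other repetitors and
    // instruction counts) into the first result, according to
    // --repetition-mode.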
    std::unique_ptr<ResultAggregator> ResultAgg =
        ResultAggregator::CreateAggregator(RepetitionMode);
    ResultAgg->AggregateResults(Result,
                                ArrayRef<Benchmark>(AllResults).drop_front());

    // With dummy counters, measurements are rather meaningless,
    // so drop them altogether.
    if (UseDummyPerfCounters)
      Result.Measurements.clear();

    ExitOnFileError(BenchmarkFile, Result.writeYamlTo(State, Ostr));
  }
}

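// Entry point for the benchmarking modes (latency, uops, inverse_throughput):
// sets up the target and perf counters, gathers the snippets to measure, and
// runs them through the benchmark runner.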
void benchmarkMain() {
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
      !UseDummyPerfCounters) {
#ifndef HAVE_LIBPFM
    ExitWithError(
        "benchmarking unavailable, LLVM was built without libpfm. You can "
        "pass --benchmark-phase=... to skip the actual benchmarking or "
        "--use-dummy-perf-counters to not query the kernel for real event "
        "counts.");
#else
    if (pfm::pfmInitialize())
      ExitWithError("cannot initialize libpfm");
#endif
  }

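  // Register the assembly printers and parsers for every exegesis-enabled
  // target so that snippets can be emitted and parsed.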
  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##AsmParser();
#include "llvm/Config/TargetExegesis.def"

  const LLVMState State = ExitOnErr(
      LLVMState::Create(TripleName, MCPU, MAttr, UseDummyPerfCounters));

  // Preliminary check to ensure features needed for requested
  // benchmark mode are present on target CPU and/or OS.
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
    ExitOnErr(State.getExegesisTarget().checkFeatureSupport());

  if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&
      UseDummyPerfCounters)
    ExitWithError("Dummy perf counters are not supported in the subprocess "
                  "execution mode.");

  const std::unique_ptr<BenchmarkRunner> Runner =
      ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
          BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode,
          BenchmarkRepeatCount, ValidationCounters, ResultAggMode));
  if (!Runner) {
    ExitWithError("cannot create benchmark runner");
  }

  const auto Opcodes = getOpcodesOrDie(State);
  std::vector<BenchmarkCode> Configurations;

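  // Register used as the loop counter by the loop repetitor; snippet files may
  // override the target's default below.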
  MCRegister LoopRegister =
      State.getExegesisTarget().getDefaultLoopCounterRegister(
          State.getTargetMachine().getTargetTriple());

  if (Opcodes.empty()) {
    Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
    for (const auto &Configuration : Configurations) {
      if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
          (Configuration.Key.MemoryMappings.size() != 0 ||
           Configuration.Key.MemoryValues.size() != 0 ||
           Configuration.Key.SnippetAddress != 0))
        ExitWithError("Memory and snippet address annotations are only "
                      "supported in subprocess "
                      "execution mode");
    }
    LoopRegister = Configurations[0].Key.LoopRegister;
  }

  SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
  if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
    Repetitors.emplace_back(
        SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
  else {
    for (Benchmark::RepetitionModeE RepMode :
         {Benchmark::RepetitionModeE::Duplicate,
          Benchmark::RepetitionModeE::Loop})
      Repetitors.emplace_back(
          SnippetRepetitor::Create(RepMode, State, LoopRegister));
  }

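  // The snippet generator must avoid registers reserved by the repetitors
  // (e.g. the loop counter register).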
  BitVector AllReservedRegs;
  for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
    AllReservedRegs |= Repetitor->getReservedRegs();

  if (!Opcodes.empty()) {
    for (const unsigned Opcode : Opcodes) {
      // Ignore instructions without a sched class if
      // -ignore-invalid-sched-class is passed.
      if (IgnoreInvalidSchedClass &&
          State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
        errs() << State.getInstrInfo().getName(Opcode)
               << ": ignoring instruction without sched class\n";
        continue;
      }

      auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
      if (!ConfigsForInstr) {
        logAllUnhandledErrors(
            ConfigsForInstr.takeError(), errs(),
            Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
        continue;
      }
      std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
                std::back_inserter(Configurations));
    }
  }

  if (MinInstructions == 0) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("--min-instructions must be greater than zero");
  }

  // Write to standard output if file is not set.
  if (BenchmarkFile.empty())
    BenchmarkFile = "-";

  if (!Configurations.empty())
    runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner);

  pfm::pfmTerminate();
}

// Prints the results of running analysis pass `Pass` to file `OutputFilename`
// if OutputFilename is non-empty.
template <typename Pass>
static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
                             const std::string &OutputFilename) {
  if (OutputFilename.empty())
    return;
  if (OutputFilename != "-") {
    errs() << "Printing " << Name << " results to file '" << OutputFilename
           << "'\n";
  }
  std::error_code ErrorCode;
  raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
                            sys::fs::FA_Read | sys::fs::FA_Write);
  if (ErrorCode)
    ExitOnFileError(OutputFilename, errorCodeToError(ErrorCode));
  if (auto Err = Analyzer.run<Pass>(ClustersOS))
    ExitOnFileError(OutputFilename, std::move(Err));
}

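// Marks benchmark points that do not match --analysis-filter as errored so
// that they are excluded from clustering.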
static void filterPoints(MutableArrayRef<Benchmark> Points,
                         const MCInstrInfo &MCII) {
  if (AnalysisSnippetFilter == BenchmarkFilter::All)
    return;

  bool WantPointsWithMemOps = AnalysisSnippetFilter == BenchmarkFilter::WithMem;
  for (Benchmark &Point : Points) {
    if (!Point.Error.empty())
      continue;
    if (WantPointsWithMemOps ==
        any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) {
          const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
          return MCDesc.mayLoad() || MCDesc.mayStore();
        }))
      continue;
    Point.Error = "filtered out by user";
  }
}

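// Entry point for --mode=analysis: reads benchmark results from YAML, clusters
// the points, and prints the cluster and sched-class inconsistency reports.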
static void analysisMain() {
  ExitOnErr.setBanner("llvm-exegesis: ");
  if (BenchmarkFile.empty())
    ExitWithError("--benchmarks-file must be set");

  if (AnalysisClustersOutputFile.empty() &&
      AnalysisInconsistenciesOutputFile.empty()) {
    ExitWithError(
        "for --mode=analysis: At least one of --analysis-clusters-output-file "
        "and --analysis-inconsistencies-output-file must be specified");
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/TargetExegesis.def"

  auto MemoryBuffer = ExitOnFileError(
      BenchmarkFile,
      errorOrToExpected(MemoryBuffer::getFile(BenchmarkFile, /*IsText=*/true)));

  const auto TriplesAndCpus = ExitOnFileError(
      BenchmarkFile, Benchmark::readTriplesAndCpusFromYamls(*MemoryBuffer));
  if (TriplesAndCpus.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  if (TriplesAndCpus.size() > 1) {
    ExitWithError("analysis file contains benchmarks from several CPUs. This "
                  "is unsupported.");
  }
  auto TripleAndCpu = *TriplesAndCpus.begin();
  if (AnalysisOverrideBenchmarksTripleAndCpu) {
    errs() << "overriding file CPU name (" << TripleAndCpu.CpuName
           << ") with provided triple (" << TripleName << ") and CPU name ("
           << MCPU << ")\n";
    TripleAndCpu.LLVMTriple = TripleName;
    TripleAndCpu.CpuName = MCPU;
  }
  errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '"
         << TripleAndCpu.CpuName << "'\n";

  // Read benchmarks.
  const LLVMState State = ExitOnErr(
      LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName));
  std::vector<Benchmark> Points = ExitOnFileError(
      BenchmarkFile, Benchmark::readYamls(State, *MemoryBuffer));

  outs() << "Parsed " << Points.size() << " benchmark points\n";
  if (Points.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  // FIXME: Merge points from several runs (latency and uops).

  filterPoints(Points, State.getInstrInfo());

  const auto Clustering = ExitOnErr(BenchmarkClustering::create(
      Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
      AnalysisClusteringEpsilon, &State.getSubtargetInfo(),
      &State.getInstrInfo()));

  const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon,
                          AnalysisDisplayUnstableOpcodes);

  maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
                                            AnalysisClustersOutputFile);
  maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
      Analyzer, "sched class consistency analysis",
      AnalysisInconsistenciesOutputFile);
}

} // namespace exegesis
} // namespace llvm

int main(int Argc, char **Argv) {
  using namespace llvm;

  InitLLVM X(Argc, Argv);

  // Initialize targets so we can print them when flag --version is specified.
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##Target();                                        \
  LLVMInitialize##TargetName##TargetInfo();                                    \
  LLVMInitialize##TargetName##TargetMC();
#include "llvm/Config/TargetExegesis.def"

  // Register the Target and CPU printer for --version.
  cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU);

  // Enable printing of available targets when flag --version is specified.
  cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);

  cl::HideUnrelatedOptions({&exegesis::Options, &exegesis::BenchmarkOptions,
                            &exegesis::AnalysisOptions});

  cl::ParseCommandLineOptions(Argc, Argv,
                              "llvm host machine instruction characteristics "
                              "measurement and analysis.\n");

  exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
    if (Err.isA<exegesis::ClusteringError>())
      return EXIT_SUCCESS;
    return EXIT_FAILURE;
  });

  if (exegesis::BenchmarkMode == exegesis::Benchmark::Unknown) {
    exegesis::analysisMain();
  } else {
    exegesis::benchmarkMain();
  }
  return EXIT_SUCCESS;
}