//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Measures execution properties (latencies/uops) of an instruction.
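///
/// Example invocations (illustrative; opcode names are target-specific):
///   llvm-exegesis -mode=latency -opcode-name=ADD64rr
///   llvm-exegesis -mode=uops -snippets-file=snippet.s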
///
//===----------------------------------------------------------------------===//

#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/CodeTemplate.h"
#include "lib/Error.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/ProgressMeter.h"
#include "lib/ResultAggregator.h"
#include "lib/SnippetFile.h"
#include "lib/SnippetRepetitor.h"
#include "lib/Target.h"
#include "lib/TargetSelect.h"
#include "lib/ValidationEvent.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <string>

namespace llvm {
namespace exegesis {

static cl::opt<int> OpcodeIndex(
    "opcode-index",
    cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
    cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<std::string>
    OpcodeNames("opcode-name",
                cl::desc("comma-separated list of opcodes to measure, by name"),
                cl::cat(BenchmarkOptions), cl::init(""));
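// Example: -opcode-name=ADD32rr,ADD64rr (X86 opcode names). Names are
// resolved via State.getOpcodeNameToOpcodeIdxMapping() in getOpcodesOrDie()
// below.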

static cl::opt<std::string> SnippetsFile("snippets-file",
                                         cl::desc("code snippets to measure"),
                                         cl::cat(BenchmarkOptions),
                                         cl::init(""));

static cl::opt<std::string>
    BenchmarkFile("benchmarks-file",
                  cl::desc("File to read (analysis mode) or write "
                           "(latency/uops/inverse_throughput modes) benchmark "
                           "results. “-” uses stdin/stdout."),
                  cl::cat(Options), cl::init(""));

static cl::opt<Benchmark::ModeE> BenchmarkMode(
    "mode", cl::desc("the mode to run"), cl::cat(Options),
    cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"),
               clEnumValN(Benchmark::InverseThroughput, "inverse_throughput",
                          "Instruction Inverse Throughput"),
               clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"),
               // When not asking for a specific benchmark mode,
               // we'll analyse the results.
               clEnumValN(Benchmark::Unknown, "analysis", "Analysis")));

static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode(
    "result-aggregation-mode",
    cl::desc("How to aggregate multi-value results"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"),
               clEnumValN(Benchmark::Max, "max", "Keep max reading"),
               clEnumValN(Benchmark::Mean, "mean",
                          "Compute mean of all readings"),
               clEnumValN(Benchmark::MinVariance, "min-variance",
                          "Keep readings set with min-variance")),
    cl::init(Benchmark::Min));

static cl::opt<Benchmark::RepetitionModeE> RepetitionMode(
    "repetition-mode", cl::desc("how to repeat the instruction snippet"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"),
        clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"),
        clEnumValN(Benchmark::AggregateMin, "min",
                   "All of the above and take the minimum of measurements"),
        clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate",
                   "Middle half duplicate mode"),
        clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop",
                   "Middle half loop mode")),
    cl::init(Benchmark::Duplicate));

static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
    "measurements-print-progress",
    cl::desc("Produce progress indicator when performing measurements"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
    "benchmark-phase",
    cl::desc("Stop the benchmarking process after the given phase"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
                   "Only generate the minimal instruction sequence"),
        clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
                   "prepare-and-assemble-snippet",
                   "Same as prepare-snippet, but also dumps an excerpt of the "
                   "sequence (hex encoded)"),
        clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode,
                   "assemble-measured-code",
                   "Same as prepare-and-assemble-snippet, but also creates "
                   "the full sequence that can be dumped to a file using "
                   "--dump-object-to-disk"),
        clEnumValN(
            BenchmarkPhaseSelectorE::Measure, "measure",
            "Same as assemble-measured-code, but also runs the measurement "
            "(default)")),
    cl::init(BenchmarkPhaseSelectorE::Measure));
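// Note: selecting a phase other than "measure" (e.g. --benchmark-phase=
// prepare-and-assemble-snippet) lets the tool run without functional perf
// counters; see the libpfm check in benchmarkMain() below.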

static cl::opt<bool>
    UseDummyPerfCounters("use-dummy-perf-counters",
                         cl::desc("Do not read real performance counters, use "
                                  "dummy values (for testing)"),
                         cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<unsigned>
    MinInstructions("min-instructions",
                    cl::desc("The minimum number of instructions that should "
                             "be included in the snippet"),
                    cl::cat(BenchmarkOptions), cl::init(10000));

static cl::opt<unsigned>
    LoopBodySize("loop-body-size",
                 cl::desc("when repeating the instruction snippet by looping "
                          "over it, duplicate the snippet until the loop body "
                          "contains at least this many instructions"),
                 cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<unsigned> MaxConfigsPerOpcode(
    "max-configs-per-opcode",
    cl::desc(
        "allow the snippet generator to generate at most that many configs"),
    cl::cat(BenchmarkOptions), cl::init(1));

static cl::opt<bool> IgnoreInvalidSchedClass(
    "ignore-invalid-sched-class",
    cl::desc("ignore instructions that do not define a sched class"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkFilter> AnalysisSnippetFilter(
    "analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkFilter::All, "all",
                   "Keep all benchmarks (default)"),
        clEnumValN(BenchmarkFilter::RegOnly, "reg-only",
                   "Keep only those benchmarks that do *NOT* involve memory"),
        clEnumValN(BenchmarkFilter::WithMem, "mem-only",
                   "Keep only the benchmarks that *DO* involve memory")),
    cl::init(BenchmarkFilter::All));

static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm(
    "analysis-clustering", cl::desc("the clustering algorithm to use"),
    cl::cat(AnalysisOptions),
    cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan",
                          "use DBSCAN/OPTICS algorithm"),
               clEnumValN(BenchmarkClustering::Naive, "naive",
                          "one cluster per opcode")),
    cl::init(BenchmarkClustering::Dbscan));

static cl::opt<unsigned> AnalysisDbscanNumPoints(
    "analysis-numpoints",
    cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
    cl::cat(AnalysisOptions), cl::init(3));

static cl::opt<float> AnalysisClusteringEpsilon(
    "analysis-clustering-epsilon",
    cl::desc("epsilon for benchmark point clustering"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<float> AnalysisInconsistencyEpsilon(
    "analysis-inconsistency-epsilon",
    cl::desc("epsilon for detection of when the cluster is different from the "
             "LLVM schedule profile values"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<std::string>
    AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
                               cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<std::string>
    AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
                                      cl::desc(""), cl::cat(AnalysisOptions),
                                      cl::init(""));

static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
    "analysis-display-unstable-clusters",
    cl::desc("If there is more than one benchmark for an opcode, those "
             "benchmarks may end up in different clusters when the measured "
             "performance characteristics differ. By default all such "
             "opcodes are filtered out; with this flag, only such unstable "
             "opcodes are shown"),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
    "analysis-override-benchmark-triple-and-cpu",
    cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
             "were measured for, but if you want to analyze them for some "
             "other combination (specified via -mtriple/-mcpu), you can "
             "pass this flag."),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<std::string>
    TripleName("mtriple",
               cl::desc("Target triple. See -version for available targets"),
               cl::cat(Options));

static cl::opt<std::string>
    MCPU("mcpu",
         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
         cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native"));

static cl::opt<std::string>
    DumpObjectToDisk("dump-object-to-disk",
                     cl::desc("dumps the generated benchmark object to disk "
                              "and prints a message to access it"),
                     cl::ValueOptional, cl::cat(BenchmarkOptions));
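// cl::ValueOptional: the flag may be passed with or without an explicit file
// name; see its use in runBenchmarkConfigurations() below.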

static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode(
    "execution-mode",
    cl::desc("Selects the execution mode to use for running snippets"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess,
                          "inprocess",
                          "Executes the snippets within the same process"),
               clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess,
                          "subprocess",
                          "Spawns a subprocess for each snippet execution, "
                          "allows for the use of memory annotations")),
    cl::init(BenchmarkRunner::ExecutionModeE::InProcess));

static cl::opt<unsigned> BenchmarkRepeatCount(
    "benchmark-repeat-count",
    cl::desc("The number of times to repeat measurements on the benchmark "
             "before aggregating the results"),
    cl::cat(BenchmarkOptions), cl::init(30));

static cl::list<ValidationEvent> ValidationCounters(
    "validation-counter",
    cl::desc(
        "The name of a validation counter to run concurrently with the main "
        "counter to validate benchmarking assumptions"),
    cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions());

static ExitOnError ExitOnErr("llvm-exegesis error: ");

// Helper function that logs the error(s) and exits.
template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) {
  ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...));
}

// Check Err. If it's in a failure state log the file error(s) and exit.
static void ExitOnFileError(const Twine &FileName, Error Err) {
  if (Err) {
    ExitOnErr(createFileError(FileName, std::move(Err)));
  }
}

// Check E. If it's in a success state then return the contained value.
// If it's in a failure state log the file error(s) and exit.
template <typename T>
T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
  ExitOnFileError(FileName, E.takeError());
  return std::move(*E);
}
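// For example, mirroring the use in analysisMain() below:
//   std::vector<Benchmark> Points = ExitOnFileError(
//       BenchmarkFile, Benchmark::readYamls(State, *MemoryBuffer));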

// Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is
// provided, and returns the opcode indices or {} if snippets should be read
// from `SnippetsFile`.
static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
  const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) +
                             (OpcodeIndex == 0 ? 0 : 1) +
                             (SnippetsFile.empty() ? 0 : 1);
  const auto &ET = State.getExegesisTarget();
  const auto AvailableFeatures = State.getSubtargetInfo().getFeatureBits();

  if (NumSetFlags != 1) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("please provide one and only one of 'opcode-index', "
                  "'opcode-name' or 'snippets-file'");
  }
  if (!SnippetsFile.empty())
    return {};
  if (OpcodeIndex > 0)
    return {static_cast<unsigned>(OpcodeIndex)};
  if (OpcodeIndex < 0) {
    std::vector<unsigned> Result;
    unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes();
    Result.reserve(NumOpcodes);
    for (unsigned I = 0, E = NumOpcodes; I < E; ++I) {
      if (!ET.isOpcodeAvailable(I, AvailableFeatures))
        continue;
      Result.push_back(I);
    }
    return Result;
  }
  // Resolve opcode name -> opcode.
  const auto ResolveName = [&State](StringRef OpcodeName) -> unsigned {
    const auto &Map = State.getOpcodeNameToOpcodeIdxMapping();
    auto I = Map.find(OpcodeName);
    if (I != Map.end())
      return I->getSecond();
    return 0u;
  };
  SmallVector<StringRef, 2> Pieces;
  StringRef(OpcodeNames.getValue())
      .split(Pieces, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
  std::vector<unsigned> Result;
  Result.reserve(Pieces.size());
  for (const StringRef &OpcodeName : Pieces) {
    if (unsigned Opcode = ResolveName(OpcodeName))
      Result.push_back(Opcode);
    else
      ExitWithError(Twine("unknown opcode ").concat(OpcodeName));
  }
  return Result;
}

// Generates code snippets for opcode `Opcode`.
static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
                 const BitVector &ForbiddenRegs) {
  const Instruction &Instr = State.getIC().getInstr(Opcode);
  const MCInstrDesc &InstrDesc = Instr.Description;
  // Ignore instructions that we cannot run.
  if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
    return make_error<Failure>(
        "Unsupported opcode: isPseudo/usesCustomInserter");
  if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
    return make_error<Failure>("Unsupported opcode: isBranch/isIndirectBranch");
  if (InstrDesc.isCall() || InstrDesc.isReturn())
    return make_error<Failure>("Unsupported opcode: isCall/isReturn");

  const std::vector<InstructionTemplate> InstructionVariants =
      State.getExegesisTarget().generateInstructionVariants(
          Instr, MaxConfigsPerOpcode);

  SnippetGenerator::Options SnippetOptions;
  SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
  const std::unique_ptr<SnippetGenerator> Generator =
      State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State,
                                                       SnippetOptions);
  if (!Generator)
    ExitWithError("cannot create snippet generator");

  std::vector<BenchmarkCode> Benchmarks;
  for (const InstructionTemplate &Variant : InstructionVariants) {
    if (Benchmarks.size() >= MaxConfigsPerOpcode)
      break;
    if (auto Err = Generator->generateConfigurations(Variant, Benchmarks,
                                                     ForbiddenRegs))
      return std::move(Err);
  }
  return Benchmarks;
}

static void runBenchmarkConfigurations(
    const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
    ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
    const BenchmarkRunner &Runner) {
  assert(!Configurations.empty() && "Don't have any configurations to run.");
  std::optional<raw_fd_ostream> FileOstr;
  if (BenchmarkFile != "-") {
    int ResultFD = 0;
    // Create output file or open existing file and truncate it, once.
    ExitOnErr(errorCodeToError(openFileForWrite(BenchmarkFile, ResultFD,
                                                sys::fs::CD_CreateAlways,
                                                sys::fs::OF_TextWithCRLF)));
    FileOstr.emplace(ResultFD, true /*shouldClose*/);
  }
  raw_ostream &Ostr = FileOstr ? *FileOstr : outs();

  std::optional<ProgressMeter<>> Meter;
  if (BenchmarkMeasurementsPrintProgress)
    Meter.emplace(Configurations.size());

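  // The middle-half repetition modes run each configuration at two lengths
  // (MinInstructions and twice that) so that the result aggregator can
  // subtract the two readings, cancelling constant per-run overhead.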
  SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
  if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
      RepetitionMode == Benchmark::MiddleHalfLoop)
    MinInstructionCounts.push_back(MinInstructions * 2);

  for (const BenchmarkCode &Conf : Configurations) {
    ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
    SmallVector<Benchmark, 2> AllResults;

    for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
         Repetitors) {
      for (unsigned IterationRepetitions : MinInstructionCounts) {
        auto RC = ExitOnErr(Runner.getRunnableConfiguration(
            Conf, IterationRepetitions, LoopBodySize, *Repetitor));
        std::optional<StringRef> DumpFile;
        if (DumpObjectToDisk.getNumOccurrences())
          DumpFile = DumpObjectToDisk;
        auto [Err, BenchmarkResult] =
            Runner.runConfiguration(std::move(RC), DumpFile);
        if (Err) {
          // Errors from executing the snippets are fine.
          // All other errors are a framework issue and should fail.
          if (!Err.isA<SnippetExecutionFailure>())
            ExitOnErr(std::move(Err));

          BenchmarkResult.Error = toString(std::move(Err));
        }
        AllResults.push_back(std::move(BenchmarkResult));
      }
    }

    Benchmark &Result = AllResults.front();

    // If any of our measurements failed, pretend they all have failed.
    if (AllResults.size() > 1 &&
        any_of(AllResults, [](const Benchmark &R) {
          return R.Measurements.empty();
        }))
      Result.Measurements.clear();

    std::unique_ptr<ResultAggregator> ResultAgg =
        ResultAggregator::CreateAggregator(RepetitionMode);
    ResultAgg->AggregateResults(Result,
                                ArrayRef<Benchmark>(AllResults).drop_front());

    // With dummy counters, measurements are rather meaningless,
    // so drop them altogether.
    if (UseDummyPerfCounters)
      Result.Measurements.clear();

    ExitOnFileError(BenchmarkFile, Result.writeYamlTo(State, Ostr));
  }
}

void benchmarkMain() {
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
      !UseDummyPerfCounters) {
#ifndef HAVE_LIBPFM
    ExitWithError(
        "benchmarking unavailable, LLVM was built without libpfm. You can "
        "pass --benchmark-phase=... to skip the actual benchmarking or "
        "--use-dummy-perf-counters to not query the kernel for real event "
        "counts.");
#else
    if (pfm::pfmInitialize())
      ExitWithError("cannot initialize libpfm");
#endif
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##AsmParser();
#include "llvm/Config/TargetExegesis.def"

  const LLVMState State =
      ExitOnErr(LLVMState::Create(TripleName, MCPU, "", UseDummyPerfCounters));

  // Preliminary check to ensure features needed for requested
  // benchmark mode are present on target CPU and/or OS.
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
    ExitOnErr(State.getExegesisTarget().checkFeatureSupport());

  if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&
      UseDummyPerfCounters)
    ExitWithError("Dummy perf counters are not supported in the subprocess "
                  "execution mode.");

  const std::unique_ptr<BenchmarkRunner> Runner =
      ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
          BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode,
          BenchmarkRepeatCount, ValidationCounters, ResultAggMode));
  if (!Runner) {
    ExitWithError("cannot create benchmark runner");
  }

  const auto Opcodes = getOpcodesOrDie(State);
  std::vector<BenchmarkCode> Configurations;

  unsigned LoopRegister =
      State.getExegesisTarget().getDefaultLoopCounterRegister(
          State.getTargetMachine().getTargetTriple());

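  // Snippets read from a file may carry their own loop register annotation,
  // which overrides the target default chosen above.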
  if (Opcodes.empty()) {
    Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
    for (const auto &Configuration : Configurations) {
      if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
          (Configuration.Key.MemoryMappings.size() != 0 ||
           Configuration.Key.MemoryValues.size() != 0 ||
           Configuration.Key.SnippetAddress != 0))
        ExitWithError("Memory and snippet address annotations are only "
                      "supported in subprocess execution mode");
    }
    LoopRegister = Configurations[0].Key.LoopRegister;
  }

  SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
  if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
    Repetitors.emplace_back(
        SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
  else {
    for (Benchmark::RepetitionModeE RepMode :
         {Benchmark::RepetitionModeE::Duplicate,
          Benchmark::RepetitionModeE::Loop})
      Repetitors.emplace_back(
          SnippetRepetitor::Create(RepMode, State, LoopRegister));
  }

  BitVector AllReservedRegs;
  for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
    AllReservedRegs |= Repetitor->getReservedRegs();

  if (!Opcodes.empty()) {
    for (const unsigned Opcode : Opcodes) {
      // Ignore instructions without a sched class if
      // -ignore-invalid-sched-class is passed.
      if (IgnoreInvalidSchedClass &&
          State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
        errs() << State.getInstrInfo().getName(Opcode)
               << ": ignoring instruction without sched class\n";
        continue;
      }

      auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
      if (!ConfigsForInstr) {
        logAllUnhandledErrors(
            ConfigsForInstr.takeError(), errs(),
            Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
        continue;
      }
      std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
                std::back_inserter(Configurations));
    }
  }

  if (MinInstructions == 0) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("--min-instructions must be greater than zero");
  }

  // Write to standard output if file is not set.
  if (BenchmarkFile.empty())
    BenchmarkFile = "-";

  if (!Configurations.empty())
    runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner);

  pfm::pfmTerminate();
}

// Prints the results of running analysis pass `Pass` to file `OutputFilename`
// if OutputFilename is non-empty.
template <typename Pass>
static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
                             const std::string &OutputFilename) {
  if (OutputFilename.empty())
    return;
  if (OutputFilename != "-") {
    errs() << "Printing " << Name << " results to file '" << OutputFilename
           << "'\n";
  }
  std::error_code ErrorCode;
  raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
                            sys::fs::FA_Read | sys::fs::FA_Write);
  if (ErrorCode)
    ExitOnFileError(OutputFilename, errorCodeToError(ErrorCode));
  if (auto Err = Analyzer.run<Pass>(ClustersOS))
    ExitOnFileError(OutputFilename, std::move(Err));
}

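// Applies --analysis-filter: non-matching points are marked with an error
// rather than removed, which keeps them out of the clustering below.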
static void filterPoints(MutableArrayRef<Benchmark> Points,
                         const MCInstrInfo &MCII) {
  if (AnalysisSnippetFilter == BenchmarkFilter::All)
    return;

  bool WantPointsWithMemOps = AnalysisSnippetFilter == BenchmarkFilter::WithMem;
  for (Benchmark &Point : Points) {
    if (!Point.Error.empty())
      continue;
    if (WantPointsWithMemOps ==
        any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) {
          const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
          return MCDesc.mayLoad() || MCDesc.mayStore();
        }))
      continue;
    Point.Error = "filtered out by user";
  }
}

static void analysisMain() {
  ExitOnErr.setBanner("llvm-exegesis: ");
  if (BenchmarkFile.empty())
    ExitWithError("--benchmarks-file must be set");

  if (AnalysisClustersOutputFile.empty() &&
      AnalysisInconsistenciesOutputFile.empty()) {
    ExitWithError(
        "for --mode=analysis: At least one of --analysis-clusters-output-file "
        "and --analysis-inconsistencies-output-file must be specified");
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/TargetExegesis.def"

  auto MemoryBuffer = ExitOnFileError(
      BenchmarkFile,
      errorOrToExpected(MemoryBuffer::getFile(BenchmarkFile, /*IsText=*/true)));

  const auto TriplesAndCpus = ExitOnFileError(
      BenchmarkFile, Benchmark::readTriplesAndCpusFromYamls(*MemoryBuffer));
  if (TriplesAndCpus.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  if (TriplesAndCpus.size() > 1) {
    ExitWithError("analysis file contains benchmarks from several CPUs. This "
                  "is unsupported.");
  }
  auto TripleAndCpu = *TriplesAndCpus.begin();
  if (AnalysisOverrideBenchmarksTripleAndCpu) {
    errs() << "overriding file CPU name (" << TripleAndCpu.CpuName
           << ") with the provided triple (" << TripleName
           << ") and CPU name (" << MCPU << ")\n";
    TripleAndCpu.LLVMTriple = TripleName;
    TripleAndCpu.CpuName = MCPU;
  }
  errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '"
         << TripleAndCpu.CpuName << "'\n";

  // Read benchmarks.
  const LLVMState State = ExitOnErr(
      LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName));
  std::vector<Benchmark> Points = ExitOnFileError(
      BenchmarkFile, Benchmark::readYamls(State, *MemoryBuffer));

  outs() << "Parsed " << Points.size() << " benchmark points\n";
  if (Points.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  // FIXME: Merge points from several runs (latency and uops).

  filterPoints(Points, State.getInstrInfo());

  const auto Clustering = ExitOnErr(BenchmarkClustering::create(
      Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
      AnalysisClusteringEpsilon, &State.getSubtargetInfo(),
      &State.getInstrInfo()));

  const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon,
                          AnalysisDisplayUnstableOpcodes);

  maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
                                            AnalysisClustersOutputFile);
  maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
      Analyzer, "sched class consistency analysis",
      AnalysisInconsistenciesOutputFile);
}

} // namespace exegesis
} // namespace llvm

int main(int Argc, char **Argv) {
  using namespace llvm;

  InitLLVM X(Argc, Argv);

  // Initialize targets so we can print them when flag --version is specified.
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##Target();                                        \
  LLVMInitialize##TargetName##TargetInfo();                                    \
  LLVMInitialize##TargetName##TargetMC();
#include "llvm/Config/TargetExegesis.def"

  // Register the Target and CPU printer for --version.
  cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU);

  // Enable printing of available targets when flag --version is specified.
  cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);

  cl::HideUnrelatedOptions({&exegesis::Options, &exegesis::BenchmarkOptions,
                            &exegesis::AnalysisOptions});

  cl::ParseCommandLineOptions(Argc, Argv,
                              "llvm host machine instruction characteristics "
                              "measurement and analysis.\n");

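  // Map clustering errors to a successful exit code; all other errors exit
  // with failure.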
  exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
    if (Err.isA<exegesis::ClusteringError>())
      return EXIT_SUCCESS;
    return EXIT_FAILURE;
  });

  if (exegesis::BenchmarkMode == exegesis::Benchmark::Unknown) {
    exegesis::analysisMain();
  } else {
    exegesis::benchmarkMain();
  }
  return EXIT_SUCCESS;
}