//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Measures execution properties (latencies/uops) of an instruction.
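///
/// Typical invocations (illustrative; opcode names are target-specific,
/// e.g. ADD64rr exists on x86-64):
///   llvm-exegesis -mode=latency -opcode-name=ADD64rr
///   llvm-exegesis -mode=uops -snippets-file=snippet.s
///   llvm-exegesis -mode=analysis -benchmarks-file=benchmarks.yaml \
///       -analysis-clusters-output-file=-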
///
//===----------------------------------------------------------------------===//

#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/CodeTemplate.h"
#include "lib/Error.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/ProgressMeter.h"
#include "lib/ResultAggregator.h"
#include "lib/SnippetFile.h"
#include "lib/SnippetRepetitor.h"
#include "lib/Target.h"
#include "lib/TargetSelect.h"
#include "lib/ValidationEvent.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <string>

namespace llvm {
namespace exegesis {

static cl::opt<int> OpcodeIndex(
    "opcode-index",
    cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
    cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<std::string>
    OpcodeNames("opcode-name",
                cl::desc("comma-separated list of opcodes to measure, by name"),
                cl::cat(BenchmarkOptions), cl::init(""));

static cl::opt<std::string> SnippetsFile("snippets-file",
                                         cl::desc("code snippets to measure"),
                                         cl::cat(BenchmarkOptions),
                                         cl::init(""));

static cl::opt<std::string>
    BenchmarkFile("benchmarks-file",
                  cl::desc("File to read (analysis mode) or write "
                           "(latency/uops/inverse_throughput modes) benchmark "
                           "results. “-” uses stdin/stdout."),
                  cl::cat(Options), cl::init(""));

static cl::opt<Benchmark::ModeE> BenchmarkMode(
    "mode", cl::desc("the mode to run"), cl::cat(Options),
    cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"),
               clEnumValN(Benchmark::InverseThroughput, "inverse_throughput",
                          "Instruction Inverse Throughput"),
               clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"),
               // When not asking for a specific benchmark mode,
               // we'll analyse the results.
               clEnumValN(Benchmark::Unknown, "analysis", "Analysis")));

static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode(
    "result-aggregation-mode",
    cl::desc("How to aggregate multi-value results"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"),
               clEnumValN(Benchmark::Max, "max", "Keep max reading"),
               clEnumValN(Benchmark::Mean, "mean",
                          "Compute mean of all readings"),
               clEnumValN(Benchmark::MinVariance, "min-variance",
                          "Keep readings set with min-variance")),
    cl::init(Benchmark::Min));

static cl::opt<Benchmark::RepetitionModeE> RepetitionMode(
    "repetition-mode", cl::desc("how to repeat the instruction snippet"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"),
        clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"),
        clEnumValN(Benchmark::AggregateMin, "min",
                   "All of the above and take the minimum of measurements"),
        clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate",
                   "Middle half duplicate mode"),
        clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop",
                   "Middle half loop mode")),
    cl::init(Benchmark::Duplicate));

static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
    "measurements-print-progress",
    cl::desc("Produce progress indicator when performing measurements"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
    "benchmark-phase",
    cl::desc("Stop the benchmarking process after the given phase"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
                   "Only generate the minimal instruction sequence"),
        clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
                   "prepare-and-assemble-snippet",
                   "Same as prepare-snippet, but also dumps an excerpt of the "
                   "sequence (hex encoded)"),
        clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode,
                   "assemble-measured-code",
                   "Same as prepare-and-assemble-snippet, but also creates "
                   "the full sequence that can be dumped to a file using "
                   "--dump-object-to-disk"),
        clEnumValN(
            BenchmarkPhaseSelectorE::Measure, "measure",
            "Same as assemble-measured-code, but also runs the measurement "
            "(default)")),
    cl::init(BenchmarkPhaseSelectorE::Measure));

static cl::opt<bool>
    UseDummyPerfCounters("use-dummy-perf-counters",
                         cl::desc("Do not read real performance counters, use "
                                  "dummy values (for testing)"),
                         cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<unsigned>
    MinInstructions("min-instructions",
                    cl::desc("The minimum number of instructions that should "
                             "be included in the snippet"),
                    cl::cat(BenchmarkOptions), cl::init(10000));

static cl::opt<unsigned>
    LoopBodySize("loop-body-size",
                 cl::desc("when repeating the instruction snippet by looping "
                          "over it, duplicate the snippet until the loop body "
                          "contains at least this many instructions"),
                 cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<unsigned> MaxConfigsPerOpcode(
    "max-configs-per-opcode",
    cl::desc(
        "allow the snippet generator to generate at most that many configs"),
    cl::cat(BenchmarkOptions), cl::init(1));

static cl::opt<bool> IgnoreInvalidSchedClass(
    "ignore-invalid-sched-class",
    cl::desc("ignore instructions that do not define a sched class"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkFilter> AnalysisSnippetFilter(
    "analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkFilter::All, "all",
                   "Keep all benchmarks (default)"),
        clEnumValN(BenchmarkFilter::RegOnly, "reg-only",
                   "Keep only those benchmarks that do *NOT* involve memory"),
        clEnumValN(BenchmarkFilter::WithMem, "mem-only",
                   "Keep only the benchmarks that *DO* involve memory")),
    cl::init(BenchmarkFilter::All));

static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm(
    "analysis-clustering", cl::desc("the clustering algorithm to use"),
    cl::cat(AnalysisOptions),
    cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan",
                          "use DBSCAN/OPTICS algorithm"),
               clEnumValN(BenchmarkClustering::Naive, "naive",
                          "one cluster per opcode")),
    cl::init(BenchmarkClustering::Dbscan));

static cl::opt<unsigned> AnalysisDbscanNumPoints(
    "analysis-numpoints",
    cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
    cl::cat(AnalysisOptions), cl::init(3));

static cl::opt<float> AnalysisClusteringEpsilon(
    "analysis-clustering-epsilon",
    cl::desc("epsilon for benchmark point clustering"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<float> AnalysisInconsistencyEpsilon(
    "analysis-inconsistency-epsilon",
    cl::desc("epsilon for detection of when the cluster is different from the "
             "LLVM schedule profile values"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<std::string>
    AnalysisClustersOutputFile("analysis-clusters-output-file",
                               cl::desc("file to write the analysis clusters "
                                        "to ('-' for stdout)"),
                               cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<std::string>
    AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
                                      cl::desc("file to write the analysis "
                                               "inconsistencies to ('-' for "
                                               "stdout)"),
                                      cl::cat(AnalysisOptions), cl::init(""));

static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
    "analysis-display-unstable-clusters",
    cl::desc("If there is more than one benchmark for an opcode, said "
             "benchmarks may end up not being clustered into the same cluster "
             "if the measured performance characteristics are different. By "
             "default all such opcodes are filtered out. This flag will "
             "instead show only such unstable opcodes."),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
    "analysis-override-benchmark-triple-and-cpu",
    cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
             "were measured for, but if you want to analyze them for some "
             "other combination (specified via -mtriple/-mcpu), you can "
             "pass this flag."),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<std::string>
    TripleName("mtriple",
               cl::desc("Target triple. See -version for available targets"),
               cl::cat(Options));

static cl::opt<std::string>
    MCPU("mcpu",
         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
         cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native"));

static cl::opt<std::string>
    DumpObjectToDisk("dump-object-to-disk",
                     cl::desc("dumps the generated benchmark object to disk "
                              "and prints a message to access it"),
                     cl::ValueOptional, cl::cat(BenchmarkOptions));

static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode(
    "execution-mode",
    cl::desc("Selects the execution mode to use for running snippets"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess,
                          "inprocess",
                          "Executes the snippets within the same process"),
               clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess,
                          "subprocess",
                          "Spawns a subprocess for each snippet execution, "
                          "allows for the use of memory annotations")),
    cl::init(BenchmarkRunner::ExecutionModeE::InProcess));

static cl::opt<unsigned> BenchmarkRepeatCount(
    "benchmark-repeat-count",
    cl::desc("The number of times to repeat measurements on the benchmark "
             "before aggregating the results"),
    cl::cat(BenchmarkOptions), cl::init(30));

static cl::list<ValidationEvent> ValidationCounters(
    "validation-counter",
    cl::desc(
        "The name of a validation counter to run concurrently with the main "
        "counter to validate benchmarking assumptions"),
    cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions());

static cl::opt<int> BenchmarkProcessCPU(
    "benchmark-process-cpu",
    cl::desc("The CPU number that the benchmarking process should execute on"),
    cl::cat(BenchmarkOptions), cl::init(-1));

static cl::opt<std::string> MAttr(
    "mattr", cl::desc("comma-separated list of target architecture features"),
    cl::value_desc("+feature1,-feature2,..."), cl::cat(Options), cl::init(""));

static ExitOnError ExitOnErr("llvm-exegesis error: ");

// Helper function that logs the error(s) and exits.
template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) {
  ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...));
}

// Check Err. If it's in a failure state, log the file error(s) and exit.
static void ExitOnFileError(const Twine &FileName, Error Err) {
  if (Err) {
    ExitOnErr(createFileError(FileName, std::move(Err)));
  }
}

// Check E. If it's in a success state, return the contained value.
// If it's in a failure state, log the file error(s) and exit.
template <typename T>
T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
  ExitOnFileError(FileName, E.takeError());
  return std::move(*E);
}

// Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is
// provided, and returns the opcode indices, or {} if snippets should be read
// from `SnippetsFile`.
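// For example, `-opcode-name=ADD64rr,SUB64rr` (x86-64) selects exactly those
// two opcodes, while `-opcode-index=-1` selects every opcode available on the
// target.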
static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
  const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) +
                             (OpcodeIndex == 0 ? 0 : 1) +
                             (SnippetsFile.empty() ? 0 : 1);
  const auto &ET = State.getExegesisTarget();
  const auto AvailableFeatures = State.getSubtargetInfo().getFeatureBits();

  if (NumSetFlags != 1) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("please provide one and only one of 'opcode-index', "
                  "'opcode-name' or 'snippets-file'");
  }
  if (!SnippetsFile.empty())
    return {};
  if (OpcodeIndex > 0)
    return {static_cast<unsigned>(OpcodeIndex)};
  if (OpcodeIndex < 0) {
    std::vector<unsigned> Result;
    unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes();
    Result.reserve(NumOpcodes);
    for (unsigned I = 0, E = NumOpcodes; I < E; ++I) {
      if (!ET.isOpcodeAvailable(I, AvailableFeatures))
        continue;
      Result.push_back(I);
    }
    return Result;
  }
  // Resolve opcode name -> opcode.
  const auto ResolveName = [&State](StringRef OpcodeName) -> unsigned {
    const auto &Map = State.getOpcodeNameToOpcodeIdxMapping();
    auto I = Map.find(OpcodeName);
    if (I != Map.end())
      return I->getSecond();
    return 0u;
  };

  SmallVector<StringRef, 2> Pieces;
  StringRef(OpcodeNames.getValue())
      .split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
  std::vector<unsigned> Result;
  Result.reserve(Pieces.size());
  for (const StringRef &OpcodeName : Pieces) {
    if (unsigned Opcode = ResolveName(OpcodeName))
      Result.push_back(Opcode);
    else
      ExitWithError(Twine("unknown opcode ").concat(OpcodeName));
  }
  return Result;
}

// Generates code snippets for opcode `Opcode`.
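// A single opcode may yield several instruction variants (e.g. different
// operand assignments); at most --max-configs-per-opcode configurations are
// kept per opcode.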
static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
                 const BitVector &ForbiddenRegs) {
  // Ignore instructions that we cannot run.
  if (const char *Reason =
          State.getExegesisTarget().getIgnoredOpcodeReasonOrNull(State, Opcode))
    return make_error<Failure>(Reason);

  const Instruction &Instr = State.getIC().getInstr(Opcode);
  const std::vector<InstructionTemplate> InstructionVariants =
      State.getExegesisTarget().generateInstructionVariants(
          Instr, MaxConfigsPerOpcode);

  SnippetGenerator::Options SnippetOptions;
  SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
  const std::unique_ptr<SnippetGenerator> Generator =
      State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State,
                                                       SnippetOptions);
  if (!Generator)
    ExitWithError("cannot create snippet generator");

  std::vector<BenchmarkCode> Benchmarks;
  for (const InstructionTemplate &Variant : InstructionVariants) {
    if (Benchmarks.size() >= MaxConfigsPerOpcode)
      break;
    if (auto Err = Generator->generateConfigurations(Variant, Benchmarks,
                                                     ForbiddenRegs))
      return std::move(Err);
  }
  return Benchmarks;
}

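// Runs each configuration with every requested repetitor and instruction
// count, aggregates the per-run measurements according to --repetition-mode,
// and streams each resulting benchmark as YAML to --benchmarks-file (or
// stdout).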
static void runBenchmarkConfigurations(
    const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
    ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
    const BenchmarkRunner &Runner) {
  assert(!Configurations.empty() && "Don't have any configurations to run.");
  std::optional<raw_fd_ostream> FileOstr;
  if (BenchmarkFile != "-") {
    int ResultFD = 0;
    // Create output file or open existing file and truncate it, once.
    ExitOnErr(errorCodeToError(openFileForWrite(BenchmarkFile, ResultFD,
                                                sys::fs::CD_CreateAlways,
                                                sys::fs::OF_TextWithCRLF)));
    FileOstr.emplace(ResultFD, true /*shouldClose*/);
  }
  raw_ostream &Ostr = FileOstr ? *FileOstr : outs();

  std::optional<ProgressMeter<>> Meter;
  if (BenchmarkMeasurementsPrintProgress)
    Meter.emplace(Configurations.size());

  SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
  if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
      RepetitionMode == Benchmark::MiddleHalfLoop)
    MinInstructionCounts.push_back(MinInstructions * 2);

  for (const BenchmarkCode &Conf : Configurations) {
    ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
    SmallVector<Benchmark, 2> AllResults;

    for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
         Repetitors) {
      for (unsigned IterationRepetitions : MinInstructionCounts) {
        auto RC = ExitOnErr(Runner.getRunnableConfiguration(
            Conf, IterationRepetitions, LoopBodySize, *Repetitor));
        std::optional<StringRef> DumpFile;
        if (DumpObjectToDisk.getNumOccurrences())
          DumpFile = DumpObjectToDisk;
        const std::optional<int> BenchmarkCPU =
            BenchmarkProcessCPU == -1
                ? std::nullopt
                : std::optional(BenchmarkProcessCPU.getValue());
        auto [Err, BenchmarkResult] =
            Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU);
        if (Err) {
          // Errors from executing the snippets are fine.
          // All other errors are a framework issue and should fail.
          if (!Err.isA<SnippetExecutionFailure>())
            ExitOnErr(std::move(Err));

          BenchmarkResult.Error = toString(std::move(Err));
        }
        AllResults.push_back(std::move(BenchmarkResult));
      }
    }

    Benchmark &Result = AllResults.front();

    // If any of our measurements failed, pretend they all have failed.
    if (AllResults.size() > 1 &&
        any_of(AllResults, [](const Benchmark &R) {
          return R.Measurements.empty();
        }))
      Result.Measurements.clear();

    std::unique_ptr<ResultAggregator> ResultAgg =
        ResultAggregator::CreateAggregator(RepetitionMode);
    ResultAgg->AggregateResults(Result,
                                ArrayRef<Benchmark>(AllResults).drop_front());

    // With dummy counters, measurements are rather meaningless,
    // so drop them altogether.
    if (UseDummyPerfCounters)
      Result.Measurements.clear();

    ExitOnFileError(BenchmarkFile, Result.writeYamlTo(State, Ostr));
  }
}

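// Entry point for the benchmarking modes (latency, uops, inverse_throughput):
// sets up performance counters, builds the list of snippet configurations
// (either read from --snippets-file or generated per opcode), and runs them.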
void benchmarkMain() {
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
      !UseDummyPerfCounters) {
#ifndef HAVE_LIBPFM
    ExitWithError(
        "benchmarking unavailable, LLVM was built without libpfm. You can "
        "pass --benchmark-phase=... to skip the actual benchmarking or "
        "--use-dummy-perf-counters to not query the kernel for real event "
        "counts.");
#else
    if (pfm::pfmInitialize())
      ExitWithError("cannot initialize libpfm");
#endif
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##AsmParser();
#include "llvm/Config/TargetExegesis.def"

  const LLVMState State = ExitOnErr(
      LLVMState::Create(TripleName, MCPU, MAttr, UseDummyPerfCounters));

  // Preliminary check to ensure features needed for requested
  // benchmark mode are present on target CPU and/or OS.
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
    ExitOnErr(State.getExegesisTarget().checkFeatureSupport());

  if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&
      UseDummyPerfCounters)
    ExitWithError("Dummy perf counters are not supported in the subprocess "
                  "execution mode.");

  const std::unique_ptr<BenchmarkRunner> Runner =
      ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
          BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode,
          BenchmarkRepeatCount, ValidationCounters, ResultAggMode));
  if (!Runner) {
    ExitWithError("cannot create benchmark runner");
  }

  const auto Opcodes = getOpcodesOrDie(State);
  std::vector<BenchmarkCode> Configurations;

  MCRegister LoopRegister =
      State.getExegesisTarget().getDefaultLoopCounterRegister(
          State.getTargetMachine().getTargetTriple());

  if (Opcodes.empty()) {
    Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
    for (const auto &Configuration : Configurations) {
      if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
          (Configuration.Key.MemoryMappings.size() != 0 ||
           Configuration.Key.MemoryValues.size() != 0 ||
           Configuration.Key.SnippetAddress != 0))
        ExitWithError("Memory and snippet address annotations are only "
                      "supported in subprocess "
                      "execution mode");
    }
    LoopRegister = Configurations[0].Key.LoopRegister;
  }

  SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
  if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
    Repetitors.emplace_back(
        SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
  else {
    for (Benchmark::RepetitionModeE RepMode :
         {Benchmark::RepetitionModeE::Duplicate,
          Benchmark::RepetitionModeE::Loop})
      Repetitors.emplace_back(
          SnippetRepetitor::Create(RepMode, State, LoopRegister));
  }

  BitVector AllReservedRegs;
  for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
    AllReservedRegs |= Repetitor->getReservedRegs();

  if (!Opcodes.empty()) {
    for (const unsigned Opcode : Opcodes) {
      // Ignore instructions without a sched class if
      // -ignore-invalid-sched-class is passed.
      if (IgnoreInvalidSchedClass &&
          State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
        errs() << State.getInstrInfo().getName(Opcode)
               << ": ignoring instruction without sched class\n";
        continue;
      }

      auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
      if (!ConfigsForInstr) {
        logAllUnhandledErrors(
            ConfigsForInstr.takeError(), errs(),
            Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
        continue;
      }
      std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
                std::back_inserter(Configurations));
    }
  }

  if (MinInstructions == 0) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("--min-instructions must be greater than zero");
  }

  // Write to standard output if file is not set.
  if (BenchmarkFile.empty())
    BenchmarkFile = "-";

  if (!Configurations.empty())
    runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner);

  pfm::pfmTerminate();
}

// Prints the results of running analysis pass `Pass` to file `OutputFilename`
// if OutputFilename is non-empty.
template <typename Pass>
static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
                             const std::string &OutputFilename) {
  if (OutputFilename.empty())
    return;
  if (OutputFilename != "-") {
    errs() << "Printing " << Name << " results to file '" << OutputFilename
           << "'\n";
  }
  std::error_code ErrorCode;
  raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
                            sys::fs::FA_Read | sys::fs::FA_Write);
  if (ErrorCode)
    ExitOnFileError(OutputFilename, errorCodeToError(ErrorCode));
  if (auto Err = Analyzer.run<Pass>(ClustersOS))
    ExitOnFileError(OutputFilename, std::move(Err));
}

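// Marks the benchmark points that do not match --analysis-filter as errored,
// so that the subsequent analysis ignores their measurements.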
static void filterPoints(MutableArrayRef<Benchmark> Points,
                         const MCInstrInfo &MCII) {
  if (AnalysisSnippetFilter == BenchmarkFilter::All)
    return;

  bool WantPointsWithMemOps = AnalysisSnippetFilter == BenchmarkFilter::WithMem;
  for (Benchmark &Point : Points) {
    if (!Point.Error.empty())
      continue;
    if (WantPointsWithMemOps ==
        any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) {
          const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
          return MCDesc.mayLoad() || MCDesc.mayStore();
        }))
      continue;
    Point.Error = "filtered out by user";
  }
}

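// Entry point for --mode=analysis: reads benchmark points from
// --benchmarks-file, clusters them, and prints the requested cluster and
// scheduling-class-inconsistency reports.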
static void analysisMain() {
  ExitOnErr.setBanner("llvm-exegesis: ");
  if (BenchmarkFile.empty())
    ExitWithError("--benchmarks-file must be set");

  if (AnalysisClustersOutputFile.empty() &&
      AnalysisInconsistenciesOutputFile.empty()) {
    ExitWithError(
        "for --mode=analysis: At least one of --analysis-clusters-output-file "
        "and --analysis-inconsistencies-output-file must be specified");
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/TargetExegesis.def"

  auto MemoryBuffer = ExitOnFileError(
      BenchmarkFile,
      errorOrToExpected(MemoryBuffer::getFile(BenchmarkFile, /*IsText=*/true)));

  const auto TriplesAndCpus = ExitOnFileError(
      BenchmarkFile, Benchmark::readTriplesAndCpusFromYamls(*MemoryBuffer));
  if (TriplesAndCpus.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  if (TriplesAndCpus.size() > 1) {
    ExitWithError("analysis file contains benchmarks from several CPUs. This "
                  "is unsupported.");
  }
  auto TripleAndCpu = *TriplesAndCpus.begin();
  if (AnalysisOverrideBenchmarksTripleAndCpu) {
| 658 | errs() << "overridding file CPU name (" << TripleAndCpu.CpuName |
| 659 | << ") with provided tripled (" << TripleName << ") and CPU name (" |
| 660 | << MCPU << ")\n" ; |
| 661 | TripleAndCpu.LLVMTriple = TripleName; |
| 662 | TripleAndCpu.CpuName = MCPU; |
| 663 | } |
| 664 | errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '" |
| 665 | << TripleAndCpu.CpuName << "'\n" ; |
| 666 | |
| 667 | // Read benchmarks. |
| 668 | const LLVMState State = ExitOnErr( |
| 669 | LLVMState::Create(TripleName: TripleAndCpu.LLVMTriple, CpuName: TripleAndCpu.CpuName)); |
| 670 | std::vector<Benchmark> Points = ExitOnFileError( |
| 671 | FileName: BenchmarkFile, E: Benchmark::readYamls(State, Buffer: *MemoryBuffer)); |
| 672 | |
| 673 | outs() << "Parsed " << Points.size() << " benchmark points\n" ; |
| 674 | if (Points.empty()) { |
| 675 | errs() << "no benchmarks to analyze\n" ; |
| 676 | return; |
| 677 | } |
| 678 | // FIXME: Merge points from several runs (latency and uops). |
| 679 | |
| 680 | filterPoints(Points, MCII: State.getInstrInfo()); |
| 681 | |
| 682 | const auto Clustering = ExitOnErr(BenchmarkClustering::create( |
| 683 | Points, Mode: AnalysisClusteringAlgorithm, DbscanMinPts: AnalysisDbscanNumPoints, |
| 684 | AnalysisClusteringEpsilon, SubtargetInfo: &State.getSubtargetInfo(), |
| 685 | InstrInfo: &State.getInstrInfo())); |
| 686 | |
| 687 | const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon, |
| 688 | AnalysisDisplayUnstableOpcodes); |
| 689 | |
| 690 | maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, Name: "analysis clusters" , |
| 691 | OutputFilename: AnalysisClustersOutputFile); |
| 692 | maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>( |
| 693 | Analyzer, Name: "sched class consistency analysis" , |
| 694 | OutputFilename: AnalysisInconsistenciesOutputFile); |
| 695 | } |
| 696 | |
| 697 | } // namespace exegesis |
| 698 | } // namespace llvm |
| 699 | |
| 700 | int main(int Argc, char **Argv) { |
| 701 | using namespace llvm; |
| 702 | |
| 703 | InitLLVM X(Argc, Argv); |
| 704 | |
| 705 | // Initialize targets so we can print them when flag --version is specified. |
| 706 | #define LLVM_EXEGESIS(TargetName) \ |
| 707 | LLVMInitialize##TargetName##Target(); \ |
| 708 | LLVMInitialize##TargetName##TargetInfo(); \ |
| 709 | LLVMInitialize##TargetName##TargetMC(); |
| 710 | #include "llvm/Config/TargetExegesis.def" |
| 711 | |
| 712 | // Register the Target and CPU printer for --version. |
| 713 | cl::AddExtraVersionPrinter(func: sys::printDefaultTargetAndDetectedCPU); |
| 714 | |
| 715 | // Enable printing of available targets when flag --version is specified. |
| 716 | cl::AddExtraVersionPrinter(func: TargetRegistry::printRegisteredTargetsForVersion); |
| 717 | |
| 718 | cl::HideUnrelatedOptions(Categories: {&exegesis::Options, &exegesis::BenchmarkOptions, |
| 719 | &exegesis::AnalysisOptions}); |
| 720 | |
  cl::ParseCommandLineOptions(Argc, Argv,
                              "llvm host machine instruction characteristics "
                              "measurement and analysis.\n");

  exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
    if (Err.isA<exegesis::ClusteringError>())
      return EXIT_SUCCESS;
    return EXIT_FAILURE;
  });

  if (exegesis::BenchmarkMode == exegesis::Benchmark::Unknown) {
    exegesis::analysisMain();
  } else {
    exegesis::benchmarkMain();
  }
  return EXIT_SUCCESS;
}