//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Measures execution properties (latencies/uops) of an instruction.
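///
/// Typical invocations (illustrative; opcode names are target-specific,
/// e.g. ADD64rr exists on x86-64):
///   llvm-exegesis -mode=latency -opcode-name=ADD64rr
///   llvm-exegesis -mode=uops -snippets-file=snippet.s
///   llvm-exegesis -mode=analysis -benchmarks-file=benchmarks.yaml \
///       -analysis-clusters-output-file=-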
///
//===----------------------------------------------------------------------===//

#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/CodeTemplate.h"
#include "lib/Error.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/ProgressMeter.h"
#include "lib/ResultAggregator.h"
#include "lib/SnippetFile.h"
#include "lib/SnippetRepetitor.h"
#include "lib/Target.h"
#include "lib/TargetSelect.h"
#include "lib/ValidationEvent.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <string>

namespace llvm {
namespace exegesis {

static cl::opt<int> OpcodeIndex(
    "opcode-index",
    cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
    cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<std::string>
    OpcodeNames("opcode-name",
                cl::desc("comma-separated list of opcodes to measure, by name"),
                cl::cat(BenchmarkOptions), cl::init(""));

static cl::opt<std::string> SnippetsFile("snippets-file",
                                         cl::desc("code snippets to measure"),
                                         cl::cat(BenchmarkOptions),
                                         cl::init(""));

static cl::opt<std::string>
    BenchmarkFile("benchmarks-file",
                  cl::desc("File to read (analysis mode) or write "
                           "(latency/uops/inverse_throughput modes) benchmark "
                           "results. “-” uses stdin/stdout."),
                  cl::cat(Options), cl::init(""));

static cl::opt<Benchmark::ModeE> BenchmarkMode(
    "mode", cl::desc("the mode to run"), cl::cat(Options),
    cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"),
               clEnumValN(Benchmark::InverseThroughput, "inverse_throughput",
                          "Instruction Inverse Throughput"),
               clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"),
               // When not asking for a specific benchmark mode,
               // we'll analyse the results.
               clEnumValN(Benchmark::Unknown, "analysis", "Analysis")));

static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode(
    "result-aggregation-mode",
    cl::desc("How to aggregate multi-value results"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"),
               clEnumValN(Benchmark::Max, "max", "Keep max reading"),
               clEnumValN(Benchmark::Mean, "mean",
                          "Compute mean of all readings"),
               clEnumValN(Benchmark::MinVariance, "min-variance",
                          "Keep readings set with min-variance")),
    cl::init(Benchmark::Min));

static cl::opt<Benchmark::RepetitionModeE> RepetitionMode(
    "repetition-mode", cl::desc("how to repeat the instruction snippet"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"),
        clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"),
        clEnumValN(Benchmark::AggregateMin, "min",
                   "All of the above and take the minimum of measurements"),
        clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate",
                   "Middle half duplicate mode"),
        clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop",
                   "Middle half loop mode")),
    cl::init(Benchmark::Duplicate));

static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
    "measurements-print-progress",
    cl::desc("Produce progress indicator when performing measurements"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
    "benchmark-phase",
    cl::desc("Stop the benchmarking process after the given phase"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
                   "Only generate the minimal instruction sequence"),
        clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
                   "prepare-and-assemble-snippet",
                   "Same as prepare-snippet, but also dumps an excerpt of the "
                   "sequence (hex encoded)"),
        clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode,
                   "assemble-measured-code",
                   "Same as prepare-and-assemble-snippet, but also creates "
                   "the full sequence that can be dumped to a file using "
                   "--dump-object-to-disk"),
        clEnumValN(
            BenchmarkPhaseSelectorE::Measure, "measure",
            "Same as assemble-measured-code, but also runs the measurement "
            "(default)")),
    cl::init(BenchmarkPhaseSelectorE::Measure));

static cl::opt<bool>
    UseDummyPerfCounters("use-dummy-perf-counters",
                         cl::desc("Do not read real performance counters, use "
                                  "dummy values (for testing)"),
                         cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<unsigned>
    MinInstructions("min-instructions",
                    cl::desc("The minimum number of instructions that should "
                             "be included in the snippet"),
                    cl::cat(BenchmarkOptions), cl::init(10000));

static cl::opt<unsigned>
    LoopBodySize("loop-body-size",
                 cl::desc("when repeating the instruction snippet by looping "
                          "over it, duplicate the snippet until the loop body "
                          "contains at least this many instructions"),
                 cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<unsigned> MaxConfigsPerOpcode(
    "max-configs-per-opcode",
    cl::desc(
        "allow the snippet generator to generate at most that many configs"),
    cl::cat(BenchmarkOptions), cl::init(1));

static cl::opt<bool> IgnoreInvalidSchedClass(
    "ignore-invalid-sched-class",
    cl::desc("ignore instructions that do not define a sched class"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkFilter> AnalysisSnippetFilter(
    "analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkFilter::All, "all",
                   "Keep all benchmarks (default)"),
        clEnumValN(BenchmarkFilter::RegOnly, "reg-only",
                   "Keep only those benchmarks that do *NOT* involve memory"),
        clEnumValN(BenchmarkFilter::WithMem, "mem-only",
                   "Keep only the benchmarks that *DO* involve memory")),
    cl::init(BenchmarkFilter::All));

static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm(
    "analysis-clustering", cl::desc("the clustering algorithm to use"),
    cl::cat(AnalysisOptions),
    cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan",
                          "use DBSCAN/OPTICS algorithm"),
               clEnumValN(BenchmarkClustering::Naive, "naive",
                          "one cluster per opcode")),
    cl::init(BenchmarkClustering::Dbscan));

static cl::opt<unsigned> AnalysisDbscanNumPoints(
    "analysis-numpoints",
    cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
    cl::cat(AnalysisOptions), cl::init(3));

static cl::opt<float> AnalysisClusteringEpsilon(
    "analysis-clustering-epsilon",
    cl::desc("epsilon for benchmark point clustering"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<float> AnalysisInconsistencyEpsilon(
    "analysis-inconsistency-epsilon",
    cl::desc("epsilon for detection of when the cluster is different from the "
             "LLVM schedule profile values"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<std::string>
    AnalysisClustersOutputFile("analysis-clusters-output-file",
                               cl::desc("file to write the analysis clusters "
                                        "to ('-' for stdout)"),
                               cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<std::string>
    AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
                                      cl::desc("file to write the analysis "
                                               "inconsistencies to ('-' for "
                                               "stdout)"),
                                      cl::cat(AnalysisOptions), cl::init(""));

static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
    "analysis-display-unstable-clusters",
    cl::desc("If there is more than one benchmark for an opcode, said "
             "benchmarks may end up not being clustered into the same cluster "
             "if the measured performance characteristics are different. By "
             "default all such opcodes are filtered out. This flag will "
             "instead show only such unstable opcodes."),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
    "analysis-override-benchmark-triple-and-cpu",
    cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
             "were measured for, but if you want to analyze them for some "
             "other combination (specified via -mtriple/-mcpu), you can "
             "pass this flag."),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<std::string>
    TripleName("mtriple",
               cl::desc("Target triple. See -version for available targets"),
               cl::cat(Options));

static cl::opt<std::string>
    MCPU("mcpu",
         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
         cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native"));

static cl::opt<std::string>
    DumpObjectToDisk("dump-object-to-disk",
                     cl::desc("dumps the generated benchmark object to disk "
                              "and prints a message to access it"),
                     cl::ValueOptional, cl::cat(BenchmarkOptions));

static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode(
    "execution-mode",
    cl::desc("Selects the execution mode to use for running snippets"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess,
                          "inprocess",
                          "Executes the snippets within the same process"),
               clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess,
                          "subprocess",
                          "Spawns a subprocess for each snippet execution, "
                          "allows for the use of memory annotations")),
    cl::init(BenchmarkRunner::ExecutionModeE::InProcess));

static cl::opt<unsigned> BenchmarkRepeatCount(
    "benchmark-repeat-count",
    cl::desc("The number of times to repeat measurements on the benchmark "
             "before aggregating the results"),
    cl::cat(BenchmarkOptions), cl::init(30));

static cl::list<ValidationEvent> ValidationCounters(
    "validation-counter",
    cl::desc(
        "The name of a validation counter to run concurrently with the main "
        "counter to validate benchmarking assumptions"),
    cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions());

static cl::opt<int> BenchmarkProcessCPU(
    "benchmark-process-cpu",
    cl::desc("The CPU number that the benchmarking process should execute on"),
    cl::cat(BenchmarkOptions), cl::init(-1));

static cl::opt<std::string> MAttr(
    "mattr", cl::desc("comma-separated list of target architecture features"),
    cl::value_desc("+feature1,-feature2,..."), cl::cat(Options), cl::init(""));

static ExitOnError ExitOnErr("llvm-exegesis error: ");

// Helper function that logs the error(s) and exits.
template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) {
  ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...));
}

// Check Err. If it's in a failure state, log the file error(s) and exit.
static void ExitOnFileError(const Twine &FileName, Error Err) {
  if (Err) {
    ExitOnErr(createFileError(FileName, std::move(Err)));
  }
}

// Check E. If it's in a success state, return the contained value.
// If it's in a failure state, log the file error(s) and exit.
template <typename T>
T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
  ExitOnFileError(FileName, E.takeError());
  return std::move(*E);
}

// Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is
// provided, and returns the opcode indices, or {} if snippets should be read
// from `SnippetsFile`.
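// For example, `-opcode-name=ADD64rr,SUB64rr` (x86-64) selects exactly those
// two opcodes, while `-opcode-index=-1` selects every opcode available on the
// target.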
static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
  const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) +
                             (OpcodeIndex == 0 ? 0 : 1) +
                             (SnippetsFile.empty() ? 0 : 1);
  const auto &ET = State.getExegesisTarget();
  const auto AvailableFeatures = State.getSubtargetInfo().getFeatureBits();

  if (NumSetFlags != 1) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("please provide one and only one of 'opcode-index', "
                  "'opcode-name' or 'snippets-file'");
  }
  if (!SnippetsFile.empty())
    return {};
  if (OpcodeIndex > 0)
    return {static_cast<unsigned>(OpcodeIndex)};
  if (OpcodeIndex < 0) {
    std::vector<unsigned> Result;
    unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes();
    Result.reserve(NumOpcodes);
    for (unsigned I = 0, E = NumOpcodes; I < E; ++I) {
      if (!ET.isOpcodeAvailable(I, AvailableFeatures))
        continue;
      Result.push_back(I);
    }
    return Result;
  }
  // Resolve opcode name -> opcode.
  const auto ResolveName = [&State](StringRef OpcodeName) -> unsigned {
    const auto &Map = State.getOpcodeNameToOpcodeIdxMapping();
    auto I = Map.find(OpcodeName);
    if (I != Map.end())
      return I->getSecond();
    return 0u;
  };

  SmallVector<StringRef, 2> Pieces;
  StringRef(OpcodeNames.getValue())
      .split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
  std::vector<unsigned> Result;
  Result.reserve(Pieces.size());
  for (const StringRef &OpcodeName : Pieces) {
    if (unsigned Opcode = ResolveName(OpcodeName))
      Result.push_back(Opcode);
    else
      ExitWithError(Twine("unknown opcode ").concat(OpcodeName));
  }
  return Result;
}

// Generates code snippets for opcode `Opcode`.
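// A single opcode may yield several instruction variants (e.g. different
// operand assignments); at most --max-configs-per-opcode configurations are
// kept per opcode.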
static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
                 const BitVector &ForbiddenRegs) {
  // Ignore instructions that we cannot run.
  if (const char *Reason =
          State.getExegesisTarget().getIgnoredOpcodeReasonOrNull(State, Opcode))
    return make_error<Failure>(Reason);

  const Instruction &Instr = State.getIC().getInstr(Opcode);
  const std::vector<InstructionTemplate> InstructionVariants =
      State.getExegesisTarget().generateInstructionVariants(
          Instr, MaxConfigsPerOpcode);

  SnippetGenerator::Options SnippetOptions;
  SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
  const std::unique_ptr<SnippetGenerator> Generator =
      State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State,
                                                       SnippetOptions);
  if (!Generator)
    ExitWithError("cannot create snippet generator");

  std::vector<BenchmarkCode> Benchmarks;
  for (const InstructionTemplate &Variant : InstructionVariants) {
    if (Benchmarks.size() >= MaxConfigsPerOpcode)
      break;
    if (auto Err = Generator->generateConfigurations(Variant, Benchmarks,
                                                     ForbiddenRegs))
      return std::move(Err);
  }
  return Benchmarks;
}

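// Runs each configuration with every requested repetitor and instruction
// count, aggregates the per-run measurements according to --repetition-mode,
// and streams each resulting benchmark as YAML to --benchmarks-file (or
// stdout).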
static void runBenchmarkConfigurations(
    const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
    ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
    const BenchmarkRunner &Runner) {
  assert(!Configurations.empty() && "Don't have any configurations to run.");
  std::optional<raw_fd_ostream> FileOstr;
  if (BenchmarkFile != "-") {
    int ResultFD = 0;
    // Create output file or open existing file and truncate it, once.
    ExitOnErr(errorCodeToError(openFileForWrite(BenchmarkFile, ResultFD,
                                                sys::fs::CD_CreateAlways,
                                                sys::fs::OF_TextWithCRLF)));
    FileOstr.emplace(ResultFD, true /*shouldClose*/);
  }
  raw_ostream &Ostr = FileOstr ? *FileOstr : outs();

  std::optional<ProgressMeter<>> Meter;
  if (BenchmarkMeasurementsPrintProgress)
    Meter.emplace(Configurations.size());

  SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
  if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
      RepetitionMode == Benchmark::MiddleHalfLoop)
    MinInstructionCounts.push_back(MinInstructions * 2);

  for (const BenchmarkCode &Conf : Configurations) {
    ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
    SmallVector<Benchmark, 2> AllResults;

    for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
         Repetitors) {
      for (unsigned IterationRepetitions : MinInstructionCounts) {
        auto RC = ExitOnErr(Runner.getRunnableConfiguration(
            Conf, IterationRepetitions, LoopBodySize, *Repetitor));
        std::optional<StringRef> DumpFile;
        if (DumpObjectToDisk.getNumOccurrences())
          DumpFile = DumpObjectToDisk;
        const std::optional<int> BenchmarkCPU =
            BenchmarkProcessCPU == -1
                ? std::nullopt
                : std::optional(BenchmarkProcessCPU.getValue());
        auto [Err, BenchmarkResult] =
            Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU);
        if (Err) {
          // Errors from executing the snippets are fine.
          // All other errors are a framework issue and should fail.
          if (!Err.isA<SnippetExecutionFailure>())
            ExitOnErr(std::move(Err));

          BenchmarkResult.Error = toString(std::move(Err));
        }
        AllResults.push_back(std::move(BenchmarkResult));
      }
    }

    Benchmark &Result = AllResults.front();

    // If any of our measurements failed, pretend they all have failed.
    if (AllResults.size() > 1 &&
        any_of(AllResults, [](const Benchmark &R) {
          return R.Measurements.empty();
        }))
      Result.Measurements.clear();

    std::unique_ptr<ResultAggregator> ResultAgg =
        ResultAggregator::CreateAggregator(RepetitionMode);
    ResultAgg->AggregateResults(Result,
                                ArrayRef<Benchmark>(AllResults).drop_front());

    // With dummy counters, measurements are rather meaningless,
    // so drop them altogether.
    if (UseDummyPerfCounters)
      Result.Measurements.clear();

    ExitOnFileError(BenchmarkFile, Result.writeYamlTo(State, Ostr));
  }
}

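// Entry point for the benchmarking modes (latency, uops, inverse_throughput):
// sets up performance counters, builds the list of snippet configurations
// (either read from --snippets-file or generated per opcode), and runs them.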
void benchmarkMain() {
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
      !UseDummyPerfCounters) {
#ifndef HAVE_LIBPFM
    ExitWithError(
        "benchmarking unavailable, LLVM was built without libpfm. You can "
        "pass --benchmark-phase=... to skip the actual benchmarking or "
        "--use-dummy-perf-counters to not query the kernel for real event "
        "counts.");
#else
    if (pfm::pfmInitialize())
      ExitWithError("cannot initialize libpfm");
#endif
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##AsmParser();
#include "llvm/Config/TargetExegesis.def"

  const LLVMState State = ExitOnErr(
      LLVMState::Create(TripleName, MCPU, MAttr, UseDummyPerfCounters));

  // Preliminary check to ensure features needed for requested
  // benchmark mode are present on target CPU and/or OS.
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
    ExitOnErr(State.getExegesisTarget().checkFeatureSupport());

  if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&
      UseDummyPerfCounters)
    ExitWithError("Dummy perf counters are not supported in the subprocess "
                  "execution mode.");

  const std::unique_ptr<BenchmarkRunner> Runner =
      ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
          BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode,
          BenchmarkRepeatCount, ValidationCounters, ResultAggMode));
  if (!Runner) {
    ExitWithError("cannot create benchmark runner");
  }

  const auto Opcodes = getOpcodesOrDie(State);
  std::vector<BenchmarkCode> Configurations;

  MCRegister LoopRegister =
      State.getExegesisTarget().getDefaultLoopCounterRegister(
          State.getTargetMachine().getTargetTriple());

  if (Opcodes.empty()) {
    Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
    for (const auto &Configuration : Configurations) {
      if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
          (Configuration.Key.MemoryMappings.size() != 0 ||
           Configuration.Key.MemoryValues.size() != 0 ||
           Configuration.Key.SnippetAddress != 0))
        ExitWithError("Memory and snippet address annotations are only "
                      "supported in subprocess "
                      "execution mode");
    }
    LoopRegister = Configurations[0].Key.LoopRegister;
  }

  SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
  if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
    Repetitors.emplace_back(
        SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
  else {
    for (Benchmark::RepetitionModeE RepMode :
         {Benchmark::RepetitionModeE::Duplicate,
          Benchmark::RepetitionModeE::Loop})
      Repetitors.emplace_back(
          SnippetRepetitor::Create(RepMode, State, LoopRegister));
  }

  BitVector AllReservedRegs;
  for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
    AllReservedRegs |= Repetitor->getReservedRegs();

  if (!Opcodes.empty()) {
    for (const unsigned Opcode : Opcodes) {
      // Ignore instructions without a sched class if
      // -ignore-invalid-sched-class is passed.
      if (IgnoreInvalidSchedClass &&
          State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
        errs() << State.getInstrInfo().getName(Opcode)
               << ": ignoring instruction without sched class\n";
        continue;
      }

      auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
      if (!ConfigsForInstr) {
        logAllUnhandledErrors(
            ConfigsForInstr.takeError(), errs(),
            Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
        continue;
      }
      std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
                std::back_inserter(Configurations));
    }
  }

  if (MinInstructions == 0) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("--min-instructions must be greater than zero");
  }

  // Write to standard output if file is not set.
  if (BenchmarkFile.empty())
    BenchmarkFile = "-";

  if (!Configurations.empty())
    runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner);

  pfm::pfmTerminate();
}

// Prints the results of running analysis pass `Pass` to file `OutputFilename`
// if OutputFilename is non-empty.
template <typename Pass>
static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
                             const std::string &OutputFilename) {
  if (OutputFilename.empty())
    return;
  if (OutputFilename != "-") {
    errs() << "Printing " << Name << " results to file '" << OutputFilename
           << "'\n";
  }
  std::error_code ErrorCode;
  raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
                            sys::fs::FA_Read | sys::fs::FA_Write);
  if (ErrorCode)
    ExitOnFileError(OutputFilename, errorCodeToError(ErrorCode));
  if (auto Err = Analyzer.run<Pass>(ClustersOS))
    ExitOnFileError(OutputFilename, std::move(Err));
}

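// Marks the benchmark points that do not match --analysis-filter as errored,
// so that the subsequent analysis ignores their measurements.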
static void filterPoints(MutableArrayRef<Benchmark> Points,
                         const MCInstrInfo &MCII) {
  if (AnalysisSnippetFilter == BenchmarkFilter::All)
    return;

  bool WantPointsWithMemOps = AnalysisSnippetFilter == BenchmarkFilter::WithMem;
  for (Benchmark &Point : Points) {
    if (!Point.Error.empty())
      continue;
    if (WantPointsWithMemOps ==
        any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) {
          const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
          return MCDesc.mayLoad() || MCDesc.mayStore();
        }))
      continue;
    Point.Error = "filtered out by user";
  }
}

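// Entry point for --mode=analysis: reads benchmark points from
// --benchmarks-file, clusters them, and prints the requested cluster and
// scheduling-class-inconsistency reports.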
static void analysisMain() {
  ExitOnErr.setBanner("llvm-exegesis: ");
  if (BenchmarkFile.empty())
    ExitWithError("--benchmarks-file must be set");

  if (AnalysisClustersOutputFile.empty() &&
      AnalysisInconsistenciesOutputFile.empty()) {
    ExitWithError(
        "for --mode=analysis: At least one of --analysis-clusters-output-file "
        "and --analysis-inconsistencies-output-file must be specified");
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/TargetExegesis.def"

  auto MemoryBuffer = ExitOnFileError(
      BenchmarkFile,
      errorOrToExpected(MemoryBuffer::getFile(BenchmarkFile, /*IsText=*/true)));

  const auto TriplesAndCpus = ExitOnFileError(
      BenchmarkFile, Benchmark::readTriplesAndCpusFromYamls(*MemoryBuffer));
  if (TriplesAndCpus.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  if (TriplesAndCpus.size() > 1) {
    ExitWithError("analysis file contains benchmarks from several CPUs. This "
                  "is unsupported.");
  }
  auto TripleAndCpu = *TriplesAndCpus.begin();
  if (AnalysisOverrideBenchmarksTripleAndCpu) {
| 658 | errs() << "overridding file CPU name (" << TripleAndCpu.CpuName |
| 659 | << ") with provided tripled (" << TripleName << ") and CPU name (" |
| 660 | << MCPU << ")\n" ; |
| 661 | TripleAndCpu.LLVMTriple = TripleName; |
| 662 | TripleAndCpu.CpuName = MCPU; |
| 663 | } |
| 664 | errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '" |
| 665 | << TripleAndCpu.CpuName << "'\n" ; |
| 666 | |
| 667 | // Read benchmarks. |
| 668 | const LLVMState State = ExitOnErr( |
| 669 | LLVMState::Create(TripleName: TripleAndCpu.LLVMTriple, CpuName: TripleAndCpu.CpuName)); |
| 670 | std::vector<Benchmark> Points = ExitOnFileError( |
| 671 | FileName: BenchmarkFile, E: Benchmark::readYamls(State, Buffer: *MemoryBuffer)); |
| 672 | |
| 673 | outs() << "Parsed " << Points.size() << " benchmark points\n" ; |
| 674 | if (Points.empty()) { |
| 675 | errs() << "no benchmarks to analyze\n" ; |
| 676 | return; |
| 677 | } |
| 678 | // FIXME: Merge points from several runs (latency and uops). |
| 679 | |
| 680 | filterPoints(Points, MCII: State.getInstrInfo()); |
| 681 | |
| 682 | const auto Clustering = ExitOnErr(BenchmarkClustering::create( |
| 683 | Points, Mode: AnalysisClusteringAlgorithm, DbscanMinPts: AnalysisDbscanNumPoints, |
| 684 | AnalysisClusteringEpsilon, SubtargetInfo: &State.getSubtargetInfo(), |
| 685 | InstrInfo: &State.getInstrInfo())); |
| 686 | |
| 687 | const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon, |
| 688 | AnalysisDisplayUnstableOpcodes); |
| 689 | |
| 690 | maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, Name: "analysis clusters" , |
| 691 | OutputFilename: AnalysisClustersOutputFile); |
| 692 | maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>( |
| 693 | Analyzer, Name: "sched class consistency analysis" , |
| 694 | OutputFilename: AnalysisInconsistenciesOutputFile); |
| 695 | } |
| 696 | |
| 697 | } // namespace exegesis |
| 698 | } // namespace llvm |
| 699 | |
| 700 | int main(int Argc, char **Argv) { |
| 701 | using namespace llvm; |
| 702 | |
| 703 | InitLLVM X(Argc, Argv); |
| 704 | |
| 705 | // Initialize targets so we can print them when flag --version is specified. |
| 706 | #define LLVM_EXEGESIS(TargetName) \ |
| 707 | LLVMInitialize##TargetName##Target(); \ |
| 708 | LLVMInitialize##TargetName##TargetInfo(); \ |
| 709 | LLVMInitialize##TargetName##TargetMC(); |
| 710 | #include "llvm/Config/TargetExegesis.def" |
| 711 | |
| 712 | // Register the Target and CPU printer for --version. |
| 713 | cl::AddExtraVersionPrinter(func: sys::printDefaultTargetAndDetectedCPU); |
| 714 | |
| 715 | // Enable printing of available targets when flag --version is specified. |
| 716 | cl::AddExtraVersionPrinter(func: TargetRegistry::printRegisteredTargetsForVersion); |
| 717 | |
| 718 | cl::HideUnrelatedOptions(Categories: {&exegesis::Options, &exegesis::BenchmarkOptions, |
| 719 | &exegesis::AnalysisOptions}); |
| 720 | |
  cl::ParseCommandLineOptions(Argc, Argv,
                              "llvm host machine instruction characteristics "
                              "measurement and analysis.\n");

  exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
    if (Err.isA<exegesis::ClusteringError>())
      return EXIT_SUCCESS;
    return EXIT_FAILURE;
  });

  if (exegesis::BenchmarkMode == exegesis::Benchmark::Unknown) {
    exegesis::analysisMain();
  } else {
    exegesis::benchmarkMain();
  }
  return EXIT_SUCCESS;
}