1//===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Analysis.h"
10#include "BenchmarkResult.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/MC/MCAsmInfo.h"
13#include "llvm/MC/MCTargetOptions.h"
14#include "llvm/Support/FormatVariadic.h"
15#include <limits>
16#include <vector>
17
18namespace llvm {
19namespace exegesis {
20
// Field separator used for the CSV output written by this file.
static constexpr char kCsvSep = ',';
22
23namespace {
24
// Selects the escaping scheme applied by writeEscaped().
enum EscapeTag {
  kEscapeCsv,       // Text written as (part of) a CSV field.
  kEscapeHtml,      // Text written as HTML element content.
  kEscapeHtmlString // Text written inside a double-quoted HTML string.
};

// Writes S to OS, escaped according to Tag. Only declared here; the behavior
// for each tag is provided by the explicit specializations that follow.
template <EscapeTag Tag> void writeEscaped(raw_ostream &OS, const StringRef S);
28
29template <> void writeEscaped<kEscapeCsv>(raw_ostream &OS, const StringRef S) {
30 if (!S.contains(C: kCsvSep)) {
31 OS << S;
32 } else {
33 // Needs escaping.
34 OS << '"';
35 for (const char C : S) {
36 if (C == '"')
37 OS << "\"\"";
38 else
39 OS << C;
40 }
41 OS << '"';
42 }
43}
44
45template <> void writeEscaped<kEscapeHtml>(raw_ostream &OS, const StringRef S) {
46 for (const char C : S) {
47 if (C == '<')
48 OS << "&lt;";
49 else if (C == '>')
50 OS << "&gt;";
51 else if (C == '&')
52 OS << "&amp;";
53 else
54 OS << C;
55 }
56}
57
58template <>
59void writeEscaped<kEscapeHtmlString>(raw_ostream &OS, const StringRef S) {
60 for (const char C : S) {
61 if (C == '"')
62 OS << "\\\"";
63 else
64 OS << C;
65 }
66}
67
68} // namespace
69
70template <EscapeTag Tag>
71static void
72writeClusterId(raw_ostream &OS,
73 const BenchmarkClustering::ClusterId &CID) {
74 if (CID.isNoise())
75 writeEscaped<Tag>(OS, "[noise]");
76 else if (CID.isError())
77 writeEscaped<Tag>(OS, "[error]");
78 else
79 OS << CID.getId();
80}
81
82template <EscapeTag Tag>
83static void writeMeasurementValue(raw_ostream &OS, const double Value) {
84 // Given Value, if we wanted to serialize it to a string,
85 // how many base-10 digits will we need to store, max?
86 static constexpr auto MaxDigitCount =
87 std::numeric_limits<decltype(Value)>::max_digits10;
88 // Also, we will need a decimal separator.
89 static constexpr auto DecimalSeparatorLen = 1; // '.' e.g.
90 // So how long of a string will the serialization produce, max?
91 static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen;
92
93 // WARNING: when changing the format, also adjust the small-size estimate ^.
94 static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}");
95
96 writeEscaped<Tag>(
97 OS, formatv(Fmt: SimpleFloatFormat.data(), Vals: Value).sstr<SerializationLen>());
98}
99
100template <typename EscapeTag, EscapeTag Tag>
101void Analysis::writeSnippet(raw_ostream &OS, ArrayRef<uint8_t> Bytes,
102 const char *Separator) const {
103 SmallVector<std::string, 3> Lines;
104 // Parse the asm snippet and print it.
105 while (!Bytes.empty()) {
106 MCInst MI;
107 uint64_t MISize = 0;
108 if (!DisasmHelper_->decodeInst(MI, MISize, Bytes)) {
109 writeEscaped<Tag>(OS, join(R&: Lines, Separator));
110 writeEscaped<Tag>(OS, Separator);
111 writeEscaped<Tag>(OS, "[error decoding asm snippet]");
112 return;
113 }
114 SmallString<128> InstPrinterStr; // FIXME: magic number.
115 raw_svector_ostream OSS(InstPrinterStr);
116 DisasmHelper_->printInst(MI: &MI, OS&: OSS);
117 Bytes = Bytes.drop_front(N: MISize);
118 Lines.emplace_back(Args: InstPrinterStr.str().trim());
119 }
120 writeEscaped<Tag>(OS, join(R&: Lines, Separator));
121}
122
123// Prints a row representing an instruction, along with scheduling info and
124// point coordinates (measurements).
125void Analysis::printInstructionRowCsv(const size_t PointId,
126 raw_ostream &OS) const {
127 const Benchmark &Point = Clustering_.getPoints()[PointId];
128 writeClusterId<kEscapeCsv>(OS, CID: Clustering_.getClusterIdForPoint(P: PointId));
129 OS << kCsvSep;
130 writeSnippet<EscapeTag, kEscapeCsv>(OS, Bytes: Point.AssembledSnippet, Separator: "; ");
131 OS << kCsvSep;
132 writeEscaped<kEscapeCsv>(OS, S: Point.Key.Config);
133 OS << kCsvSep;
134 assert(!Point.Key.Instructions.empty());
135 const MCInst &MCI = Point.keyInstruction();
136 unsigned SchedClassId;
137 std::tie(args&: SchedClassId, args: std::ignore) = ResolvedSchedClass::resolveSchedClassId(
138 SubtargetInfo: State_.getSubtargetInfo(), InstrInfo: State_.getInstrInfo(), MCI);
139#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
140 StringRef SCDescName =
141 State_.getSubtargetInfo().getSchedModel().getSchedClassName(SchedClassId);
142 writeEscaped<kEscapeCsv>(OS, SCDescName);
143#else
144 OS << SchedClassId;
145#endif
146 for (const auto &Measurement : Point.Measurements) {
147 OS << kCsvSep;
148 writeMeasurementValue<kEscapeCsv>(OS, Value: Measurement.PerInstructionValue);
149 }
150 OS << "\n";
151}
152
153Analysis::Analysis(const LLVMState &State,
154 const BenchmarkClustering &Clustering,
155 double AnalysisInconsistencyEpsilon,
156 bool AnalysisDisplayUnstableOpcodes)
157 : Clustering_(Clustering), State_(State),
158 AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
159 AnalysisInconsistencyEpsilon),
160 AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
161 if (Clustering.getPoints().empty())
162 return;
163
164 DisasmHelper_ = std::make_unique<DisassemblerHelper>(args: State);
165}
166
167template <>
168Error Analysis::run<Analysis::PrintClusters>(raw_ostream &OS) const {
169 if (Clustering_.getPoints().empty())
170 return Error::success();
171
172 // Write the header.
173 OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
174 << kCsvSep << "sched_class";
175 for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
176 OS << kCsvSep;
177 writeEscaped<kEscapeCsv>(OS, S: Measurement.Key);
178 }
179 OS << "\n";
180
181 // Write the points.
182 for (const auto &ClusterIt : Clustering_.getValidClusters()) {
183 for (const size_t PointId : ClusterIt.PointIndices) {
184 printInstructionRowCsv(PointId, OS);
185 }
186 OS << "\n\n";
187 }
188 return Error::success();
189}
190
// Takes ownership of the given resolved sched class by moving it into the RSC
// member.
Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
    ResolvedSchedClass &&RSC)
    : RSC(std::move(RSC)) {}
194
195std::vector<Analysis::ResolvedSchedClassAndPoints>
196Analysis::makePointsPerSchedClass() const {
197 std::vector<ResolvedSchedClassAndPoints> Entries;
198 // Maps SchedClassIds to index in result.
199 std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
200 const auto &Points = Clustering_.getPoints();
201 for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
202 const Benchmark &Point = Points[PointId];
203 if (!Point.Error.empty())
204 continue;
205 assert(!Point.Key.Instructions.empty());
206 // FIXME: we should be using the tuple of classes for instructions in the
207 // snippet as key.
208 const MCInst &MCI = Point.keyInstruction();
209 unsigned SchedClassId;
210 bool WasVariant;
211 std::tie(args&: SchedClassId, args&: WasVariant) =
212 ResolvedSchedClass::resolveSchedClassId(SubtargetInfo: State_.getSubtargetInfo(),
213 InstrInfo: State_.getInstrInfo(), MCI);
214 const auto IndexIt = SchedClassIdToIndex.find(x: SchedClassId);
215 if (IndexIt == SchedClassIdToIndex.end()) {
216 // Create a new entry.
217 SchedClassIdToIndex.emplace(args&: SchedClassId, args: Entries.size());
218 ResolvedSchedClassAndPoints Entry(ResolvedSchedClass(
219 State_.getSubtargetInfo(), SchedClassId, WasVariant));
220 Entry.PointIds.push_back(x: PointId);
221 Entries.push_back(x: std::move(Entry));
222 } else {
223 // Append to the existing entry.
224 Entries[IndexIt->second].PointIds.push_back(x: PointId);
225 }
226 }
227 return Entries;
228}
229
230// Parallel benchmarks repeat the same opcode multiple times. Just show this
231// opcode and show the whole snippet only on hover.
232static void writeParallelSnippetHtml(raw_ostream &OS,
233 const std::vector<MCInst> &Instructions,
234 const MCInstrInfo &InstrInfo) {
235 if (Instructions.empty())
236 return;
237 writeEscaped<kEscapeHtml>(OS, S: InstrInfo.getName(Opcode: Instructions[0].getOpcode()));
238 if (Instructions.size() > 1)
239 OS << " (x" << Instructions.size() << ")";
240}
241
242// Latency tries to find a serial path. Just show the opcode path and show the
243// whole snippet only on hover.
244static void writeLatencySnippetHtml(raw_ostream &OS,
245 const std::vector<MCInst> &Instructions,
246 const MCInstrInfo &InstrInfo) {
247 ListSeparator LS(" &rarr; ");
248 for (const MCInst &Instr : Instructions) {
249 OS << LS;
250 writeEscaped<kEscapeHtml>(OS, S: InstrInfo.getName(Opcode: Instr.getOpcode()));
251 }
252}
253
254void Analysis::printPointHtml(const Benchmark &Point, raw_ostream &OS) const {
255 OS << "<li><span class=\"mono\" title=\"";
256 writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Bytes: Point.AssembledSnippet, Separator: "\n");
257 OS << "\">";
258 switch (Point.Mode) {
259 case Benchmark::Latency:
260 writeLatencySnippetHtml(OS, Instructions: Point.Key.Instructions, InstrInfo: State_.getInstrInfo());
261 break;
262 case Benchmark::Uops:
263 case Benchmark::InverseThroughput:
264 writeParallelSnippetHtml(OS, Instructions: Point.Key.Instructions, InstrInfo: State_.getInstrInfo());
265 break;
266 default:
267 llvm_unreachable("invalid mode");
268 }
269 OS << "</span> <span class=\"mono\">";
270 writeEscaped<kEscapeHtml>(OS, S: Point.Key.Config);
271 OS << "</span></li>";
272}
273
274void Analysis::printSchedClassClustersHtml(
275 const std::vector<SchedClassCluster> &Clusters,
276 const ResolvedSchedClass &RSC, raw_ostream &OS) const {
277 const auto &Points = Clustering_.getPoints();
278 OS << "<table class=\"sched-class-clusters\">";
279 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
280 assert(!Clusters.empty());
281 for (const auto &Measurement :
282 Points[Clusters[0].getPointIds()[0]].Measurements) {
283 OS << "<th>";
284 writeEscaped<kEscapeHtml>(OS, S: Measurement.Key);
285 OS << "</th>";
286 }
287 OS << "</tr>";
288 for (const SchedClassCluster &Cluster : Clusters) {
289 OS << "<tr class=\""
290 << (Cluster.measurementsMatch(STI: State_.getSubtargetInfo(), SC: RSC,
291 Clustering: Clustering_,
292 AnalysisInconsistencyEpsilonSquared_)
293 ? "good-cluster"
294 : "bad-cluster")
295 << "\"><td>";
296 writeClusterId<kEscapeHtml>(OS, CID: Cluster.id());
297 OS << "</td><td><ul>";
298 for (const size_t PointId : Cluster.getPointIds()) {
299 printPointHtml(Point: Points[PointId], OS);
300 }
301 OS << "</ul></td>";
302 for (const auto &Stats : Cluster.getCentroid().getStats()) {
303 OS << "<td class=\"measurement\">";
304 writeMeasurementValue<kEscapeHtml>(OS, Value: Stats.avg());
305 OS << "<br><span class=\"minmax\">[";
306 writeMeasurementValue<kEscapeHtml>(OS, Value: Stats.min());
307 OS << ";";
308 writeMeasurementValue<kEscapeHtml>(OS, Value: Stats.max());
309 OS << "]</span></td>";
310 }
311 OS << "</tr>";
312 }
313 OS << "</table>";
314}
315
316void Analysis::SchedClassCluster::addPoint(
317 size_t PointId, const BenchmarkClustering &Clustering) {
318 PointIds.push_back(x: PointId);
319 const auto &Point = Clustering.getPoints()[PointId];
320 if (ClusterId.isUndef())
321 ClusterId = Clustering.getClusterIdForPoint(P: PointId);
322 assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
323
324 Centroid.addPoint(Point: Point.Measurements);
325}
326
327bool Analysis::SchedClassCluster::measurementsMatch(
328 const MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
329 const BenchmarkClustering &Clustering,
330 const double AnalysisInconsistencyEpsilonSquared_) const {
331 assert(!Clustering.getPoints().empty());
332 const Benchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
333
334 if (!Centroid.validate(Mode))
335 return false;
336
337 const std::vector<BenchmarkMeasure> ClusterCenterPoint =
338 Centroid.getAsPoint();
339
340 const std::vector<BenchmarkMeasure> SchedClassPoint =
341 RSC.getAsPoint(Mode, STI, Representative: Centroid.getStats());
342 if (SchedClassPoint.empty())
343 return false; // In Uops mode validate() may not be enough.
344
345 assert(ClusterCenterPoint.size() == SchedClassPoint.size() &&
346 "Expected measured/sched data dimensions to match.");
347
348 return Clustering.isNeighbour(P: ClusterCenterPoint, Q: SchedClassPoint,
349 EpsilonSquared_: AnalysisInconsistencyEpsilonSquared_);
350}
351
352void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
353 raw_ostream &OS) const {
354 OS << "<table class=\"sched-class-desc\">";
355 OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
356 "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the "
357 "idealized unit resource (port) pressure assuming ideal "
358 "distribution\">Idealized Resource Pressure</th></tr>";
359 if (RSC.SCDesc->isValid()) {
360 const auto &SI = State_.getSubtargetInfo();
361 const auto &SM = SI.getSchedModel();
362 OS << "<tr><td>&#10004;</td>";
363 OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
364 OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
365 // Latencies.
366 OS << "<td><ul>";
367 for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
368 const auto *const Entry = SI.getWriteLatencyEntry(SC: RSC.SCDesc, DefIdx: I);
369 OS << "<li>" << Entry->Cycles;
370 if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
371 // Dismabiguate if more than 1 latency.
372 OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
373 }
374 OS << "</li>";
375 }
376 OS << "</ul></td>";
377 // inverse throughput.
378 OS << "<td>";
379 writeMeasurementValue<kEscapeHtml>(
380 OS, Value: MCSchedModel::getReciprocalThroughput(STI: SI, SCDesc: *RSC.SCDesc));
381 OS << "</td>";
382 // WriteProcRes.
383 OS << "<td><ul>";
384 for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
385 OS << "<li><span class=\"mono\">";
386 writeEscaped<kEscapeHtml>(OS,
387 S: SM.getProcResource(ProcResourceIdx: WPR.ProcResourceIdx)->Name);
388 OS << "</span>: " << WPR.ReleaseAtCycle << "</li>";
389 }
390 OS << "</ul></td>";
391 // Idealized port pressure.
392 OS << "<td><ul>";
393 for (const auto &Pressure : RSC.IdealizedProcResPressure) {
394 OS << "<li><span class=\"mono\">";
395 writeEscaped<kEscapeHtml>(
396 OS, S: SI.getSchedModel().getProcResource(ProcResourceIdx: Pressure.first)->Name);
397 OS << "</span>: ";
398 writeMeasurementValue<kEscapeHtml>(OS, Value: Pressure.second);
399 OS << "</li>";
400 }
401 OS << "</ul></td>";
402 OS << "</tr>";
403 } else {
404 OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
405 }
406 OS << "</table>";
407}
408
409void Analysis::printClusterRawHtml(const BenchmarkClustering::ClusterId &Id,
410 StringRef display_name,
411 raw_ostream &OS) const {
412 const auto &Points = Clustering_.getPoints();
413 const auto &Cluster = Clustering_.getCluster(Id);
414 if (Cluster.PointIndices.empty())
415 return;
416
417 OS << "<div class=\"inconsistency\"><p>" << display_name << " Cluster ("
418 << Cluster.PointIndices.size() << " points)</p>";
419 OS << "<table class=\"sched-class-clusters\">";
420 // Table Header.
421 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
422 for (const auto &Measurement : Points[Cluster.PointIndices[0]].Measurements) {
423 OS << "<th>";
424 writeEscaped<kEscapeHtml>(OS, S: Measurement.Key);
425 OS << "</th>";
426 }
427 OS << "</tr>";
428
429 // Point data.
430 for (const auto &PointId : Cluster.PointIndices) {
431 OS << "<tr class=\"bad-cluster\"><td>" << display_name << "</td><td><ul>";
432 printPointHtml(Point: Points[PointId], OS);
433 OS << "</ul></td>";
434 for (const auto &Measurement : Points[PointId].Measurements) {
435 OS << "<td class=\"measurement\">";
436 writeMeasurementValue<kEscapeHtml>(OS, Value: Measurement.PerInstructionValue);
437 }
438 OS << "</tr>";
439 }
440 OS << "</table>";
441
442 OS << "</div>";
443
444} // namespace exegesis
445
// The <head> element (title and style sheet) emitted at the top of the HTML
// report. The text is written to the output verbatim, so it must contain
// nothing but the markup itself.
static constexpr char kHtmlHead[] = R"(
<head>
<title>llvm-exegesis Analysis Results</title>
<style>
body {
  font-family: sans-serif
}
span.sched-class-name {
  font-weight: bold;
  font-family: monospace;
}
span.opcode {
  font-family: monospace;
}
span.config {
  font-family: monospace;
}
div.inconsistency {
  margin-top: 50px;
}
table {
  margin-left: 50px;
  border-collapse: collapse;
}
table, table tr,td,th {
  border: 1px solid #444;
}
table ul {
  padding-left: 0px;
  margin: 0px;
  list-style-type: none;
}
table.sched-class-clusters td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 10px;
  padding-bottom: 10px;
}
table.sched-class-desc td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 2px;
  padding-bottom: 2px;
}
span.mono {
  font-family: monospace;
}
td.measurement {
  text-align: center;
}
tr.good-cluster td.measurement {
  color: #292
}
tr.bad-cluster td.measurement {
  color: #922
}
tr.good-cluster td.measurement span.minmax {
  color: #888;
}
tr.bad-cluster td.measurement span.minmax {
  color: #888;
}
</style>
</head>
)";
511
512template <>
513Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
514 raw_ostream &OS) const {
515 const auto &FirstPoint = Clustering_.getPoints()[0];
516 // Print the header.
517 OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
518 OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
519 OS << "<h3>Triple: <span class=\"mono\">";
520 writeEscaped<kEscapeHtml>(OS, S: FirstPoint.LLVMTriple);
521 OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
522 writeEscaped<kEscapeHtml>(OS, S: FirstPoint.CpuName);
523 OS << "</span></h3>";
524 OS << "<h3>Epsilon: <span class=\"mono\">"
525 << format(Fmt: "%0.2f", Vals: std::sqrt(x: AnalysisInconsistencyEpsilonSquared_))
526 << "</span></h3>";
527
528 const auto &SI = State_.getSubtargetInfo();
529 for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
530 if (!RSCAndPoints.RSC.SCDesc)
531 continue;
532 // Bucket sched class points into sched class clusters.
533 std::vector<SchedClassCluster> SchedClassClusters;
534 for (const size_t PointId : RSCAndPoints.PointIds) {
535 const auto &ClusterId = Clustering_.getClusterIdForPoint(P: PointId);
536 if (!ClusterId.isValid())
537 continue; // Ignore noise and errors. FIXME: take noise into account ?
538 if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_)
539 continue; // Either display stable or unstable clusters only.
540 auto SchedClassClusterIt =
541 find_if(Range&: SchedClassClusters, P: [ClusterId](const SchedClassCluster &C) {
542 return C.id() == ClusterId;
543 });
544 if (SchedClassClusterIt == SchedClassClusters.end()) {
545 SchedClassClusters.emplace_back();
546 SchedClassClusterIt = std::prev(x: SchedClassClusters.end());
547 }
548 SchedClassClusterIt->addPoint(PointId, Clustering: Clustering_);
549 }
550
551 // Print any scheduling class that has at least one cluster that does not
552 // match the checked-in data.
553 if (all_of(Range&: SchedClassClusters, P: [this, &RSCAndPoints,
554 &SI](const SchedClassCluster &C) {
555 return C.measurementsMatch(STI: SI, RSC: RSCAndPoints.RSC, Clustering: Clustering_,
556 AnalysisInconsistencyEpsilonSquared_);
557 }))
558 continue; // Nothing weird.
559
560 OS << "<div class=\"inconsistency\"><p>Sched Class <span "
561 "class=\"sched-class-name\">";
562#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
563 writeEscaped<kEscapeHtml>(OS, SI.getSchedModel().getSchedClassName(
564 RSCAndPoints.RSC.SchedClassId));
565#else
566 OS << RSCAndPoints.RSC.SchedClassId;
567#endif
568 OS << "</span> contains instructions whose performance characteristics do"
569 " not match that of LLVM:</p>";
570 printSchedClassClustersHtml(Clusters: SchedClassClusters, RSC: RSCAndPoints.RSC, OS);
571 OS << "<p>llvm SchedModel data:</p>";
572 printSchedClassDescHtml(RSC: RSCAndPoints.RSC, OS);
573 OS << "</div>";
574 }
575
576 printClusterRawHtml(Id: BenchmarkClustering::ClusterId::noise(),
577 display_name: "[noise]", OS);
578
579 OS << "</body></html>";
580 return Error::success();
581}
582
583} // namespace exegesis
584} // namespace llvm
585