1//===-- Clustering.h --------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Utilities to compute benchmark result clusters.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
15#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
16
17#include "BenchmarkResult.h"
18#include "llvm/Support/Error.h"
19#include <limits>
20#include <vector>
21
22namespace llvm {
23namespace exegesis {
24
25class BenchmarkClustering {
26public:
27 enum ModeE { Dbscan, Naive };
28
29 // Clusters `Points` using DBSCAN with the given parameters. See the cc file
30 // for more explanations on the algorithm.
31 static Expected<BenchmarkClustering>
32 create(const std::vector<Benchmark> &Points, ModeE Mode,
33 size_t DbscanMinPts, double AnalysisClusteringEpsilon,
34 const MCSubtargetInfo *SubtargetInfo = nullptr,
35 const MCInstrInfo *InstrInfo = nullptr);
36
37 class ClusterId {
38 public:
39 static ClusterId noise() { return ClusterId(kNoise); }
40 static ClusterId error() { return ClusterId(kError); }
41 static ClusterId makeValid(size_t Id, bool IsUnstable = false) {
42 return ClusterId(Id, IsUnstable);
43 }
44 static ClusterId makeValidUnstable(size_t Id) {
45 return makeValid(Id, /*IsUnstable=*/IsUnstable: true);
46 }
47
48 ClusterId() : Id_(kUndef), IsUnstable_(false) {}
49
50 // Compare id's, ignoring the 'unstability' bit.
51 bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
52 bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
53
54 bool isValid() const { return Id_ <= kMaxValid; }
55 bool isUnstable() const { return IsUnstable_; }
56 bool isNoise() const { return Id_ == kNoise; }
57 bool isError() const { return Id_ == kError; }
58 bool isUndef() const { return Id_ == kUndef; }
59
60 // Precondition: isValid().
61 size_t getId() const {
62 assert(isValid());
63 return Id_;
64 }
65
66 private:
67 ClusterId(size_t Id, bool IsUnstable = false)
68 : Id_(Id), IsUnstable_(IsUnstable) {}
69
70 static constexpr const size_t kMaxValid =
71 (std::numeric_limits<size_t>::max() >> 1) - 4;
72 static constexpr const size_t kNoise = kMaxValid + 1;
73 static constexpr const size_t kError = kMaxValid + 2;
74 static constexpr const size_t kUndef = kMaxValid + 3;
75
76 size_t Id_ : (std::numeric_limits<size_t>::digits - 1);
77 size_t IsUnstable_ : 1;
78 };
79 static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field.");
80
81 struct Cluster {
82 Cluster() = delete;
83 explicit Cluster(const ClusterId &Id) : Id(Id) {}
84
85 const ClusterId Id;
86 // Indices of benchmarks within the cluster.
87 std::vector<int> PointIndices;
88 };
89
90 ClusterId getClusterIdForPoint(size_t P) const {
91 return ClusterIdForPoint_[P];
92 }
93
94 const std::vector<Benchmark> &getPoints() const { return Points_; }
95
96 const Cluster &getCluster(ClusterId Id) const {
97 assert(!Id.isUndef() && "unlabeled cluster");
98 if (Id.isNoise()) {
99 return NoiseCluster_;
100 }
101 if (Id.isError()) {
102 return ErrorCluster_;
103 }
104 return Clusters_[Id.getId()];
105 }
106
107 const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
108
109 // Returns true if the given point is within a distance Epsilon of each other.
110 bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
111 const std::vector<BenchmarkMeasure> &Q,
112 const double EpsilonSquared_) const {
113 double DistanceSquared = 0.0;
114 for (size_t I = 0, E = P.size(); I < E; ++I) {
115 const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
116 DistanceSquared += Diff * Diff;
117 }
118 return DistanceSquared <= EpsilonSquared_;
119 }
120
121private:
122 BenchmarkClustering(
123 const std::vector<Benchmark> &Points,
124 double AnalysisClusteringEpsilonSquared);
125
126 Error validateAndSetup();
127
128 void clusterizeDbScan(size_t MinPts);
129 void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo,
130 const MCInstrInfo &InstrInfo);
131
132 // Stabilization is only needed if dbscan was used to clusterize.
133 void stabilize(unsigned NumOpcodes);
134
135 void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
136
137 bool areAllNeighbours(ArrayRef<size_t> Pts) const;
138
139 const std::vector<Benchmark> &Points_;
140 const double AnalysisClusteringEpsilonSquared_;
141
142 int NumDimensions_ = 0;
143 // ClusterForPoint_[P] is the cluster id for Points[P].
144 std::vector<ClusterId> ClusterIdForPoint_;
145 std::vector<Cluster> Clusters_;
146 Cluster NoiseCluster_;
147 Cluster ErrorCluster_;
148};
149
150class SchedClassClusterCentroid {
151public:
152 const std::vector<PerInstructionStats> &getStats() const {
153 return Representative;
154 }
155
156 std::vector<BenchmarkMeasure> getAsPoint() const;
157
158 void addPoint(ArrayRef<BenchmarkMeasure> Point);
159
160 bool validate(Benchmark::ModeE Mode) const;
161
162private:
163 // Measurement stats for the points in the SchedClassCluster.
164 std::vector<PerInstructionStats> Representative;
165};
166
167} // namespace exegesis
168} // namespace llvm
169
170#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
171