1 | //===-- Clustering.h --------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// Utilities to compute benchmark result clusters. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H |
15 | #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H |
16 | |
17 | #include "BenchmarkResult.h" |
18 | #include "llvm/Support/Error.h" |
19 | #include <limits> |
20 | #include <vector> |
21 | |
22 | namespace llvm { |
23 | namespace exegesis { |
24 | |
25 | class BenchmarkClustering { |
26 | public: |
27 | enum ModeE { Dbscan, Naive }; |
28 | |
29 | // Clusters `Points` using DBSCAN with the given parameters. See the cc file |
30 | // for more explanations on the algorithm. |
31 | static Expected<BenchmarkClustering> |
32 | create(const std::vector<Benchmark> &Points, ModeE Mode, |
33 | size_t DbscanMinPts, double AnalysisClusteringEpsilon, |
34 | const MCSubtargetInfo *SubtargetInfo = nullptr, |
35 | const MCInstrInfo *InstrInfo = nullptr); |
36 | |
37 | class ClusterId { |
38 | public: |
39 | static ClusterId noise() { return ClusterId(kNoise); } |
40 | static ClusterId error() { return ClusterId(kError); } |
41 | static ClusterId makeValid(size_t Id, bool IsUnstable = false) { |
42 | return ClusterId(Id, IsUnstable); |
43 | } |
44 | static ClusterId makeValidUnstable(size_t Id) { |
45 | return makeValid(Id, /*IsUnstable=*/IsUnstable: true); |
46 | } |
47 | |
48 | ClusterId() : Id_(kUndef), IsUnstable_(false) {} |
49 | |
50 | // Compare id's, ignoring the 'unstability' bit. |
51 | bool operator==(const ClusterId &O) const { return Id_ == O.Id_; } |
52 | bool operator<(const ClusterId &O) const { return Id_ < O.Id_; } |
53 | |
54 | bool isValid() const { return Id_ <= kMaxValid; } |
55 | bool isUnstable() const { return IsUnstable_; } |
56 | bool isNoise() const { return Id_ == kNoise; } |
57 | bool isError() const { return Id_ == kError; } |
58 | bool isUndef() const { return Id_ == kUndef; } |
59 | |
60 | // Precondition: isValid(). |
61 | size_t getId() const { |
62 | assert(isValid()); |
63 | return Id_; |
64 | } |
65 | |
66 | private: |
67 | ClusterId(size_t Id, bool IsUnstable = false) |
68 | : Id_(Id), IsUnstable_(IsUnstable) {} |
69 | |
70 | static constexpr const size_t kMaxValid = |
71 | (std::numeric_limits<size_t>::max() >> 1) - 4; |
72 | static constexpr const size_t kNoise = kMaxValid + 1; |
73 | static constexpr const size_t kError = kMaxValid + 2; |
74 | static constexpr const size_t kUndef = kMaxValid + 3; |
75 | |
76 | size_t Id_ : (std::numeric_limits<size_t>::digits - 1); |
77 | size_t IsUnstable_ : 1; |
78 | }; |
79 | static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field." ); |
80 | |
81 | struct Cluster { |
82 | Cluster() = delete; |
83 | explicit Cluster(const ClusterId &Id) : Id(Id) {} |
84 | |
85 | const ClusterId Id; |
86 | // Indices of benchmarks within the cluster. |
87 | std::vector<int> PointIndices; |
88 | }; |
89 | |
90 | ClusterId getClusterIdForPoint(size_t P) const { |
91 | return ClusterIdForPoint_[P]; |
92 | } |
93 | |
94 | const std::vector<Benchmark> &getPoints() const { return Points_; } |
95 | |
96 | const Cluster &getCluster(ClusterId Id) const { |
97 | assert(!Id.isUndef() && "unlabeled cluster" ); |
98 | if (Id.isNoise()) { |
99 | return NoiseCluster_; |
100 | } |
101 | if (Id.isError()) { |
102 | return ErrorCluster_; |
103 | } |
104 | return Clusters_[Id.getId()]; |
105 | } |
106 | |
107 | const std::vector<Cluster> &getValidClusters() const { return Clusters_; } |
108 | |
109 | // Returns true if the given point is within a distance Epsilon of each other. |
110 | bool isNeighbour(const std::vector<BenchmarkMeasure> &P, |
111 | const std::vector<BenchmarkMeasure> &Q, |
112 | const double EpsilonSquared_) const { |
113 | double DistanceSquared = 0.0; |
114 | for (size_t I = 0, E = P.size(); I < E; ++I) { |
115 | const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue; |
116 | DistanceSquared += Diff * Diff; |
117 | } |
118 | return DistanceSquared <= EpsilonSquared_; |
119 | } |
120 | |
121 | private: |
122 | BenchmarkClustering( |
123 | const std::vector<Benchmark> &Points, |
124 | double AnalysisClusteringEpsilonSquared); |
125 | |
126 | Error validateAndSetup(); |
127 | |
128 | void clusterizeDbScan(size_t MinPts); |
129 | void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo, |
130 | const MCInstrInfo &InstrInfo); |
131 | |
132 | // Stabilization is only needed if dbscan was used to clusterize. |
133 | void stabilize(unsigned NumOpcodes); |
134 | |
135 | void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const; |
136 | |
137 | bool areAllNeighbours(ArrayRef<size_t> Pts) const; |
138 | |
139 | const std::vector<Benchmark> &Points_; |
140 | const double AnalysisClusteringEpsilonSquared_; |
141 | |
142 | int NumDimensions_ = 0; |
143 | // ClusterForPoint_[P] is the cluster id for Points[P]. |
144 | std::vector<ClusterId> ClusterIdForPoint_; |
145 | std::vector<Cluster> Clusters_; |
146 | Cluster NoiseCluster_; |
147 | Cluster ErrorCluster_; |
148 | }; |
149 | |
150 | class SchedClassClusterCentroid { |
151 | public: |
152 | const std::vector<PerInstructionStats> &getStats() const { |
153 | return Representative; |
154 | } |
155 | |
156 | std::vector<BenchmarkMeasure> getAsPoint() const; |
157 | |
158 | void addPoint(ArrayRef<BenchmarkMeasure> Point); |
159 | |
160 | bool validate(Benchmark::ModeE Mode) const; |
161 | |
162 | private: |
163 | // Measurement stats for the points in the SchedClassCluster. |
164 | std::vector<PerInstructionStats> Representative; |
165 | }; |
166 | |
167 | } // namespace exegesis |
168 | } // namespace llvm |
169 | |
170 | #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H |
171 | |