//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements feature and label extraction for offline supervised learning
// of an IR-to-native size model.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"

#ifdef LLVM_HAVE_TFLITE
#include "llvm/Analysis/Utils/TFUtils.h"
#endif
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

AnalysisKey InlineSizeEstimatorAnalysis::Key;

#ifdef LLVM_HAVE_TFLITE
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <deque>
#include <optional>

cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model", cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR."));

#define DEBUG_TYPE "inline-size-estimator"
namespace {
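// Returns the largest instruction opcode value defined in Instruction.def, by
// letting the LAST_OTHER_INST(NR) macro expand to "return NR;".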
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}

class IRToNativeSizeLearning {
public:
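  // Hand-picked scalar ("named") features computed per function; these index
  // into FunctionFeatures::NamedFeatures below.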
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,

    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
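  // The full feature vector for one function: the named scalar features, plus
  // a histogram of instruction opcodes and a histogram of the "important"
  // instruction successions listed below.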
  struct FunctionFeatures {
    static const size_t FeatureCount;

    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;

    void fillTensor(int32_t *Ptr) const;
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };
  IRToNativeSizeLearning() = default;

  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);
};

// This is a point-in-time snapshot: we determined that including these pairs
// of consecutive instructions (in the IR layout available at inline time) as
// features improves the model's performance. We want to move away from manual
// feature selection.
// The array is given as opcode pairs rather than symbolic opcode names,
// because 1) the names weren't readily available, and 2) the successions were
// hand-extracted.
//
// This array must be sorted.
static const std::array<std::pair<size_t, size_t>, 137>
    ImportantInstructionSuccessions{
        {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11},
         {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24},
         {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31},
         {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45},
         {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33},
         {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56},
         {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
         {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31},
         {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15},
         {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
         {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32},
         {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2},
         {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
         {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56},
         {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
         {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53},
         {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
         {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34},
         {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
         {64, 1}, {64, 64}, {65, 1}, {65, 65}}};

// We have: 9 calculated (named) features; 1 feature for each instruction
// opcode; and 1 feature for each manually-identified instruction succession.
// For the latter two, we build histograms: we count the number of occurrences
// of each instruction opcode or succession of instructions, respectively.
// Note that instruction opcodes start from 1. For convenience, we also keep an
// always-0 feature for the '0' opcode, hence the extra 1 below.
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
    ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
    IRToNativeSizeLearning::NumNamedFeatures;

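// Returns the TTI code-size cost of F, i.e. the sum of the per-instruction
// TCK_CodeSize costs; used as the InitialSize feature.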
size_t getSize(Function &F, TargetTransformInfo &TTI) {
  size_t Ret = 0;
  for (const auto &BB : F)
    for (const auto &I : BB)
      Ret += *(TTI.getInstructionCost(
          &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue());
  return Ret;
}

size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
  return getSize(F, TTI);
}

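// Returns the deepest level of F's dominator tree; blocks without a tree node
// (unreachable blocks) are ignored.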
unsigned getMaxDominatorTreeDepth(const Function &F,
                                  const DominatorTree &Tree) {
  unsigned Ret = 0;
  for (const auto &BB : F)
    if (const auto *TN = Tree.getNode(&BB))
      Ret = std::max(Ret, TN->getLevel());
  return Ret;
}
} // namespace

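// Computes all features for F: the named scalar features, the per-opcode
// histogram, and the histogram of the "important" opcode successions above.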
IRToNativeSizeLearning::FunctionFeatures
IRToNativeSizeLearning::getFunctionFeatures(Function &F,
                                            FunctionAnalysisManager &FAM) {
  assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
         "expected function features are sorted");

  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
  FunctionFeatures FF;
  size_t InstrCount = getMaxInstructionID() + 1;
  FF.InstructionHistogram.resize(InstrCount);

  FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());

  int StartID = 0;
  int LastID = StartID;
  auto getPairIndex = [](size_t a, size_t b) {
    auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
    if (I == ImportantInstructionSuccessions.end())
      return -1;
    return static_cast<int>(
        std::distance(ImportantInstructionSuccessions.begin(), I));
  };

  // We don't want debug calls, because they'd just add noise.
  for (const auto &BB : F) {
    for (const auto &I : BB.instructionsWithoutDebug()) {
      auto ID = I.getOpcode();

      ++FF.InstructionHistogram[ID];
      int PairIndex = getPairIndex(LastID, ID);
      if (PairIndex >= 0)
        ++FF.InstructionPairHistogram[PairIndex];
      LastID = ID;
      if (isa<CallBase>(I))
        ++FF[NamedFeatureIndex::Calls];
    }
  }

  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
  FF[NamedFeatureIndex::Blocks] = F.size();
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
  for (auto &L : LI)
    FF[NamedFeatureIndex::MaxLoopDepth] =
        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
                 static_cast<int32_t>(L->getLoopDepth()));
  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
  return FF;
}

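// Flattens all features into one contiguous int32 buffer, in the order: named
// features, opcode histogram, opcode-pair histogram. Ptr must point to at
// least FeatureCount elements.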
void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
  Ptr += NamedFeatures.size();
  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
  Ptr += InstructionHistogram.size();
  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
            Ptr);
}

bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
  return !TFIR2NativeModelPath.empty();
}

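// If a model path was provided, load the TFLite saved model. The tensor names
// below must match the serving signature of the trained model; if the model
// cannot be loaded or validated, the evaluator is dropped and the analysis
// returns no estimate.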
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
  if (!isEvaluatorRequested()) {
    return;
  }
  std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
      "serving_default_input_1",
      {1, static_cast<int64_t>(
              IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
  std::vector<TensorSpec> OutputSpecs{
      TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
  Evaluator = std::make_unique<TFModelEvaluator>(
      TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Evaluator.reset();
    return;
  }
}

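// Runs the model on F's features. Returns std::nullopt if no evaluator is
// available or evaluation fails; otherwise returns the predicted native size,
// clamped to be non-negative.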
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  if (!Evaluator)
    return std::nullopt;
  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
      const_cast<Function &>(F), FAM);
  int32_t *V = Evaluator->getInput<int32_t>(0);
  Features.fillTensor(V);
  auto ER = Evaluator->evaluate();
  if (!ER)
    return std::nullopt;
  float Ret = *ER->getTensorValue<float>(0);
  if (Ret < 0.0)
    Ret = 0.0;
  return static_cast<size_t>(Ret);
}

InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&Other)
    : Evaluator(std::move(Other.Evaluator)) {}

#else
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&) {}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  return std::nullopt;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
#endif

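// Printer pass, mainly used for testing the model. A hypothetical invocation
// (assuming a TFLite-enabled build and that the pass is registered as
// "print<inline-size-estimator>" in PassRegistry.def) might look like:
//   opt -passes='print<inline-size-estimator>' \
//       -ml-inliner-ir2native-model=<path/to/saved/model> \
//       -disable-output input.ll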
PreservedAnalyses
InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
     << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
  return PreservedAnalyses::all();
}