1 | //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a model runner using TFLite, allowing the |
10 | // loading of a model from a command line option. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | #include "llvm/Analysis/TensorSpec.h" |
14 | #include "llvm/Config/config.h" |
15 | #if defined(LLVM_HAVE_TFLITE) |
16 | |
17 | #include "llvm/ADT/BitVector.h" |
18 | #include "llvm/Analysis/CallGraph.h" |
19 | #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" |
20 | #include "llvm/Analysis/MLInlineAdvisor.h" |
21 | #include "llvm/Analysis/ModelUnderTrainingRunner.h" |
22 | #include "llvm/Analysis/NoInferenceModelRunner.h" |
23 | #include "llvm/Analysis/Utils/TFUtils.h" |
24 | #include "llvm/Analysis/Utils/TrainingLogger.h" |
25 | #include "llvm/IR/LLVMContext.h" |
26 | #include "llvm/IR/Module.h" |
27 | #include "llvm/Support/CommandLine.h" |
28 | #include "llvm/Support/ManagedStatic.h" |
29 | |
30 | #include <vector> |
31 | #include <optional> |
32 | |
33 | using namespace llvm; |
34 | |
35 | static cl::opt<std::string> TrainingLog( |
36 | "training-log" , cl::Hidden, |
37 | cl::desc("Path where the development - mode inlining log is saved." )); |
38 | |
39 | static cl::opt<std::string> TFModelUnderTrainingPath( |
40 | "ml-inliner-model-under-training" , cl::Hidden, |
41 | cl::desc(R"(Path to SavedModel from the previous training iteration. |
42 | The directory is also expected to contain a JSON specification of the |
43 | outputs expected to be logged, where the first entry must be the |
44 | inlining decision. The file containing the specification should be |
45 | called output_spec.json. The expected JSON value is an array of |
46 | dictionaries. Each dictionary should have 2 keys: |
47 | |
48 | - "tensor_spec, followed by the TensorSpec description of the |
49 | output; and |
50 | - "logging_name", a string indicating the name to use when |
51 | logging the output values. |
52 | |
53 | Example: |
54 | [ |
55 | { |
56 | "logging_name" : "some_name", |
57 | "tensor_spec" : { |
58 | "name" : "model_name", |
59 | "port" : 0, |
60 | "shape" : [2, 3], |
61 | "type" : "float" |
62 | } |
63 | } |
64 | ] |
65 | |
66 | The first value must always correspond to the decision.)" )); |
67 | |
68 | static cl::opt<std::string> TFOutputSpecOverride( |
69 | "ml-inliner-output-spec-override" , cl::Hidden, |
70 | cl::desc("Override the path to the output spec json file. See " |
71 | "-ml-inliner-model-under-training documentation for the " |
72 | "specification of that file." )); |
73 | |
74 | static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix" , |
75 | cl::Hidden, cl::init("action_" ), |
76 | cl::desc("Prefix for feature names." )); |
77 | |
78 | namespace { |
/// The record of one inlining decision and its outcome, in the form consumed
/// by TrainingLogger::logInlineEvent.
struct InlineEvent {
  /// The decision the default policy would have taken for this call site.
  int64_t DefaultDecision{0};

  /// The decision that was advised. It coincides with DefaultDecision when
  /// training off the default policy.
  int64_t AdvisedDecision{0};

  /// Whether inlining really happened. An inline error makes this 'false' even
  /// if AdvisedDecision was true; in every other case it matches
  /// AdvisedDecision.
  bool Effect{false};

  /// The resulting change in size, i.e. size_after - size_before.
  int64_t Reward{0};
};
96 | |
97 | /// Collect data we may use for training a model. |
98 | class TrainingLogger final { |
99 | public: |
100 | TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR); |
101 | |
102 | /// Log one inlining event. |
103 | void logInlineEvent(const InlineEvent &Event, |
104 | const MLModelRunner &ModelRunner); |
105 | |
106 | private: |
107 | StringRef LogFileName; |
108 | const ModelUnderTrainingRunner *const MUTR; |
109 | std::unique_ptr<Logger> L; |
110 | BitVector Effects; |
111 | /// Set these 2 clearly OOB, to make sure we set them later. |
112 | size_t DefaultDecisionPos = std::numeric_limits<size_t>::max(); |
113 | size_t DecisionPos = std::numeric_limits<size_t>::max(); |
114 | }; |
115 | |
116 | /// An extension of the MLInlineAdvisor for the 'development' mode, targeting |
117 | /// the offline training scenario. Note that training happens outside of the |
118 | /// compiler, this facility is concerned with producing training data ("logs"). |
119 | /// This InlineAdvisor can operate in the following modes: |
120 | /// |
121 | /// 1) collect logs for the default policy. This is useful for bootstrapping |
122 | /// training, which will be considerably faster by starting from a reasonable |
123 | /// policy. |
124 | /// |
125 | /// 2) collect logs for the ML policy, using a model from a previous |
126 | /// training. Potentially, that model uses internally some small random |
127 | /// perturbation of its weights, to induce exploration (setting this up is the |
128 | /// responsibility of the training algorithm). The logs would then be used to |
129 | /// retrain and improve on this model. |
130 | /// |
131 | /// 3) use the provided model, with no logging. This is useful for end to end |
132 | /// validation - the model, in this case, is a release candidate and shouldn't |
133 | /// have random perturbations. It is a convenience feature: rather than needing |
134 | /// to take the release candidate model and compile it in 'release' mode, |
135 | /// validate it, then potentially discard it, it's easier to just pass the model |
136 | /// to the compiler, albeit compilation would be slower, as a one-off. Once the |
137 | /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in |
138 | /// release mode. The expectation is that a well-trained model provides a good |
139 | /// policy over a sufficiently diverse codebase, over many changes (i.e. |
140 | /// training happens seldom). |
141 | class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor { |
142 | public: |
143 | DevelopmentModeMLInlineAdvisor( |
144 | Module &M, ModuleAnalysisManager &MAM, |
145 | std::unique_ptr<MLModelRunner> ModelRunner, |
146 | std::function<bool(CallBase &)> GetDefaultAdvice, |
147 | std::unique_ptr<TrainingLogger> Logger); |
148 | |
149 | size_t getTotalSizeEstimate(); |
150 | |
151 | void updateNativeSizeEstimate(int64_t Change) { |
152 | *CurrentNativeSize += Change; |
153 | } |
154 | void resetNativeSize(Function *F) { |
155 | PreservedAnalyses PA = PreservedAnalyses::all(); |
156 | PA.abandon<InlineSizeEstimatorAnalysis>(); |
157 | FAM.invalidate(*F, PA); |
158 | } |
159 | |
160 | std::unique_ptr<MLInlineAdvice> |
161 | getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override; |
162 | |
163 | std::optional<size_t> getNativeSizeEstimate(const Function &F) const; |
164 | |
165 | private: |
166 | bool isLogging() const { return !!Logger; } |
167 | std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override; |
168 | |
169 | const bool IsDoingInference; |
170 | std::unique_ptr<TrainingLogger> Logger; |
171 | |
172 | const std::optional<int32_t> InitialNativeSize; |
173 | std::optional<int32_t> CurrentNativeSize; |
174 | }; |
175 | |
176 | /// A variant of MLInlineAdvice that tracks all non-trivial inlining |
177 | /// decisions, for training/logging. |
178 | class LoggingMLInlineAdvice : public MLInlineAdvice { |
179 | public: |
180 | LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, |
181 | OptimizationRemarkEmitter &ORE, bool Recommendation, |
182 | TrainingLogger &Logger, |
183 | std::optional<size_t> CallerSizeEstimateBefore, |
184 | std::optional<size_t> CalleeSizeEstimateBefore, |
185 | bool DefaultDecision, bool Mandatory = false) |
186 | : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), |
187 | CallerSizeEstimateBefore(CallerSizeEstimateBefore), |
188 | CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), |
189 | DefaultDecision(DefaultDecision), Mandatory(Mandatory) {} |
190 | |
191 | virtual ~LoggingMLInlineAdvice() = default; |
192 | |
193 | private: |
194 | DevelopmentModeMLInlineAdvisor *getAdvisor() const { |
195 | return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor); |
196 | } |
197 | void recordInliningImpl() override { |
198 | MLInlineAdvice::recordInliningImpl(); |
199 | getAdvisor()->resetNativeSize(Caller); |
200 | int Reward = std::numeric_limits<int>::max(); |
201 | if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && |
202 | !getAdvisor()->isForcedToStop()) { |
203 | int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) + |
204 | *CalleeSizeEstimateBefore; |
205 | Reward = NativeSizeAfter - |
206 | (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); |
207 | getAdvisor()->updateNativeSizeEstimate(Reward); |
208 | } |
209 | log(Reward, /*Success=*/true); |
210 | } |
211 | |
212 | void recordInliningWithCalleeDeletedImpl() override { |
213 | MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); |
214 | getAdvisor()->resetNativeSize(Caller); |
215 | if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && |
216 | !getAdvisor()->isForcedToStop()) { |
217 | int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller); |
218 | int Reward = NativeSizeAfter - |
219 | (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); |
220 | getAdvisor()->updateNativeSizeEstimate(Reward); |
221 | log(Reward, /*Success=*/true); |
222 | } else { |
223 | log(NoReward, /*Success=*/true); |
224 | } |
225 | } |
226 | |
227 | void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { |
228 | MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); |
229 | log(NoReward, /*Success=*/false); |
230 | } |
231 | |
232 | void recordUnattemptedInliningImpl() override { |
233 | MLInlineAdvice::recordUnattemptedInliningImpl(); |
234 | log(NoReward, /*Success=*/false); |
235 | } |
236 | |
237 | void log(int64_t Reward, bool Success) { |
238 | if (Mandatory) |
239 | return; |
240 | InlineEvent Event; |
241 | Event.AdvisedDecision = isInliningRecommended(); |
242 | Event.DefaultDecision = DefaultDecision; |
243 | Event.Effect = Success; |
244 | Event.Reward = Reward; |
245 | Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); |
246 | } |
247 | |
248 | static const int64_t NoReward = 0; |
249 | TrainingLogger &Logger; |
250 | const std::optional<size_t> CallerSizeEstimateBefore; |
251 | const std::optional<size_t> CalleeSizeEstimateBefore; |
252 | const int64_t DefaultDecision; |
253 | const int64_t Mandatory; |
254 | }; |
255 | |
256 | static const std::vector<TensorSpec> TrainingOnlyFeatures{ |
257 | TensorSpec::createSpec<float>(TFFeedPrefix + "discount" , {1}), |
258 | TensorSpec::createSpec<float>(TFFeedPrefix + "reward" , {1}), |
259 | TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type" , {1})}; |
260 | |
261 | static const std::vector<TensorSpec> getInputFeatures() { |
262 | std::vector<TensorSpec> InputSpecs; |
263 | for (size_t I = 0; I < NumberOfFeatures; ++I) |
264 | InputSpecs.push_back(TensorSpec::createSpec<int64_t>( |
265 | TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape())); |
266 | append_range(InputSpecs, TrainingOnlyFeatures); |
267 | return InputSpecs; |
268 | } |
269 | |
270 | } // namespace |
271 | |
272 | TrainingLogger::TrainingLogger(StringRef LogFileName, |
273 | const ModelUnderTrainingRunner *MUTR) |
274 | : LogFileName(LogFileName), MUTR(MUTR) { |
275 | // The first output is the inlining decision. |
276 | std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end()); |
277 | |
278 | if (MUTR) |
279 | append_range(FT, MUTR->extraOutputsForLoggingSpecs()); |
280 | |
281 | DefaultDecisionPos = FT.size(); |
282 | FT.push_back(DefaultDecisionSpec); |
283 | |
284 | DecisionPos = FT.size(); |
285 | FT.push_back(InlineDecisionSpec); |
286 | std::error_code EC; |
287 | auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC); |
288 | if (EC) |
289 | dbgs() << (EC.message() + ":" + TrainingLog); |
290 | |
291 | L = std::make_unique<Logger>( |
292 | std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}), |
293 | InlineSizeEstimatorAnalysis::isEvaluatorRequested()); |
294 | L->switchContext("" ); |
295 | } |
296 | |
297 | /// Log one inlining event. |
298 | void TrainingLogger::logInlineEvent(const InlineEvent &Event, |
299 | const MLModelRunner &ModelRunner) { |
300 | L->startObservation(); |
301 | size_t CurrentFeature = 0; |
302 | for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) |
303 | L->logTensorValue(CurrentFeature, |
304 | reinterpret_cast<const char *>( |
305 | ModelRunner.getTensorUntyped(CurrentFeature))); |
306 | |
307 | if (MUTR) |
308 | for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) { |
309 | const char *RawData = |
310 | reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)); |
311 | L->logTensorValue(CurrentFeature, RawData); |
312 | ++CurrentFeature; |
313 | } |
314 | |
315 | assert(CurrentFeature == DefaultDecisionPos); |
316 | L->logTensorValue(DefaultDecisionPos, |
317 | reinterpret_cast<const char *>(&Event.DefaultDecision)); |
318 | L->logTensorValue(DecisionPos, |
319 | reinterpret_cast<const char *>(&Event.AdvisedDecision)); |
320 | L->endObservation(); |
321 | if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) |
322 | L->logReward(Event.Reward); |
323 | |
324 | // For debugging / later use |
325 | Effects.push_back(Event.Effect); |
326 | } |
327 | |
/// Construct the development-mode advisor.
///
/// \param M, MAM, ModelRunner, GetDefaultAdvice forwarded to MLInlineAdvisor.
/// \param Logger the training logger, or null when no log is requested.
///
/// The initializers rely on each other: IsDoingInference inspects the runner
/// already stored by the base class, and InitialNativeSize calls isLogging(),
/// which reads the Logger member initialized just before it.
DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice),
      // Inference happens exactly when the runner is a model under training.
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      Logger(std::move(Logger)),
      // The total size is only computed when logging; otherwise it is 0.
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}
341 | |
342 | std::optional<size_t> |
343 | DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const { |
344 | if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) |
345 | return std::nullopt; |
346 | auto &R = |
347 | FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F)); |
348 | if (!R) { |
349 | F.getParent()->getContext().emitError( |
350 | "Native size estimator is not present." ); |
351 | return 0; |
352 | } |
353 | return *R; |
354 | } |
355 | |
356 | std::unique_ptr<MLInlineAdvice> |
357 | DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { |
358 | return std::make_unique<LoggingMLInlineAdvice>( |
359 | /*Advisor=*/this, |
360 | /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true, |
361 | /*Logger=*/*Logger, |
362 | /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), |
363 | /*CalleeSizeEstimateBefore=*/ |
364 | getNativeSizeEstimate(*CB.getCalledFunction()), |
365 | /*DefaultDecision=*/true, /*Mandatory*/ true); |
366 | } |
367 | |
368 | std::unique_ptr<MLInlineAdvice> |
369 | DevelopmentModeMLInlineAdvisor::getAdviceFromModel( |
370 | CallBase &CB, OptimizationRemarkEmitter &ORE) { |
371 | if (IsDoingInference && !isLogging()) |
372 | return MLInlineAdvisor::getAdviceFromModel(CB, ORE); |
373 | |
374 | bool DefaultAdvice = GetDefaultAdvice(CB); |
375 | auto Recommendation = |
376 | IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>()) |
377 | : DefaultAdvice; |
378 | return std::make_unique<LoggingMLInlineAdvice>( |
379 | /*Advisor=*/this, |
380 | /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, |
381 | /*Logger=*/*Logger, |
382 | /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), |
383 | /*CalleeSizeEstimateBefore=*/ |
384 | getNativeSizeEstimate(*CB.getCalledFunction()), |
385 | /*DefaultDecision=*/DefaultAdvice); |
386 | } |
387 | |
388 | size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { |
389 | if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) |
390 | return 0; |
391 | size_t Ret = 0; |
392 | for (auto &F : M) { |
393 | if (F.isDeclaration()) |
394 | continue; |
395 | Ret += *getNativeSizeEstimate(F); |
396 | } |
397 | return Ret; |
398 | } |
399 | |
400 | std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( |
401 | Module &M, ModuleAnalysisManager &MAM, |
402 | std::function<bool(CallBase &)> GetDefaultAdvice) { |
403 | auto &Ctx = M.getContext(); |
404 | std::unique_ptr<MLModelRunner> Runner; |
405 | if (TFModelUnderTrainingPath.empty()) |
406 | Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures())); |
407 | else |
408 | Runner = ModelUnderTrainingRunner::createAndEnsureValid( |
409 | Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(), |
410 | TFOutputSpecOverride); |
411 | if (!Runner) |
412 | return nullptr; |
413 | std::unique_ptr<TrainingLogger> Logger; |
414 | if (!TrainingLog.empty()) |
415 | Logger = std::make_unique<TrainingLogger>( |
416 | TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get())); |
417 | |
418 | return std::make_unique<DevelopmentModeMLInlineAdvisor>( |
419 | M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger)); |
420 | } |
421 | #endif // defined(LLVM_HAVE_TFLITE) |
422 | |