| 1 | //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements a model runner using TFLite, allowing the |
| 10 | // loading of a model from a command line option. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | #include "llvm/Analysis/TensorSpec.h" |
| 14 | #include "llvm/Config/config.h" |
| 15 | #if defined(LLVM_HAVE_TFLITE) |
| 16 | |
| 17 | #include "llvm/ADT/BitVector.h" |
| 18 | #include "llvm/Analysis/CallGraph.h" |
| 19 | #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" |
| 20 | #include "llvm/Analysis/MLInlineAdvisor.h" |
| 21 | #include "llvm/Analysis/ModelUnderTrainingRunner.h" |
| 22 | #include "llvm/Analysis/NoInferenceModelRunner.h" |
| 23 | #include "llvm/Analysis/Utils/TFUtils.h" |
| 24 | #include "llvm/Analysis/Utils/TrainingLogger.h" |
| 25 | #include "llvm/IR/LLVMContext.h" |
| 26 | #include "llvm/IR/Module.h" |
| 27 | #include "llvm/Support/CommandLine.h" |
| 28 | #include "llvm/Support/ManagedStatic.h" |
| 29 | |
| 30 | #include <optional> |
| 31 | #include <vector> |
| 32 | |
| 33 | using namespace llvm; |
| 34 | |
/// If non-empty, the path where this compilation's training log (observations
/// + rewards) is written. Logging is enabled iff this is set (see
/// getDevelopmentModeAdvisor).
static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development - mode inlining log is saved."));
| 38 | |
| 39 | static cl::opt<std::string> TFModelUnderTrainingPath( |
| 40 | "ml-inliner-model-under-training" , cl::Hidden, |
| 41 | cl::desc(R"(Path to SavedModel from the previous training iteration. |
| 42 | The directory is also expected to contain a JSON specification of the |
| 43 | outputs expected to be logged, where the first entry must be the |
| 44 | inlining decision. The file containing the specification should be |
| 45 | called output_spec.json. The expected JSON value is an array of |
| 46 | dictionaries. Each dictionary should have 2 keys: |
| 47 | |
| 48 | - "tensor_spec, followed by the TensorSpec description of the |
| 49 | output; and |
| 50 | - "logging_name", a string indicating the name to use when |
| 51 | logging the output values. |
| 52 | |
| 53 | Example: |
| 54 | [ |
| 55 | { |
| 56 | "logging_name" : "some_name", |
| 57 | "tensor_spec" : { |
| 58 | "name" : "model_name", |
| 59 | "port" : 0, |
| 60 | "shape" : [2, 3], |
| 61 | "type" : "float" |
| 62 | } |
| 63 | } |
| 64 | ] |
| 65 | |
| 66 | The first value must always correspond to the decision.)" )); |
| 67 | |
/// Optional replacement for the output_spec.json file normally looked up
/// inside the SavedModel directory (forwarded to
/// ModelUnderTrainingRunner::createAndEnsureValid).
static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));
| 73 | |
/// Prefix prepended to each feature name to form the model's feed names
/// (e.g. "action_callee_basic_block_count"). Defaults to "action_".
static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));
| 77 | |
| 78 | namespace { |
| 79 | /// An InlineEvent, used by TrainingLogger. |
/// An InlineEvent, used by TrainingLogger. Captures one inlining decision
/// together with its outcome and reward, for offline training.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true, otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before
  int64_t Reward = 0;
};
| 96 | |
| 97 | /// Collect data we may use for training a model. |
/// Collect data we may use for training a model. Owns the underlying Logger
/// and knows the positional layout of the logged tensors (features first,
/// then optional extra model outputs, then the two decision tensors).
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

private:
  // NOTE(review): StringRef member stores a view into caller-owned storage;
  // the ctor is only called with the (global) TrainingLog flag, which
  // outlives this object.
  StringRef LogFileName;
  // Non-owning; nullptr when logging the default policy (no model loaded).
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  // One bit per logged event: whether the inline actually happened.
  BitVector Effects;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};
| 115 | |
| 116 | /// An extension of the MLInlineAdvisor for the 'development' mode, targeting |
| 117 | /// the offline training scenario. Note that training happens outside of the |
| 118 | /// compiler, this facility is concerned with producing training data ("logs"). |
| 119 | /// This InlineAdvisor can operate in the following modes: |
| 120 | /// |
| 121 | /// 1) collect logs for the default policy. This is useful for bootstrapping |
| 122 | /// training, which will be considerably faster by starting from a reasonable |
| 123 | /// policy. |
| 124 | /// |
| 125 | /// 2) collect logs for the ML policy, using a model from a previous |
| 126 | /// training. Potentially, that model uses internally some small random |
| 127 | /// perturbation of its weights, to induce exploration (setting this up is the |
| 128 | /// responsibility of the training algorithm). The logs would then be used to |
| 129 | /// retrain and improve on this model. |
| 130 | /// |
| 131 | /// 3) use the provided model, with no logging. This is useful for end to end |
| 132 | /// validation - the model, in this case, is a release candidate and shouldn't |
| 133 | /// have random perturbations. It is a convenience feature: rather than needing |
| 134 | /// to take the release candidate model and compile it in 'release' mode, |
| 135 | /// validate it, then potentially discard it, it's easier to just pass the model |
| 136 | /// to the compiler, albeit compilation would be slower, as a one-off. Once the |
| 137 | /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in |
| 138 | /// release mode. The expectation is that a well-trained model provides a good |
| 139 | /// policy over a sufficiently diverse codebase, over many changes (i.e. |
| 140 | /// training happens seldom). |
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice,
      std::unique_ptr<TrainingLogger> Logger);

  /// Sum of native size estimates over all function definitions in the
  /// module; 0 when the size estimator is not requested.
  size_t getTotalSizeEstimate();

  /// Fold a (possibly negative) size delta into the running module-size
  /// estimate. CurrentNativeSize is always engaged (set in the ctor's init
  /// list), so the dereference is safe.
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  /// Invalidate F's cached InlineSizeEstimatorAnalysis result so the next
  /// getNativeSizeEstimate(F) recomputes it post-inlining.
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  std::optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  // True iff a model-under-training was loaded (modes 2/3 in the class
  // comment); false means we only replay and log the default policy.
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  // NOTE(review): getTotalSizeEstimate() returns size_t but these are
  // optional<int32_t> — large modules could narrow. Confirm the int32 range
  // is an accepted assumption here.
  const std::optional<int32_t> InitialNativeSize;
  std::optional<int32_t> CurrentNativeSize;
};
| 175 | |
| 176 | /// A variant of MLInlineAdvice that tracks all non-trivial inlining |
| 177 | /// decisions, for training/logging. |
/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        std::optional<size_t> CallerSizeEstimateBefore,
                        std::optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    // Safe downcast: this advice type is only ever constructed by
    // DevelopmentModeMLInlineAdvisor.
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  /// Inlining happened and the callee survived. Reward is the size growth of
  /// the caller, counting the (still live) callee's size as part of "after".
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    // The size-estimate optionals are engaged exactly when the evaluator is
    // requested (see getNativeSizeEstimate), so the derefs below are guarded.
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  /// Inlining happened and the callee was deleted: "after" is just the new
  /// caller size; the callee's size is fully reclaimed.
  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  /// We advised inlining but the inliner failed; logged as unsuccessful with
  /// no reward.
  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  /// Inlining was never attempted (advice was "don't inline").
  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  /// Forward one event to the TrainingLogger. Mandatory sites are trivial
  /// decisions and are deliberately not logged.
  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  // Engaged iff the size evaluator is requested; see getNativeSizeEstimate.
  const std::optional<size_t> CallerSizeEstimateBefore;
  const std::optional<size_t> CalleeSizeEstimateBefore;
  // Stored as int64_t to match the logged tensor's element type.
  const int64_t DefaultDecision;
  // Boolean flag; non-zero means "skip logging for this advice".
  const int64_t Mandatory;
};
| 255 | |
/// Feeds consumed only by the training framework (discount/reward/step_type);
/// they are appended after the real features in getInputFeatures().
/// NOTE(review): this vector is built during static initialization, i.e.
/// before cl::ParseCommandLineOptions runs, so a user-supplied
/// -ml-inliner-trained-model-feed-prefix cannot affect these names — they
/// always use the "action_" default. Confirm whether that is intended.
static const std::vector<TensorSpec> TrainingOnlyFeatures{
    TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
    TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
| 260 | |
| 261 | static const std::vector<TensorSpec> getInputFeatures() { |
| 262 | std::vector<TensorSpec> InputSpecs; |
| 263 | for (const auto &Feature : FeatureMap) |
| 264 | InputSpecs.push_back(TensorSpec(TFFeedPrefix + Feature.name(), Feature)); |
| 265 | append_range(InputSpecs, TrainingOnlyFeatures); |
| 266 | return InputSpecs; |
| 267 | } |
| 268 | |
| 269 | } // namespace |
| 270 | |
/// Build the logger's tensor layout and open the training log file.
/// The layout order here must match the positional logging order in
/// logInlineEvent: features, optional extra model outputs, default decision,
/// advised decision.
TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end());

  if (MUTR)
    append_range(FT, MUTR->extraOutputsForLoggingSpecs());

  // Record the absolute positions of the two decision tensors so
  // logInlineEvent can address them directly.
  DefaultDecisionPos = FT.size();
  FT.push_back(DefaultDecisionSpec);

  DecisionPos = FT.size();
  FT.push_back(InlineDecisionSpec);
  std::error_code EC;
  auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
  if (EC)
    dbgs() << (EC.message() + ":" + TrainingLog);
  // NOTE(review): on open failure we only print to dbgs() and still hand the
  // broken stream to the Logger — confirm this best-effort behavior is
  // intended rather than reporting a hard error.

  // Rewards are logged only when the size estimator is active; the reward is
  // a single int64 per observation.
  L = std::make_unique<Logger>(
      std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
  L->switchContext("");
}
| 295 | |
| 296 | /// Log one inlining event. |
| 297 | void TrainingLogger::logInlineEvent(const InlineEvent &Event, |
| 298 | const MLModelRunner &ModelRunner) { |
| 299 | L->startObservation(); |
| 300 | size_t CurrentFeature = 0; |
| 301 | size_t FeatureMapSize = FeatureMap.size(); |
| 302 | for (; CurrentFeature < FeatureMapSize; ++CurrentFeature) |
| 303 | L->logTensorValue(CurrentFeature, |
| 304 | reinterpret_cast<const char *>( |
| 305 | ModelRunner.getTensorUntyped(CurrentFeature))); |
| 306 | |
| 307 | if (MUTR) |
| 308 | for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) { |
| 309 | const char *RawData = |
| 310 | reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)); |
| 311 | L->logTensorValue(CurrentFeature, RawData); |
| 312 | ++CurrentFeature; |
| 313 | } |
| 314 | |
| 315 | assert(CurrentFeature == DefaultDecisionPos); |
| 316 | L->logTensorValue(DefaultDecisionPos, |
| 317 | reinterpret_cast<const char *>(&Event.DefaultDecision)); |
| 318 | L->logTensorValue(DecisionPos, |
| 319 | reinterpret_cast<const char *>(&Event.AdvisedDecision)); |
| 320 | L->endObservation(); |
| 321 | if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) |
| 322 | L->logReward(Event.Reward); |
| 323 | |
| 324 | // For debugging / later use |
| 325 | Effects.push_back(Event.Effect); |
| 326 | } |
| 327 | |
/// Set up the advisor. Inference is enabled iff the runner is a
/// ModelUnderTrainingRunner; logging iff a TrainingLogger was supplied.
DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice),
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      Logger(std::move(Logger)),
      // Only pay for the whole-module size walk when we actually log.
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}
| 341 | |
/// Native size estimate for F, or std::nullopt when the evaluator isn't
/// requested. Callers rely on the invariant that the result is engaged
/// whenever isEvaluatorRequested() is true.
std::optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return std::nullopt;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    // Deliberately 0 rather than nullopt: callers dereference under an
    // isEvaluatorRequested() guard, so the value must stay engaged.
    return 0;
  }
  return *R;
}
| 355 | |
| 356 | std::unique_ptr<MLInlineAdvice> |
| 357 | DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { |
| 358 | return std::make_unique<LoggingMLInlineAdvice>( |
| 359 | /*Advisor=*/this, |
| 360 | /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true, |
| 361 | /*Logger=*/*Logger, |
| 362 | /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), |
| 363 | /*CalleeSizeEstimateBefore=*/ |
| 364 | getNativeSizeEstimate(*CB.getCalledFunction()), |
| 365 | /*DefaultDecision=*/true, /*Mandatory*/ true); |
| 366 | } |
| 367 | |
| 368 | std::unique_ptr<MLInlineAdvice> |
| 369 | DevelopmentModeMLInlineAdvisor::getAdviceFromModel( |
| 370 | CallBase &CB, OptimizationRemarkEmitter &ORE) { |
| 371 | if (IsDoingInference && !isLogging()) |
| 372 | return MLInlineAdvisor::getAdviceFromModel(CB, ORE); |
| 373 | |
| 374 | bool DefaultAdvice = GetDefaultAdvice(CB); |
| 375 | auto Recommendation = |
| 376 | IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>()) |
| 377 | : DefaultAdvice; |
| 378 | return std::make_unique<LoggingMLInlineAdvice>( |
| 379 | /*Advisor=*/this, |
| 380 | /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, |
| 381 | /*Logger=*/*Logger, |
| 382 | /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), |
| 383 | /*CalleeSizeEstimateBefore=*/ |
| 384 | getNativeSizeEstimate(*CB.getCalledFunction()), |
| 385 | /*DefaultDecision=*/DefaultAdvice); |
| 386 | } |
| 387 | |
| 388 | size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { |
| 389 | if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) |
| 390 | return 0; |
| 391 | size_t Ret = 0; |
| 392 | for (auto &F : M) { |
| 393 | if (F.isDeclaration()) |
| 394 | continue; |
| 395 | Ret += *getNativeSizeEstimate(F); |
| 396 | } |
| 397 | return Ret; |
| 398 | } |
| 399 | |
| 400 | std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( |
| 401 | Module &M, ModuleAnalysisManager &MAM, |
| 402 | std::function<bool(CallBase &)> GetDefaultAdvice) { |
| 403 | auto &Ctx = M.getContext(); |
| 404 | std::unique_ptr<MLModelRunner> Runner; |
| 405 | if (TFModelUnderTrainingPath.empty()) |
| 406 | Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures())); |
| 407 | else |
| 408 | Runner = ModelUnderTrainingRunner::createAndEnsureValid( |
| 409 | Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(), |
| 410 | TFOutputSpecOverride); |
| 411 | if (!Runner) |
| 412 | return nullptr; |
| 413 | std::unique_ptr<TrainingLogger> Logger; |
| 414 | if (!TrainingLog.empty()) |
| 415 | Logger = std::make_unique<TrainingLogger>( |
| 416 | TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get())); |
| 417 | |
| 418 | return std::make_unique<DevelopmentModeMLInlineAdvisor>( |
| 419 | M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger)); |
| 420 | } |
| 421 | #endif // defined(LLVM_HAVE_TFLITE) |
| 422 | |