| 1 | //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // This implements feature and label extraction for offline supervised learning | 
|---|
| 10 | // of a IR to native size model. | 
|---|
| 11 | // | 
|---|
| 12 | //===----------------------------------------------------------------------===// | 
|---|
| 13 | #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" | 
|---|
| 14 |  | 
|---|
| 15 | #ifdef LLVM_HAVE_TFLITE | 
|---|
| 16 | #include "llvm/Analysis/Utils/TFUtils.h" | 
|---|
| 17 | #endif | 
|---|
| 18 | #include "llvm/IR/Function.h" | 
|---|
| 19 | #include "llvm/IR/PassManager.h" | 
|---|
| 20 | #include "llvm/Support/raw_ostream.h" | 
|---|
| 21 |  | 
|---|
| 22 | using namespace llvm; | 
|---|
| 23 |  | 
|---|
| 24 | AnalysisKey InlineSizeEstimatorAnalysis::Key; | 
|---|
| 25 |  | 
|---|
| 26 | #ifdef LLVM_HAVE_TFLITE | 
|---|
| 27 | #include "llvm/Analysis/LoopInfo.h" | 
|---|
| 28 | #include "llvm/Analysis/TargetLibraryInfo.h" | 
|---|
| 29 | #include "llvm/Analysis/TargetTransformInfo.h" | 
|---|
| 30 | #include "llvm/IR/BasicBlock.h" | 
|---|
| 31 | #include "llvm/IR/Dominators.h" | 
|---|
| 32 | #include "llvm/IR/Instructions.h" | 
|---|
| 33 | #include "llvm/Support/Casting.h" | 
|---|
| 34 | #include "llvm/Support/CommandLine.h" | 
|---|
| 35 | #include <algorithm> | 
|---|
| 36 | #include <deque> | 
|---|
| 37 | #include <optional> | 
|---|
| 38 |  | 
|---|
| 39 | static cl::opt<std::string> TFIR2NativeModelPath( | 
|---|
| 40 | "ml-inliner-ir2native-model", cl::Hidden, | 
|---|
| 41 | cl::desc( "Path to saved model evaluating native size from IR.")); | 
|---|
| 42 |  | 
|---|
| 43 | #define DEBUG_TYPE "inline-size-estimator" | 
|---|
| 44 | namespace { | 
|---|
| 45 | unsigned getMaxInstructionID() { | 
|---|
| 46 | #define LAST_OTHER_INST(NR) return NR; | 
|---|
| 47 | #include "llvm/IR/Instruction.def" | 
|---|
| 48 | } | 
|---|
| 49 |  | 
|---|
| 50 | class IRToNativeSizeLearning { | 
|---|
| 51 | public: | 
|---|
| 52 | enum class NamedFeatureIndex : size_t { | 
|---|
| 53 | InitialSize, | 
|---|
| 54 | Blocks, | 
|---|
| 55 | Calls, | 
|---|
| 56 | IsLocal, | 
|---|
| 57 | IsLinkOnceODR, | 
|---|
| 58 | IsLinkOnce, | 
|---|
| 59 | Loops, | 
|---|
| 60 | MaxLoopDepth, | 
|---|
| 61 | MaxDomTreeLevel, | 
|---|
| 62 |  | 
|---|
| 63 | NumNamedFeatures | 
|---|
| 64 | }; | 
|---|
| 65 | static const size_t NumNamedFeatures = | 
|---|
| 66 | static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures); | 
|---|
| 67 | struct FunctionFeatures { | 
|---|
| 68 | static const size_t FeatureCount; | 
|---|
| 69 |  | 
|---|
| 70 | std::array<int32_t, NumNamedFeatures> NamedFeatures = {0}; | 
|---|
| 71 | std::vector<int32_t> InstructionHistogram; | 
|---|
| 72 | std::vector<int32_t> InstructionPairHistogram; | 
|---|
| 73 |  | 
|---|
| 74 | void fillTensor(int32_t *Ptr) const; | 
|---|
| 75 | int32_t &operator[](NamedFeatureIndex Pos) { | 
|---|
| 76 | return NamedFeatures[static_cast<size_t>(Pos)]; | 
|---|
| 77 | } | 
|---|
| 78 | }; | 
|---|
| 79 | IRToNativeSizeLearning() = default; | 
|---|
| 80 |  | 
|---|
| 81 | static FunctionFeatures getFunctionFeatures(Function &F, | 
|---|
| 82 | FunctionAnalysisManager &FAM); | 
|---|
| 83 | }; | 
|---|
| 84 |  | 
|---|
| 85 | // This is a point in time - we determined including these pairs of | 
|---|
| 86 | // consecutive instructions (in the IR layout available at inline time) as | 
|---|
| 87 | // features improves the model performance. We want to move away from manual | 
|---|
| 88 | // feature selection. | 
|---|
| 89 | // The array is given in opcode pairs rather than labels because 1) labels | 
|---|
| 90 | // weren't readily available, and 2) the successions were hand - extracted. | 
|---|
| 91 | // | 
|---|
| 92 | // This array must be sorted. | 
|---|
| 93 | static const std::array<std::pair<size_t, size_t>, 137> | 
|---|
| 94 | ImportantInstructionSuccessions{ | 
|---|
| 95 | {{1, 1},   {1, 4},   {1, 5},   {1, 7},   {1, 8},   {1, 9},   {1, 11}, | 
|---|
| 96 | {1, 12},  {1, 13},  {1, 14},  {1, 18},  {1, 20},  {1, 22},  {1, 24}, | 
|---|
| 97 | {1, 25},  {1, 26},  {1, 27},  {1, 28},  {1, 29},  {1, 30},  {1, 31}, | 
|---|
| 98 | {1, 32},  {1, 33},  {1, 34},  {1, 39},  {1, 40},  {1, 42},  {1, 45}, | 
|---|
| 99 | {2, 1},   {2, 2},   {2, 13},  {2, 28},  {2, 29},  {2, 32},  {2, 33}, | 
|---|
| 100 | {2, 34},  {2, 38},  {2, 48},  {2, 49},  {2, 53},  {2, 55},  {2, 56}, | 
|---|
| 101 | {13, 2},  {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27}, | 
|---|
| 102 | {28, 2},  {28, 48}, {28, 53}, {29, 2},  {29, 33}, {29, 56}, {31, 31}, | 
|---|
| 103 | {31, 33}, {31, 34}, {31, 49}, {32, 1},  {32, 2},  {32, 13}, {32, 15}, | 
|---|
| 104 | {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40}, | 
|---|
| 105 | {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1},  {33, 2},  {33, 32}, | 
|---|
| 106 | {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1},  {34, 2}, | 
|---|
| 107 | {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34}, | 
|---|
| 108 | {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2},  {48, 34}, {48, 56}, | 
|---|
| 109 | {49, 1},  {49, 2},  {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39}, | 
|---|
| 110 | {49, 49}, {49, 56}, {53, 1},  {53, 2},  {53, 28}, {53, 34}, {53, 53}, | 
|---|
| 111 | {53, 57}, {55, 1},  {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56}, | 
|---|
| 112 | {56, 1},  {56, 2},  {56, 7},  {56, 13}, {56, 32}, {56, 33}, {56, 34}, | 
|---|
| 113 | {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57}, | 
|---|
| 114 | {64, 1},  {64, 64}, {65, 1},  {65, 65}}}; | 
|---|
| 115 |  | 
|---|
| 116 | // We have: 9 calculated features (the features here); 1 feature for each | 
|---|
| 117 | // instruction opcode; and 1 feature for each manually-identified sequence. | 
|---|
| 118 | // For the latter 2, we build a histogram: we count the number of | 
|---|
| 119 | // occurrences of each instruction opcode or succession of instructions, | 
|---|
| 120 | // respectively. | 
|---|
| 121 | // Note that instruction opcodes start from 1. For convenience, we also have an | 
|---|
| 122 | // always 0 feature for the '0' opcode, hence the extra 1. | 
|---|
| 123 | const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = | 
|---|
| 124 | ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 + | 
|---|
| 125 | IRToNativeSizeLearning::NumNamedFeatures; | 
|---|
| 126 |  | 
|---|
| 127 | size_t getSize(Function &F, TargetTransformInfo &TTI) { | 
|---|
| 128 | size_t Ret = 0; | 
|---|
| 129 | for (const auto &BB : F) | 
|---|
| 130 | for (const auto &I : BB) | 
|---|
| 131 | Ret += TTI.getInstructionCost( | 
|---|
| 132 | &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize) | 
|---|
| 133 | .getValue(); | 
|---|
| 134 | return Ret; | 
|---|
| 135 | } | 
|---|
| 136 |  | 
|---|
| 137 | size_t getSize(Function &F, FunctionAnalysisManager &FAM) { | 
|---|
| 138 | auto &TTI = FAM.getResult<TargetIRAnalysis>(F); | 
|---|
| 139 | return getSize(F, TTI); | 
|---|
| 140 | } | 
|---|
| 141 |  | 
|---|
| 142 | unsigned getMaxDominatorTreeDepth(const Function &F, | 
|---|
| 143 | const DominatorTree &Tree) { | 
|---|
| 144 | unsigned Ret = 0; | 
|---|
| 145 | for (const auto &BB : F) | 
|---|
| 146 | if (const auto *TN = Tree.getNode(&BB)) | 
|---|
| 147 | Ret = std::max(Ret, TN->getLevel()); | 
|---|
| 148 | return Ret; | 
|---|
| 149 | } | 
|---|
| 150 | } // namespace | 
|---|
| 151 |  | 
|---|
| 152 | IRToNativeSizeLearning::FunctionFeatures | 
|---|
| 153 | IRToNativeSizeLearning::getFunctionFeatures(Function &F, | 
|---|
| 154 | FunctionAnalysisManager &FAM) { | 
|---|
| 155 | assert(llvm::is_sorted(ImportantInstructionSuccessions) && | 
|---|
| 156 | "expected function features are sorted"); | 
|---|
| 157 |  | 
|---|
| 158 | auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); | 
|---|
| 159 | FunctionFeatures FF; | 
|---|
| 160 | size_t InstrCount = getMaxInstructionID() + 1; | 
|---|
| 161 | FF.InstructionHistogram.resize(InstrCount); | 
|---|
| 162 |  | 
|---|
| 163 | FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size()); | 
|---|
| 164 |  | 
|---|
| 165 | int StartID = 0; | 
|---|
| 166 | int LastID = StartID; | 
|---|
| 167 | auto getPairIndex = [](size_t a, size_t b) { | 
|---|
| 168 | auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b)); | 
|---|
| 169 | if (I == ImportantInstructionSuccessions.end()) | 
|---|
| 170 | return -1; | 
|---|
| 171 | return static_cast<int>( | 
|---|
| 172 | std::distance(ImportantInstructionSuccessions.begin(), I)); | 
|---|
| 173 | }; | 
|---|
| 174 |  | 
|---|
| 175 | // We don't want debug calls, because they'd just add noise. | 
|---|
| 176 | for (const auto &BB : F) { | 
|---|
| 177 | for (const auto &I : BB.instructionsWithoutDebug()) { | 
|---|
| 178 | auto ID = I.getOpcode(); | 
|---|
| 179 |  | 
|---|
| 180 | ++FF.InstructionHistogram[ID]; | 
|---|
| 181 | int PairIndex = getPairIndex(LastID, ID); | 
|---|
| 182 | if (PairIndex >= 0) | 
|---|
| 183 | ++FF.InstructionPairHistogram[PairIndex]; | 
|---|
| 184 | LastID = ID; | 
|---|
| 185 | if (isa<CallBase>(I)) | 
|---|
| 186 | ++FF[NamedFeatureIndex::Calls]; | 
|---|
| 187 | } | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); | 
|---|
| 191 | FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); | 
|---|
| 192 | FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); | 
|---|
| 193 | FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); | 
|---|
| 194 | FF[NamedFeatureIndex::Blocks] = F.size(); | 
|---|
| 195 | auto &LI = FAM.getResult<LoopAnalysis>(F); | 
|---|
| 196 | FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); | 
|---|
| 197 | for (auto &L : LI) | 
|---|
| 198 | FF[NamedFeatureIndex::MaxLoopDepth] = | 
|---|
| 199 | std::max(FF[NamedFeatureIndex::MaxLoopDepth], | 
|---|
| 200 | static_cast<int32_t>(L->getLoopDepth())); | 
|---|
| 201 | FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); | 
|---|
| 202 | return FF; | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { | 
|---|
| 206 | std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); | 
|---|
| 207 | Ptr += NamedFeatures.size(); | 
|---|
| 208 | std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); | 
|---|
| 209 | Ptr += InstructionHistogram.size(); | 
|---|
| 210 | std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), | 
|---|
| 211 | Ptr); | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 | bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { | 
|---|
| 215 | return !TFIR2NativeModelPath.empty(); | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 | InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { | 
|---|
| 219 | if (!isEvaluatorRequested()) { | 
|---|
| 220 | return; | 
|---|
| 221 | } | 
|---|
| 222 | std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>( | 
|---|
| 223 | "serving_default_input_1", | 
|---|
| 224 | {1, static_cast<int64_t>( | 
|---|
| 225 | IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})}; | 
|---|
| 226 | std::vector<TensorSpec> OutputSpecs{ | 
|---|
| 227 | TensorSpec::createSpec<float>( "StatefulPartitionedCall", {1})}; | 
|---|
| 228 | Evaluator = std::make_unique<TFModelEvaluator>( | 
|---|
| 229 | TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs); | 
|---|
| 230 | if (!Evaluator || !Evaluator->isValid()) { | 
|---|
| 231 | Evaluator.reset(); | 
|---|
| 232 | return; | 
|---|
| 233 | } | 
|---|
| 234 | } | 
|---|
| 235 |  | 
|---|
| 236 | InlineSizeEstimatorAnalysis::Result | 
|---|
| 237 | InlineSizeEstimatorAnalysis::run(const Function &F, | 
|---|
| 238 | FunctionAnalysisManager &FAM) { | 
|---|
| 239 | if (!Evaluator) | 
|---|
| 240 | return std::nullopt; | 
|---|
| 241 | auto Features = IRToNativeSizeLearning::getFunctionFeatures( | 
|---|
| 242 | const_cast<Function &>(F), FAM); | 
|---|
| 243 | int32_t *V = Evaluator->getInput<int32_t>(0); | 
|---|
| 244 | Features.fillTensor(V); | 
|---|
| 245 | auto ER = Evaluator->evaluate(); | 
|---|
| 246 | if (!ER) | 
|---|
| 247 | return std::nullopt; | 
|---|
| 248 | float Ret = *ER->getTensorValue<float>(0); | 
|---|
| 249 | if (Ret < 0.0) | 
|---|
| 250 | Ret = 0.0; | 
|---|
| 251 | return static_cast<size_t>(Ret); | 
|---|
| 252 | } | 
|---|
| 253 |  | 
|---|
| 254 | InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} | 
|---|
| 255 | InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( | 
|---|
| 256 | InlineSizeEstimatorAnalysis &&Other) | 
|---|
| 257 | : Evaluator(std::move(Other.Evaluator)) {} | 
|---|
| 258 |  | 
|---|
| 259 | #else | 
|---|
| 260 | namespace llvm { | 
|---|
| 261 | class TFModelEvaluator {}; | 
|---|
| 262 | } // namespace llvm | 
|---|
| 263 | InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default; | 
|---|
| 264 | InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( | 
|---|
| 265 | InlineSizeEstimatorAnalysis &&) {} | 
|---|
| 266 | InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default; | 
|---|
| 267 | InlineSizeEstimatorAnalysis::Result | 
|---|
| 268 | InlineSizeEstimatorAnalysis::run(const Function &F, | 
|---|
| 269 | FunctionAnalysisManager &FAM) { | 
|---|
| 270 | return std::nullopt; | 
|---|
| 271 | } | 
|---|
| 272 | bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } | 
|---|
| 273 | #endif | 
|---|
| 274 |  | 
|---|
| 275 | PreservedAnalyses | 
|---|
| 276 | InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, | 
|---|
| 277 | FunctionAnalysisManager &AM) { | 
|---|
| 278 | OS << "[InlineSizeEstimatorAnalysis] size estimate for "<< F.getName() | 
|---|
| 279 | << ": "<< AM.getResult<InlineSizeEstimatorAnalysis>(IR&: F) << "\n"; | 
|---|
| 280 | return PreservedAnalyses::all(); | 
|---|
| 281 | } | 
|---|
| 282 |  | 
|---|